summaryrefslogtreecommitdiff
path: root/win32/iconv/gentranslit.c
diff options
context:
space:
mode:
Diffstat (limited to 'win32/iconv/gentranslit.c')
-rw-r--r--win32/iconv/gentranslit.c259
1 files changed, 259 insertions, 0 deletions
diff --git a/win32/iconv/gentranslit.c b/win32/iconv/gentranslit.c
new file mode 100644
index 0000000..dbe2f7d
--- /dev/null
+++ b/win32/iconv/gentranslit.c
@@ -0,0 +1,259 @@
+/* Copyright (C) 1999-2003 Free Software Foundation, Inc.
+ This file is part of the GNU LIBICONV Library.
+
+ The GNU LIBICONV Library is free software; you can redistribute it
+ and/or modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ The GNU LIBICONV Library is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU LIBICONV Library; see the file COPYING.LIB.
+ If not, write to the Free Software Foundation, Inc., 59 Temple Place -
+ Suite 330, Boston, MA 02111-1307, USA. */
+
+/*
+ * Generates a table of small strings, used for transliteration, from a table
+ * containing lines of the form
+ * Unicode <tab> utf-8 replacement <tab> # comment
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int main (int argc, char *argv[])
+{
+ unsigned int data[0x100000];
+ int uni2index[0x110000];
+ int index;
+
+ if (argc != 1)
+ exit(1);
+
+ printf("/*\n");
+ printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
+ printf(" * This file is part of the GNU LIBICONV Library.\n");
+ printf(" *\n");
+ printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
+ printf(" * and/or modify it under the terms of the GNU Library General Public\n");
+ printf(" * License as published by the Free Software Foundation; either version 2\n");
+ printf(" * of the License, or (at your option) any later version.\n");
+ printf(" *\n");
+ printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
+ printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
+ printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
+ printf(" * Library General Public License for more details.\n");
+ printf(" *\n");
+ printf(" * You should have received a copy of the GNU Library General Public\n");
+ printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
+ printf(" * If not, write to the Free Software Foundation, Inc., 59 Temple Place -\n");
+ printf(" * Suite 330, Boston, MA 02111-1307, USA.\n");
+ printf(" */\n");
+ printf("\n");
+ printf("/*\n");
+ printf(" * Transliteration table\n");
+ printf(" */\n");
+ printf("\n");
+ {
+ int c;
+ int j;
+ for (j = 0; j < 0x110000; j++)
+ uni2index[j] = -1;
+ index = 0;
+ for (;;) {
+ c = getc(stdin);
+ if (c == EOF)
+ break;
+ if (c == '#') {
+ do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
+ continue;
+ }
+ ungetc(c,stdin);
+ if (scanf("%x",&j) != 1)
+ exit(1);
+ c = getc(stdin);
+ if (c != '\t')
+ exit(1);
+ for (;;) {
+ c = getc(stdin);
+ if (c == EOF || c == '\n')
+ exit(1);
+ if (c == '\t')
+ break;
+ if (uni2index[j] < 0) {
+ uni2index[j] = index;
+ data[index++] = 0;
+ }
+ if (c >= 0x80) {
+ /* Finish reading an UTF-8 character. */
+ if (c < 0xc0)
+ exit(1);
+ else {
+ unsigned int i = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
+ c &= (1 << (8-i)) - 1;
+ while (--i > 0) {
+ int cc = getc(stdin);
+ if (!(cc >= 0x80 && cc < 0xc0))
+ exit(1);
+ c <<= 6; c |= (cc & 0x3f);
+ }
+ }
+ }
+ data[index++] = (unsigned int) c;
+ }
+ if (uni2index[j] >= 0)
+ data[uni2index[j]] = index - uni2index[j] - 1;
+ do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
+ }
+ }
+ printf("static const unsigned int translit_data[%d] = {",index);
+ {
+ int i;
+ for (i = 0; i < index; i++) {
+ if (data[i] < 32)
+ printf("\n %3d,",data[i]);
+ else if (data[i] == '\'')
+ printf("'\\'',");
+ else if (data[i] == '\\')
+ printf("'\\\\',");
+ else if (data[i] < 127)
+ printf(" '%c',",data[i]);
+ else if (data[i] < 256)
+ printf("0x%02X,",data[i]);
+ else
+ printf("0x%04X,",data[i]);
+ }
+ printf("\n};\n");
+ }
+ printf("\n");
+ {
+ bool pages[0x1100];
+ int line[0x22000];
+ int tableno;
+ struct { int minline; int maxline; int usecount; const char* suffix; } tables[0x2000];
+ int i, j, p, j1, j2, t;
+
+ for (p = 0; p < 0x1100; p++)
+ pages[p] = false;
+ for (j = 0; j < 0x110000; j++)
+ if (uni2index[j] >= 0)
+ pages[j>>8] = true;
+ for (j1 = 0; j1 < 0x22000; j1++) {
+ bool all_invalid = true;
+ for (j2 = 0; j2 < 8; j2++) {
+ j = 8*j1+j2;
+ if (uni2index[j] >= 0)
+ all_invalid = false;
+ }
+ if (all_invalid)
+ line[j1] = -1;
+ else
+ line[j1] = 0;
+ }
+ tableno = 0;
+ for (j1 = 0; j1 < 0x22000; j1++) {
+ if (line[j1] >= 0) {
+ if (tableno > 0
+ && ((j1 > 0 && line[j1-1] == tableno-1)
+ || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
+ && j1 - tables[tableno-1].maxline <= 8))) {
+ line[j1] = tableno-1;
+ tables[tableno-1].maxline = j1;
+ } else {
+ tableno++;
+ line[j1] = tableno-1;
+ tables[tableno-1].minline = tables[tableno-1].maxline = j1;
+ }
+ }
+ }
+ for (t = 0; t < tableno; t++) {
+ tables[t].usecount = 0;
+ j1 = 8*tables[t].minline;
+ j2 = 8*(tables[t].maxline+1);
+ for (j = j1; j < j2; j++)
+ if (uni2index[j] >= 0)
+ tables[t].usecount++;
+ }
+ for (t = 0, p = -1, i = 0; t < tableno; t++) {
+ if (tables[t].usecount > 1) {
+ char* s;
+ if (p == tables[t].minline >> 5) {
+ s = (char*) malloc(5+1);
+ sprintf(s, "%02x_%d", p, ++i);
+ } else {
+ p = tables[t].minline >> 5;
+ s = (char*) malloc(2+1);
+ sprintf(s, "%02x", p);
+ }
+ tables[t].suffix = s;
+ } else
+ tables[t].suffix = NULL;
+ }
+ {
+ p = -1;
+ for (t = 0; t < tableno; t++)
+ if (tables[t].usecount > 1) {
+ p = 0;
+ printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
+ for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
+ if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
+ printf(" /* 0x%04x */\n", 8*j1);
+ printf(" ");
+ for (j2 = 0; j2 < 8; j2++) {
+ j = 8*j1+j2;
+ printf(" %4d,", uni2index[j]);
+ }
+ printf(" /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
+ }
+ printf("};\n");
+ }
+ if (p >= 0)
+ printf("\n");
+ }
+ printf("#define translit_index(wc) \\\n (");
+ for (j1 = 0; j1 < 0x22000;) {
+ t = line[j1];
+ for (j2 = j1; j2 < 0x22000 && line[j2] == t; j2++);
+ if (t >= 0) {
+ if (j1 != tables[t].minline) abort();
+ if (j2 > tables[t].maxline+1) abort();
+ j2 = tables[t].maxline+1;
+ }
+ if (t == -1) {
+ } else {
+ if (t >= 0 && tables[t].usecount == 0) abort();
+ if (t >= 0 && tables[t].usecount == 1) {
+ if (j2 != j1+1) abort();
+ for (j = 8*j1; j < 8*j2; j++)
+ if (uni2index[j] >= 0) {
+ printf("wc == 0x%04x ? %d", j, uni2index[j]);
+ break;
+ }
+ } else {
+ if (j1 == 0) {
+ printf("wc < 0x%04x", 8*j2);
+ } else {
+ printf("wc >= 0x%04x && wc < 0x%04x", 8*j1, 8*j2);
+ }
+ printf(" ? translit_page%s[wc", tables[t].suffix);
+ if (tables[t].minline > 0)
+ printf("-0x%04x", 8*j1);
+ printf("]");
+ }
+ printf(" : \\\n ");
+ }
+ j1 = j2;
+ }
+ printf("-1)\n");
+ }
+
+ fflush(stdout);
+ if (ferror(stdout))
+ exit(1);
+ exit(0);
+}