diff options
Diffstat (limited to 'MSVC/iconv/gentranslit.c')
-rw-r--r-- | MSVC/iconv/gentranslit.c | 259 |
1 files changed, 259 insertions, 0 deletions
/* Copyright (C) 1999-2003 Free Software Foundation, Inc.
   This file is part of the GNU LIBICONV Library.

   The GNU LIBICONV Library is free software; you can redistribute it
   and/or modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   The GNU LIBICONV Library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   If not, write to the Free Software Foundation, Inc., 59 Temple Place -
   Suite 330, Boston, MA 02111-1307, USA.
 */

/*
 * Generates a table of small strings, used for transliteration, from a table
 * containing lines of the form
 * Unicode <tab> utf-8 replacement <tab> # comment
 *
 * The table is read from standard input and the generated C source is
 * written to standard output.  Any malformed or oversized input terminates
 * the program with exit status 1.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

enum {
  UNICODE_LIMIT = 0x110000,                 /* one past the largest code point accepted */
  DATA_CAPACITY = 0x100000,                 /* capacity of data[] */
  LINE_WIDTH = 8,                           /* code points per "line" */
  LINE_COUNT = UNICODE_LIMIT / LINE_WIDTH,  /* 0x22000 lines */
  LINES_PER_PAGE = 0x20,                    /* 32 lines = 256 code points */
  MAX_LINE_GAP = 8,                         /* largest line distance merged into one table */
  TABLE_CAPACITY = 0x2000,                  /* capacity of tables[] */
  SUFFIX_SIZE = 16                          /* room for "%02x_%d" with 4 hex digits + int */
};

/* A run of lines that is emitted either as one translit_pageXX array
   (usecount > 1) or as a single "wc == X" test (usecount == 1). */
struct table {
  int minline;              /* first line covered */
  int maxline;              /* last line covered (inclusive) */
  int usecount;             /* number of code points in the run that have an entry */
  char suffix[SUFFIX_SIZE]; /* array name suffix; empty when usecount <= 1 */
};

/* These arrays total several megabytes, so they live in static storage
   rather than on main's stack. */
static unsigned int data[DATA_CAPACITY]; /* length-prefixed replacement strings */
static int data_count;                   /* number of used elements in data[] */
static int uni2index[UNICODE_LIMIT];     /* code point -> index into data[], or -1 */
static int line[LINE_COUNT];             /* line -> table number, or -1 */
static struct table tables[TABLE_CAPACITY];
static int table_count;

/* Writes the licence header and title comment of the generated file. */
static void print_header (void)
{
  fputs("/*\n"
        " * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n"
        " * This file is part of the GNU LIBICONV Library.\n"
        " *\n"
        " * The GNU LIBICONV Library is free software; you can redistribute it\n"
        " * and/or modify it under the terms of the GNU Library General Public\n"
        " * License as published by the Free Software Foundation; either version 2\n"
        " * of the License, or (at your option) any later version.\n"
        " *\n"
        " * The GNU LIBICONV Library is distributed in the hope that it will be\n"
        " * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
        " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
        " * Library General Public License for more details.\n"
        " *\n"
        " * You should have received a copy of the GNU Library General Public\n"
        " * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n"
        " * If not, write to the Free Software Foundation, Inc., 59 Temple Place -\n"
        " * Suite 330, Boston, MA 02111-1307, USA.\n"
        " */\n"
        "\n"
        "/*\n"
        " * Transliteration table\n"
        " */\n"
        "\n",
        stdout);
}

/* Discards input up to and including the next newline, or up to EOF. */
static void skip_rest_of_line (void)
{
  int c;

  do
    c = getc(stdin);
  while (!(c == EOF || c == '\n'));
}

/* Appends one element to data[], exiting instead of writing past its end. */
static void append_data (unsigned int value)
{
  if (data_count >= DATA_CAPACITY)
    exit(1);
  data[data_count++] = value;
}

/* Given the lead byte (>= 0x80) of a multi-byte UTF-8 sequence, reads the
   continuation bytes from stdin and returns the decoded value.  Exits on a
   stray continuation byte or a malformed/truncated sequence.  The value is
   accumulated as unsigned so that the shifts cannot overflow a signed int. */
static unsigned int read_utf8_tail (int lead)
{
  unsigned int len;
  unsigned int wc;

  if (lead < 0xc0)
    exit(1);
  len = (lead < 0xe0 ? 2 : lead < 0xf0 ? 3 : lead < 0xf8 ? 4 : lead < 0xfc ? 5 : 6);
  wc = (unsigned int) lead & ((1u << (8 - len)) - 1);
  while (--len > 0) {
    int cc = getc(stdin);
    if (!(cc >= 0x80 && cc < 0xc0))
      exit(1);
    wc = (wc << 6) | ((unsigned int) cc & 0x3f);
  }
  return wc;
}

/* Parses the whole input table from stdin into uni2index[] and data[].
   Each entry in data[] is stored as a length element followed by the
   replacement's code points.
   NOTE(review): a code point listed twice would get a length spanning
   everything appended since its first occurrence; the input is presumably
   expected to list each code point once. */
static void read_input (void)
{
  int j;

  for (j = 0; j < UNICODE_LIMIT; j++)
    uni2index[j] = -1;
  data_count = 0;

  for (;;) {
    unsigned int code;
    int c = getc(stdin);

    if (c == EOF)
      break;
    if (c == '#') {
      /* Comment line. */
      skip_rest_of_line();
      continue;
    }
    ungetc(c, stdin);

    /* %x stores through an unsigned int pointer; the value is range-checked
       before it is used as an array index. */
    if (scanf("%x", &code) != 1)
      exit(1);
    if (code >= (unsigned int) UNICODE_LIMIT)
      exit(1);
    j = (int) code;

    c = getc(stdin);
    if (c != '\t')
      exit(1);

    /* Replacement text, terminated by a tab. */
    for (;;) {
      unsigned int wc;

      c = getc(stdin);
      if (c == EOF || c == '\n')
        exit(1);
      if (c == '\t')
        break;
      if (uni2index[j] < 0) {
        /* First character: reserve the slot that will hold the length. */
        uni2index[j] = data_count;
        append_data(0);
      }
      wc = (c >= 0x80 ? read_utf8_tail(c) : (unsigned int) c);
      append_data(wc);
    }
    if (uni2index[j] >= 0)
      data[uni2index[j]] = (unsigned int) (data_count - uni2index[j] - 1);

    /* Ignore the trailing comment column. */
    skip_rest_of_line();
  }
}

/* Emits the translit_data[] array followed by a blank line. */
static void print_data (void)
{
  int i;

  printf("static const unsigned int translit_data[%d] = {", data_count);
  for (i = 0; i < data_count; i++) {
    unsigned int v = data[i];

    if (v < 32)
      printf("\n %3d,", (int) v);
    else if (v == '\'')
      printf("'\\'',");
    else if (v == '\\')
      printf("'\\\\',");
    else if (v < 127)
      printf(" '%c',", (int) v);
    else if (v < 256)
      printf("0x%02X,", v);
    else
      printf("0x%04X,", v);
  }
  printf("\n};\n");
  printf("\n");
}

/* Returns true if any code point of line j1 has a transliteration entry. */
static bool line_in_use (int j1)
{
  int j2;

  for (j2 = 0; j2 < LINE_WIDTH; j2++)
    if (uni2index[LINE_WIDTH * j1 + j2] >= 0)
      return true;
  return false;
}

/* Groups the used lines into tables, counts the entries of each table and
   assigns array-name suffixes to the tables that hold more than one entry. */
static void build_tables (void)
{
  int j, j1, t, p, i;

  for (j1 = 0; j1 < LINE_COUNT; j1++)
    line[j1] = (line_in_use(j1) ? 0 : -1);

  table_count = 0;
  for (j1 = 0; j1 < LINE_COUNT; j1++) {
    if (line[j1] < 0)
      continue;
    /* Extend the previous table when this line is adjacent to it, or lies
       in the same 32-line page at most MAX_LINE_GAP lines further on. */
    if (table_count > 0
        && ((j1 > 0 && line[j1 - 1] == table_count - 1)
            || ((tables[table_count - 1].maxline >> 5) == (j1 >> 5)
                && j1 - tables[table_count - 1].maxline <= MAX_LINE_GAP))) {
      line[j1] = table_count - 1;
      tables[table_count - 1].maxline = j1;
    } else {
      if (table_count >= TABLE_CAPACITY)
        exit(1);
      table_count++;
      line[j1] = table_count - 1;
      tables[table_count - 1].minline = tables[table_count - 1].maxline = j1;
    }
  }

  for (t = 0; t < table_count; t++) {
    int first = LINE_WIDTH * tables[t].minline;
    int limit = LINE_WIDTH * (tables[t].maxline + 1);

    tables[t].usecount = 0;
    for (j = first; j < limit; j++)
      if (uni2index[j] >= 0)
        tables[t].usecount++;
  }

  /* p is the page (minline >> 5) of the most recently named table; it can
     exceed 0xff, so the suffix buffer must hold more than two hex digits. */
  for (t = 0, p = -1, i = 0; t < table_count; t++) {
    int n;

    if (tables[t].usecount <= 1) {
      tables[t].suffix[0] = '\0';
      continue;
    }
    if (p == tables[t].minline >> 5) {
      n = snprintf(tables[t].suffix, sizeof tables[t].suffix, "%02x_%d",
                   (unsigned int) p, ++i);
    } else {
      p = tables[t].minline >> 5;
      n = snprintf(tables[t].suffix, sizeof tables[t].suffix, "%02x",
                   (unsigned int) p);
    }
    if (n < 0 || (size_t) n >= sizeof tables[t].suffix)
      exit(1);
  }
}

/* Emits one translit_pageXX[] array per table with more than one entry,
   followed by a blank line if at least one array was written.
   NOTE(review): the arrays are declared `short` while the stored values are
   indices into translit_data[]; indices above SHRT_MAX would not fit. */
static void print_pages (void)
{
  bool any = false;
  int t, j1, j2;

  for (t = 0; t < table_count; t++) {
    if (tables[t].usecount <= 1)
      continue;
    any = true;
    printf("static const short translit_page%s[%d] = {\n", tables[t].suffix,
           LINE_WIDTH * (tables[t].maxline - tables[t].minline + 1));
    for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
      if ((j1 % LINES_PER_PAGE) == 0 && j1 > tables[t].minline)
        printf(" /* 0x%04x */\n", (unsigned int) (LINE_WIDTH * j1));
      printf(" ");
      for (j2 = 0; j2 < LINE_WIDTH; j2++)
        printf(" %4d,", uni2index[LINE_WIDTH * j1 + j2]);
      printf(" /* 0x%02x-0x%02x */\n",
             (unsigned int) (LINE_WIDTH * (j1 % LINES_PER_PAGE)),
             (unsigned int) (LINE_WIDTH * (j1 % LINES_PER_PAGE) + 7));
    }
    printf("};\n");
  }
  if (any)
    printf("\n");
}

/* Emits the translit_index(wc) macro: a chain of conditionals that maps a
   code point to its index in translit_data[], or to -1. */
static void print_index_macro (void)
{
  int j, j1, j2, t;

  printf("#define translit_index(wc) \\\n (");
  for (j1 = 0; j1 < LINE_COUNT; j1 = j2) {
    t = line[j1];
    for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++)
      ;
    if (t < 0)
      continue;

    /* Consistency checks, then skip to the end of the table; unused lines
       inside a table's gaps still carry -1 in line[]. */
    if (j1 != tables[t].minline) abort();
    if (j2 > tables[t].maxline + 1) abort();
    j2 = tables[t].maxline + 1;

    if (tables[t].usecount == 0) abort();
    if (tables[t].usecount == 1) {
      if (j2 != j1 + 1) abort();
      for (j = LINE_WIDTH * j1; j < LINE_WIDTH * j2; j++)
        if (uni2index[j] >= 0) {
          printf("wc == 0x%04x ? %d", (unsigned int) j, uni2index[j]);
          break;
        }
    } else {
      if (j1 == 0)
        printf("wc < 0x%04x", (unsigned int) (LINE_WIDTH * j2));
      else
        printf("wc >= 0x%04x && wc < 0x%04x",
               (unsigned int) (LINE_WIDTH * j1), (unsigned int) (LINE_WIDTH * j2));
      printf(" ? translit_page%s[wc", tables[t].suffix);
      if (tables[t].minline > 0)
        printf("-0x%04x", (unsigned int) (LINE_WIDTH * j1));
      printf("]");
    }
    printf(" : \\\n ");
  }
  printf("-1)\n");
}

/* Entry point.  Takes no command-line arguments; reads the table from stdin
   and writes the generated source to stdout.  Exits with status 0 on
   success and 1 on a usage, input or output error. */
int main (int argc, char *argv[])
{
  (void) argv;

  if (argc != 1)
    exit(1);

  print_header();
  read_input();
  print_data();
  build_tables();
  print_pages();
  print_index_macro();

  fflush(stdout);
  if (ferror(stdout))
    exit(1);
  exit(0);
}