diff options
author | Nicolas "Pixel" Noble <pixel@nobis-crew.org> | 2014-08-09 19:32:46 -0700 |
---|---|---|
committer | Nicolas "Pixel" Noble <pixel@nobis-crew.org> | 2014-08-09 19:32:46 -0700 |
commit | 3d077e536e9fae7c292e9ab7c1e76e64076723f1 (patch) | |
tree | 048fa53558fc8eecb0a696691549ee7426d02cac /win32/iconv/iso2022_jp2.h | |
parent | bddaf98342a461f4e02389d4db390098fb423fbf (diff) |
Replacing GPLed iconv with a public domain one.
Diffstat (limited to 'win32/iconv/iso2022_jp2.h')
-rw-r--r-- | win32/iconv/iso2022_jp2.h | 688 |
1 files changed, 0 insertions, 688 deletions
diff --git a/win32/iconv/iso2022_jp2.h b/win32/iconv/iso2022_jp2.h deleted file mode 100644 index 276d04c..0000000 --- a/win32/iconv/iso2022_jp2.h +++ /dev/null @@ -1,688 +0,0 @@ -/* - * Copyright (C) 1999-2001 Free Software Foundation, Inc. - * This file is part of the GNU LIBICONV Library. - * - * The GNU LIBICONV Library is free software; you can redistribute it - * and/or modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * The GNU LIBICONV Library is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with the GNU LIBICONV Library; see the file COPYING.LIB. - * If not, write to the Free Software Foundation, Inc., 59 Temple Place - - * Suite 330, Boston, MA 02111-1307, USA. - */ - -/* - * ISO-2022-JP-2 - */ - -/* Specification: RFC 1554 */ -/* ESC '(' 'I' for JISX0201 Katakana is an extension not found in RFC 1554 or - CJK.INF, but implemented in glibc-2.1 and qt-2.0. */ - -#define ESC 0x1b - -/* - * The state is composed of one of the following values - */ -#define STATE_ASCII 0 -#define STATE_JISX0201ROMAN 1 -#define STATE_JISX0201KATAKANA 2 -#define STATE_JISX0208 3 -#define STATE_JISX0212 4 -#define STATE_GB2312 5 -#define STATE_KSC5601 6 -/* - * and one of the following values, << 8 - */ -#define STATE_G2_NONE 0 -#define STATE_G2_ISO8859_1 1 -#define STATE_G2_ISO8859_7 2 - -#define SPLIT_STATE \ - unsigned int state1 = state & 0xff, state2 = state >> 8 -#define COMBINE_STATE \ - state = (state2 << 8) | state1 - -static int -iso2022_jp2_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) -{ - state_t state = conv->istate; - SPLIT_STATE; - int count = 0; - unsigned char c; - for (;;) { - c = *s; - if (c == ESC) { - if (n < count+3) - goto none; - if (s[1] == '(') { - if (s[2] == 'B') { - state1 = STATE_ASCII; - s += 3; count += 3; - if (n < count+1) - goto none; - continue; - } - if (s[2] == 'J') { - state1 = STATE_JISX0201ROMAN; - s += 3; count += 3; - if (n < count+1) - goto none; - continue; - } - if (s[2] == 'I') { - state1 = STATE_JISX0201KATAKANA; - s += 3; count += 3; - if (n < count+1) - goto none; - continue; - } - return RET_ILSEQ; - } - if (s[1] == '$') { - if (s[2] == '@' || s[2] == 'B') { - /* We don't distinguish JIS X 0208-1978 and JIS X 0208-1983. */ - state1 = STATE_JISX0208; - s += 3; count += 3; - if (n < count+1) - goto none; - continue; - } - if (s[2] == 'A') { - state1 = STATE_GB2312; - s += 3; count += 3; - if (n < count+1) - goto none; - continue; - } - if (s[2] == '(') { - if (n < count+4) - goto none; - if (s[3] == 'D') { - state1 = STATE_JISX0212; - s += 4; count += 4; - if (n < count+1) - goto none; - continue; - } - if (s[3] == 'C') { - state1 = STATE_KSC5601; - s += 4; count += 4; - if (n < count+1) - goto none; - continue; - } - return RET_ILSEQ; - } - return RET_ILSEQ; - } - if (s[1] == '.') { - if (n < count+3) - goto none; - if (s[2] == 'A') { - state2 = STATE_G2_ISO8859_1; - s += 3; count += 3; - if (n < count+1) - goto none; - continue; - } - if (s[2] == 'F') { - state2 = STATE_G2_ISO8859_7; - s += 3; count += 3; - if (n < count+1) - goto none; - continue; - } - return RET_ILSEQ; - } - if (s[1] == 'N') { - switch (state2) { - case STATE_G2_NONE: - return RET_ILSEQ; - case STATE_G2_ISO8859_1: - if (s[2] < 0x80) { - unsigned char buf = s[2]+0x80; - int ret = iso8859_1_mbtowc(conv,pwc,&buf,1); - if (ret == RET_ILSEQ) - return RET_ILSEQ; - if (ret != 1) abort(); - COMBINE_STATE; - conv->istate = state; - return count+3; - } else - return RET_ILSEQ; - case STATE_G2_ISO8859_7: - if (s[2] < 0x80) { - unsigned char buf = s[2]+0x80; - int ret = iso8859_7_mbtowc(conv,pwc,&buf,1); - if (ret == RET_ILSEQ) - return RET_ILSEQ; - if (ret != 1) abort(); - COMBINE_STATE; - conv->istate = state; - return count+3; - } else - return RET_ILSEQ; - default: abort(); - } - } - return RET_ILSEQ; - } - break; - } - switch (state1) { - case STATE_ASCII: - if (c < 0x80) { - int ret = ascii_mbtowc(conv,pwc,s,1); - if (ret == RET_ILSEQ) - return RET_ILSEQ; - if (ret != 1) abort(); - if (*pwc == 0x000a || *pwc == 0x000d) - state2 = STATE_G2_NONE; - COMBINE_STATE; - conv->istate = state; - return count+1; - } else - return RET_ILSEQ; - case STATE_JISX0201ROMAN: - if (c < 0x80) { - int ret = jisx0201_mbtowc(conv,pwc,s,1); - if (ret == RET_ILSEQ) - return RET_ILSEQ; - if (ret != 1) abort(); - if (*pwc == 0x000a || *pwc == 0x000d) - state2 = STATE_G2_NONE; - COMBINE_STATE; - conv->istate = state; - return count+1; - } else - return RET_ILSEQ; - case STATE_JISX0201KATAKANA: - if (c < 0x80) { - unsigned char buf = c+0x80; - int ret = jisx0201_mbtowc(conv,pwc,&buf,1); - if (ret == RET_ILSEQ) - return RET_ILSEQ; - if (ret != 1) abort(); - COMBINE_STATE; - conv->istate = state; - return count+1; - } else - return RET_ILSEQ; - case STATE_JISX0208: - if (n < count+2) - goto none; - if (s[0] < 0x80 && s[1] < 0x80) { - int ret = jisx0208_mbtowc(conv,pwc,s,2); - if (ret == RET_ILSEQ) - return RET_ILSEQ; - if (ret != 2) abort(); - COMBINE_STATE; - conv->istate = state; - return count+2; - } else - return RET_ILSEQ; - case STATE_JISX0212: - if (n < count+2) - goto none; - if (s[0] < 0x80 && s[1] < 0x80) { - int ret = jisx0212_mbtowc(conv,pwc,s,2); - if (ret == RET_ILSEQ) - return RET_ILSEQ; - if (ret != 2) abort(); - COMBINE_STATE; - conv->istate = state; - return count+2; - } else - return RET_ILSEQ; - case STATE_GB2312: - if (n < count+2) - goto none; - if (s[0] < 0x80 && s[1] < 0x80) { - int ret = gb2312_mbtowc(conv,pwc,s,2); - if (ret == RET_ILSEQ) - return RET_ILSEQ; - if (ret != 2) abort(); - COMBINE_STATE; - conv->istate = state; - return count+2; - } else - return RET_ILSEQ; - case STATE_KSC5601: - if (n < count+2) - goto none; - if (s[0] < 0x80 && s[1] < 0x80) { - int ret = ksc5601_mbtowc(conv,pwc,s,2); - if (ret == RET_ILSEQ) - return RET_ILSEQ; - if (ret != 2) abort(); - COMBINE_STATE; - conv->istate = state; - return count+2; - } else - return RET_ILSEQ; - default: abort(); - } - -none: - COMBINE_STATE; - conv->istate = state; - return RET_TOOFEW(count); -} - -#undef COMBINE_STATE -#undef SPLIT_STATE - -/* - * The state can also contain one of the following values, << 16. - * Values >= STATE_TAG_LANGUAGE are temporary tag parsing states. - */ -#define STATE_TAG_NONE 0 -#define STATE_TAG_LANGUAGE 4 -#define STATE_TAG_LANGUAGE_j 5 -#define STATE_TAG_LANGUAGE_ja 1 -#define STATE_TAG_LANGUAGE_k 6 -#define STATE_TAG_LANGUAGE_ko 2 -#define STATE_TAG_LANGUAGE_z 7 -#define STATE_TAG_LANGUAGE_zh 3 - -#define SPLIT_STATE \ - unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = state >> 16 -#define COMBINE_STATE \ - state = (state3 << 16) | (state2 << 8) | state1 - -static int -iso2022_jp2_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) -{ - state_t state = conv->ostate; - SPLIT_STATE; - unsigned char buf[2]; - int ret; - /* This defines the conversion preferences depending on the current - langauge tag. */ - enum conversion { none = 0, european, japanese, chinese, korean, other }; - static const unsigned int conversion_lists[STATE_TAG_LANGUAGE] = { - /* STATE_TAG_NONE */ - japanese + (european << 3) + (chinese << 6) + (korean << 9) + (other << 12), - /* STATE_TAG_LANGUAGE_ja */ - japanese + (european << 3) + (chinese << 6) + (korean << 9) + (other << 12), - /* STATE_TAG_LANGUAGE_ko */ - korean + (european << 3) + (japanese << 6) + (chinese << 9) + (other << 12), - /* STATE_TAG_LANGUAGE_zh */ - chinese + (european << 3) + (japanese << 6) + (korean << 9) + (other << 12) - }; - unsigned int conversion_list; - - /* Handle Unicode tag characters (range U+E0000..U+E007F). */ - if ((wc >> 7) == (0xe0000 >> 7)) { - char c = wc & 0x7f; - if (c >= 'A' && c <= 'Z') - c += 'a'-'A'; - switch (c) { - case 0x01: - state3 = STATE_TAG_LANGUAGE; - COMBINE_STATE; - conv->ostate = state; - return 0; - case 'j': - if (state3 == STATE_TAG_LANGUAGE) { - state3 = STATE_TAG_LANGUAGE_j; - COMBINE_STATE; - conv->ostate = state; - return 0; - } - break; - case 'a': - if (state3 == STATE_TAG_LANGUAGE_j) { - state3 = STATE_TAG_LANGUAGE_ja; - COMBINE_STATE; - conv->ostate = state; - return 0; - } - break; - case 'k': - if (state3 == STATE_TAG_LANGUAGE) { - state3 = STATE_TAG_LANGUAGE_k; - COMBINE_STATE; - conv->ostate = state; - return 0; - } - break; - case 'o': - if (state3 == STATE_TAG_LANGUAGE_k) { - state3 = STATE_TAG_LANGUAGE_ko; - COMBINE_STATE; - conv->ostate = state; - return 0; - } - break; - case 'z': - if (state3 == STATE_TAG_LANGUAGE) { - state3 = STATE_TAG_LANGUAGE_z; - COMBINE_STATE; - conv->ostate = state; - return 0; - } - break; - case 'h': - if (state3 == STATE_TAG_LANGUAGE_z) { - state3 = STATE_TAG_LANGUAGE_zh; - COMBINE_STATE; - conv->ostate = state; - return 0; - } - break; - case 0x7f: - state3 = STATE_TAG_NONE; - COMBINE_STATE; - conv->ostate = state; - return 0; - default: - break; - } - /* Other tag characters reset the tag parsing state or are ignored. */ - if (state3 >= STATE_TAG_LANGUAGE) - state3 = STATE_TAG_NONE; - COMBINE_STATE; - conv->ostate = state; - return 0; - } - if (state3 >= STATE_TAG_LANGUAGE) - state3 = STATE_TAG_NONE; - - /* Try ASCII. */ - ret = ascii_wctomb(conv,buf,wc,1); - if (ret != RET_ILUNI) { - if (ret != 1) abort(); - if (buf[0] < 0x80) { - int count = (state1 == STATE_ASCII ? 1 : 4); - if (n < count) - return RET_TOOSMALL; - if (state1 != STATE_ASCII) { - r[0] = ESC; - r[1] = '('; - r[2] = 'B'; - r += 3; - state1 = STATE_ASCII; - } - r[0] = buf[0]; - if (wc == 0x000a || wc == 0x000d) - state2 = STATE_G2_NONE; - COMBINE_STATE; - conv->ostate = state; - return count; - } - } - - conversion_list = conversion_lists[state3]; - - do { - switch (conversion_list & ((1 << 3) - 1)) { - - case european: - - /* Try ISO-8859-1. */ - ret = iso8859_1_wctomb(conv,buf,wc,1); - if (ret != RET_ILUNI) { - if (ret != 1) abort(); - if (buf[0] >= 0x80) { - int count = (state2 == STATE_G2_ISO8859_1 ? 3 : 6); - if (n < count) - return RET_TOOSMALL; - if (state2 != STATE_G2_ISO8859_1) { - r[0] = ESC; - r[1] = '.'; - r[2] = 'A'; - r += 3; - state2 = STATE_G2_ISO8859_1; - } - r[0] = ESC; - r[1] = 'N'; - r[2] = buf[0]-0x80; - COMBINE_STATE; - conv->ostate = state; - return count; - } - } - - /* Try ISO-8859-7. */ - ret = iso8859_7_wctomb(conv,buf,wc,1); - if (ret != RET_ILUNI) { - if (ret != 1) abort(); - if (buf[0] >= 0x80) { - int count = (state2 == STATE_G2_ISO8859_7 ? 3 : 6); - if (n < count) - return RET_TOOSMALL; - if (state2 != STATE_G2_ISO8859_7) { - r[0] = ESC; - r[1] = '.'; - r[2] = 'F'; - r += 3; - state2 = STATE_G2_ISO8859_7; - } - r[0] = ESC; - r[1] = 'N'; - r[2] = buf[0]-0x80; - COMBINE_STATE; - conv->ostate = state; - return count; - } - } - - break; - - case japanese: - - /* Try JIS X 0201-1976 Roman. */ - ret = jisx0201_wctomb(conv,buf,wc,1); - if (ret != RET_ILUNI) { - if (ret != 1) abort(); - if (buf[0] < 0x80) { - int count = (state1 == STATE_JISX0201ROMAN ? 1 : 4); - if (n < count) - return RET_TOOSMALL; - if (state1 != STATE_JISX0201ROMAN) { - r[0] = ESC; - r[1] = '('; - r[2] = 'J'; - r += 3; - state1 = STATE_JISX0201ROMAN; - } - r[0] = buf[0]; - if (wc == 0x000a || wc == 0x000d) - state2 = STATE_G2_NONE; - COMBINE_STATE; - conv->ostate = state; - return count; - } - } - - /* Try JIS X 0208-1990 in place of JIS X 0208-1978 and - JIS X 0208-1983. */ - ret = jisx0208_wctomb(conv,buf,wc,2); - if (ret != RET_ILUNI) { - if (ret != 2) abort(); - if (buf[0] < 0x80 && buf[1] < 0x80) { - int count = (state1 == STATE_JISX0208 ? 2 : 5); - if (n < count) - return RET_TOOSMALL; - if (state1 != STATE_JISX0208) { - r[0] = ESC; - r[1] = '$'; - r[2] = 'B'; - r += 3; - state1 = STATE_JISX0208; - } - r[0] = buf[0]; - r[1] = buf[1]; - COMBINE_STATE; - conv->ostate = state; - return count; - } - } - - /* Try JIS X 0212-1990. */ - ret = jisx0212_wctomb(conv,buf,wc,2); - if (ret != RET_ILUNI) { - if (ret != 2) abort(); - if (buf[0] < 0x80 && buf[1] < 0x80) { - int count = (state1 == STATE_JISX0212 ? 2 : 6); - if (n < count) - return RET_TOOSMALL; - if (state1 != STATE_JISX0212) { - r[0] = ESC; - r[1] = '$'; - r[2] = '('; - r[3] = 'D'; - r += 4; - state1 = STATE_JISX0212; - } - r[0] = buf[0]; - r[1] = buf[1]; - COMBINE_STATE; - conv->ostate = state; - return count; - } - } - - break; - - case chinese: - - /* Try GB 2312-1980. */ - ret = gb2312_wctomb(conv,buf,wc,2); - if (ret != RET_ILUNI) { - if (ret != 2) abort(); - if (buf[0] < 0x80 && buf[1] < 0x80) { - int count = (state1 == STATE_GB2312 ? 2 : 5); - if (n < count) - return RET_TOOSMALL; - if (state1 != STATE_GB2312) { - r[0] = ESC; - r[1] = '$'; - r[2] = 'A'; - r += 3; - state1 = STATE_GB2312; - } - r[0] = buf[0]; - r[1] = buf[1]; - COMBINE_STATE; - conv->ostate = state; - return count; - } - } - - break; - - case korean: - - /* Try KS C 5601-1992. */ - ret = ksc5601_wctomb(conv,buf,wc,2); - if (ret != RET_ILUNI) { - if (ret != 2) abort(); - if (buf[0] < 0x80 && buf[1] < 0x80) { - int count = (state1 == STATE_KSC5601 ? 2 : 6); - if (n < count) - return RET_TOOSMALL; - if (state1 != STATE_KSC5601) { - r[0] = ESC; - r[1] = '$'; - r[2] = '('; - r[3] = 'C'; - r += 4; - state1 = STATE_KSC5601; - } - r[0] = buf[0]; - r[1] = buf[1]; - COMBINE_STATE; - conv->ostate = state; - return count; - } - } - - break; - - case other: - - /* Try JIS X 0201-1976 Kana. This is not officially part of - ISO-2022-JP-2, according to RFC 1554. Therefore we try this - only after all other attempts. */ - ret = jisx0201_wctomb(conv,buf,wc,1); - if (ret != RET_ILUNI) { - if (ret != 1) abort(); - if (buf[0] >= 0x80) { - int count = (state1 == STATE_JISX0201KATAKANA ? 1 : 4); - if (n < count) - return RET_TOOSMALL; - if (state1 != STATE_JISX0201KATAKANA) { - r[0] = ESC; - r[1] = '('; - r[2] = 'I'; - r += 3; - state1 = STATE_JISX0201KATAKANA; - } - r[0] = buf[0]-0x80; - COMBINE_STATE; - conv->ostate = state; - return count; - } - } - - break; - - default: - abort(); - } - - conversion_list = conversion_list >> 3; - } while (conversion_list != 0); - - return RET_ILUNI; -} - -static int -iso2022_jp2_reset (conv_t conv, unsigned char *r, int n) -{ - state_t state = conv->ostate; - SPLIT_STATE; - (void)state2; - (void)state3; - if (state1 != STATE_ASCII) { - if (n < 3) - return RET_TOOSMALL; - r[0] = ESC; - r[1] = '('; - r[2] = 'B'; - /* conv->ostate = 0; will be done by the caller */ - return 3; - } else - return 0; -} - -#undef COMBINE_STATE -#undef SPLIT_STATE -#undef STATE_TAG_LANGUAGE_zh -#undef STATE_TAG_LANGUAGE_z -#undef STATE_TAG_LANGUAGE_ko -#undef STATE_TAG_LANGUAGE_k -#undef STATE_TAG_LANGUAGE_ja -#undef STATE_TAG_LANGUAGE_j -#undef STATE_TAG_LANGUAGE -#undef STATE_TAG_NONE -#undef STATE_G2_ISO8859_7 -#undef STATE_G2_ISO8859_1 -#undef STATE_G2_NONE -#undef STATE_KSC5601 -#undef STATE_GB2312 -#undef STATE_JISX0212 -#undef STATE_JISX0208 -#undef STATE_JISX0201KATAKANA -#undef STATE_JISX0201ROMAN -#undef STATE_ASCII |