From 7b52cc13af4e85f1ca2deb6b6c77de9c95ea0dcf Mon Sep 17 00:00:00 2001
From: scuri
Date: Fri, 17 Oct 2008 06:10:33 +0000
Subject: First commit - moving from LuaForge to SourceForge

---
 src/pdflib/pdcore/pc_unicode.h | 283 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 283 insertions(+)
 create mode 100644 src/pdflib/pdcore/pc_unicode.h

(limited to 'src/pdflib/pdcore/pc_unicode.h')

diff --git a/src/pdflib/pdcore/pc_unicode.h b/src/pdflib/pdcore/pc_unicode.h
new file mode 100644
index 0000000..c5c1354
--- /dev/null
+++ b/src/pdflib/pdcore/pc_unicode.h
@@ -0,0 +1,283 @@
+/*---------------------------------------------------------------------------*
+ |              PDFlib - A library for generating PDF on the fly             |
+ +---------------------------------------------------------------------------+
+ | Copyright (c) 1997-2006 Thomas Merz and PDFlib GmbH. All rights reserved. |
+ +---------------------------------------------------------------------------+
+ |                                                                           |
+ |    This software is subject to the PDFlib license. It is NOT in the       |
+ |    public domain. Extended versions and commercial licenses are           |
+ |    available, please check http://www.pdflib.com.                         |
+ |                                                                           |
+ *---------------------------------------------------------------------------*/
+
+/* $Id: pc_unicode.h,v 1.1 2008/10/17 06:10:43 scuri Exp $
+ *
+ * Unicode glyph name conversion routines
+ *
+ */
+
+#ifndef PC_UNICODE_H
+#define PC_UNICODE_H
+
+#define PDC_NUM_BMPVAL            0x10000
+#define PDC_NUM_UNIVAL            0x110000  /* PDC_MAX_UNIVAL + 1 */
+#define PDC_MAX_UNIVAL            0x10FFFF
+
+#define PDC_UNICODE_HT            0x0009
+#define PDC_UNICODE_LF            0x000A
+#define PDC_UNICODE_VT            0x000B
+#define PDC_UNICODE_FF            0x000C
+#define PDC_UNICODE_CR            0x000D
+#define PDC_UNICODE_ETB           0x0017
+#define PDC_UNICODE_ESC           0x001B
+#define PDC_UNICODE_SPACE         0x0020
+#define PDC_UNICODE_QUOTMARK      0x0022
+#define PDC_UNICODE_AMPERSAND     0x0026
+#define PDC_UNICODE_APOSTROPHE    0x0027
+#define PDC_UNICODE_HYPHEN        0x002D
+#define PDC_UNICODE_PERIOD        0x002E
+#define PDC_UNICODE_SEMICOLON     0x003B
+#define PDC_UNICODE_LESS_THAN     0x003C
+#define PDC_UNICODE_GREATER_THAN  0x003E
+#define PDC_UNICODE_BACKSLASH     0x005C
+#define PDC_UNICODE_LEFT_CURLY    0x007B
+#define PDC_UNICODE_RIGHT_CURLY   0x007D
+#define PDC_UNICODE_DELETE        0x007F
+#define PDC_UNICODE_NEL           0x0085
+#define PDC_UNICODE_NBSP          0x00A0
+#define PDC_UNICODE_SHY           0x00AD
+#define PDC_UNICODE_MACRON        0x00AF
+#define PDC_UNICODE_MICRO         0x00B5
+#define PDC_UNICODE_MIDDLEDOT     0x00B7
+#define PDC_UNICODE_MODMACRON     0x02C9
+#define PDC_UNICODE_CAPDELTA      0x0394
+#define PDC_UNICODE_CAPOMEGA      0x03A9
+#define PDC_UNICODE_SMALLMU       0x03BC
+#define PDC_UNICODE_LS            0x2028
+#define PDC_UNICODE_PS            0x2029
+#define PDC_UNICODE_NNBSP         0x202F
+#define PDC_UNICODE_FRACSLASH     0x2044
+#define PDC_UNICODE_MMSPACE       0x205F
+#define PDC_UNICODE_EURO          0x20AC
+#define PDC_UNICODE_OHMSIGN       0x2126
+#define PDC_UNICODE_INCREMENT     0x2206
+#define PDC_UNICODE_DIVSLASH      0x2215
+#define PDC_UNICODE_BULLETOP      0x2219
+#define PDC_UNICODE_IDEOSPACE     0x3000
+
+/* maximal values of ASCII and Latin-1 characters */
+#define PDC_UNICODE_MAXASCII      0x007F
+#define PDC_UNICODE_MAXLATIN1     0x00FF
+
+/* maximal value and single value, respectively, of Japanese HW characters */
+#define PDC_UNICODE_MAXHW         0x007E
+#define PDC_UNICODE_SINGHW        0x00A5
+
+/* Unicode borders of fullwidth forms of ASCII characters */
+#define PDC_UNICODE_MINFWASCII    0xFF00
+#define PDC_UNICODE_MAXFWASCII    0xFF5E
+#define PDC_UNICODE_DIFFWASCII    0xFEE0
+        /* = PDC_UNICODE_MINFWASCII - PDC_UNICODE_SPACE */
+
+/* Unicode borders of fullwidth forms of Symbol characters */
+#define PDC_UNICODE_MINFWSYMBOL   0xFFE0
+#define PDC_UNICODE_MAXFWSYMBOL   0xFFE6
+
+/* Unicode borders of Private Use Area (PUA) */
+#define PDC_UNICODE_MINPUA        0xE000
+#define PDC_UNICODE_MAXPUA        0xF8FF
+
+/* Begin of PDFlib PUA (lies inside the MINPUA..MAXPUA range) */
+#define PDC_UNICODE_PDFPUA        0xF200
+
+/* Unicode borders of Unicode Corporate Use Subarea as used by Adobe Systems */
+#define PDC_UNICODE_MINCUS        0xF600
+#define PDC_UNICODE_MAXCUS        0xF8FF
+
+/* Unicode Surrogate ranges */
+#define PDC_UNICODE_MINHIGHSUR    0xD800
+#define PDC_UNICODE_MAXHIGHSUR    0xDBFF
+#define PDC_UNICODE_MINLOWSUR     0xDC00
+#define PDC_UNICODE_MAXLOWSUR     0xDFFF
+
+/* Unicode borders of higher Unicode spaces */
+#define PDC_UNICODE_MINSPACE      0x2000
+#define PDC_UNICODE_MAXSPACE      0x200B
+
+/* Unicode borders of CJK compatibility forms and small form variants */
+#define PDC_UNICODE_MINCJKFORMS   0xFE30
+#define PDC_UNICODE_MIDCJKFORMS   0xFE48
+#define PDC_UNICODE_MAXCJKFORMS   0xFE6F
+
+/* replacement character */
+#define PDC_UNICODE_REPLCHAR      0xFFFD
+
+/* special character for CRLF (note the PDF_ prefix, not PDC_) */
+#define PDF_UNICODE_CRLF          0xFDD0
+
+/* not a character */
+#define PDC_UNICODE_NOTCHAR       0xFFFF
+
+/* Latin and Armenian ligatures */
+#define PDC_UNICODE_CAPLIGATIJ    0x0132
+#define PDC_UNICODE_SMALLLIGATIJ  0x0133
+#define PDC_UNICODE_MINLIGAT      0xFB00
+#define PDC_UNICODE_MAXLIGAT      0xFB17
+
+/* The Unicode byte order mark (BOM) and its byte parts: */
+/* PDF_BOM0..1 in UTF-16 big-endian order, PDF_BOM2..4 in UTF-8 */
+#define PDC_UNICODE_BOM           0xFEFF
+#define PDF_BOM0                  0xFE
+#define PDF_BOM1                  0xFF
+#define PDF_BOM2                  0xEF
+#define PDF_BOM3                  0xBB
+#define PDF_BOM4                  0xBF
+
+/*
+ * Check whether the string is plain C or UTF-16 Unicode by looking for the
+ * BOM in big-endian or little-endian byte order, respectively.
+ * s must not be NULL; bytes s[0] and s[1] are read, s is evaluated up to twice.
+ */
+#define pdc_is_utf16be_unicode(s) \
+    (((pdc_byte *)(s))[0] == PDF_BOM0 && \
+     ((pdc_byte *)(s))[1] == PDF_BOM1)
+
+#define pdc_is_utf16le_unicode(s) \
+    (((pdc_byte *)(s))[0] == PDF_BOM1 && \
+     ((pdc_byte *)(s))[1] == PDF_BOM0)
+
+/*
+ * Check whether the string is plain C or UTF-8 Unicode by looking for the
+ * UTF-8 BOM.
+ * s must not be NULL; bytes s[0]..s[2] are read, s is evaluated up to 3 times.
+ */
+#define pdc_is_utf8_unicode(s) \
+    (((pdc_byte *)(s))[0] == PDF_BOM2 && \
+     ((pdc_byte *)(s))[1] == PDF_BOM3 && \
+     ((pdc_byte *)(s))[2] == PDF_BOM4)
+
+/* the UTF-8 BOM as a string literal, plus macros that test for / store it */
+#define PDC_UTF8_STRING "\xEF\xBB\xBF"
+#define pdc_is_utf8_bytecode(s) \
+    (((pdc_byte *)(s))[0] == PDF_BOM2 && \
+     ((pdc_byte *)(s))[1] == PDF_BOM3 && \
+     ((pdc_byte *)(s))[2] == PDF_BOM4)  /* same test as pdc_is_utf8_unicode */
+#define pdc_copy_utf8_bom(s) \
+    ((pdc_byte *)(s))[0] = PDF_BOM2, \
+    ((pdc_byte *)(s))[1] = PDF_BOM3, \
+    ((pdc_byte *)(s))[2] = PDF_BOM4;    /* NOTE: expansion ends with ';' */
+#define PDC_UTF8        pdc_utf8        /* pdc_text_format value, see below */
+#define PDC_UTF8_STRG   "utf8"
+#define PDC_UTF8_FLAG   pdc_false
+
+/* NOTE(review): presumably delimiters of "&name;" HTML references - confirm */
+#define PDC_HTML_CTRLCHAR     '&'
+#define PDC_HTML_DELIMITCHAR  ';'
+
+typedef enum
+{
+    conversionOK,       /* conversion successful */
+    sourceExhausted,    /* partial character in source, but hit end */
+    targetExhausted,    /* insufficient room in target for conversion */
+    sourceIllegal       /* source sequence is illegal/malformed */
+}
+pdc_convers_result;
+
+typedef enum
+{
+    strictConversion = 0,
+    lenientConversion
+}
+pdc_convers_flags;
+
+/* flags for pdc_convert_string(), pdc_strdup_ext(),
+ * pdc_utfxx6_to_utfxx(), pdc_convert_name_ext(); each flag is a distinct bit
+ */
+#define PDC_CONV_FORCEUTF16  (1<<0)
+#define PDC_CONV_TRY7BYTES   (1<<1)
+#define PDC_CONV_TRYBYTES    (1<<2)
+#define PDC_CONV_WITHBOM     (1<<3)
+#define PDC_CONV_NOBOM       (1<<4)
+#define PDC_CONV_AUTOBOM     (1<<5)
+#define PDC_CONV_ANALYZE     (1<<6)
+#define PDC_CONV_TMPALLOC    (1<<7)
+#define PDC_CONV_HTMLCHAR    (1<<8)
+#define PDC_CONV_NEWALLOC    (1<<9)
+#define PDC_CONV_INFLATE     (1<<10)
+#define PDC_CONV_ESCSEQU     (1<<11)
+#define PDC_CONV_BSSEQU      (1<<12)
+#define PDC_CONV_EBCDIC      (1<<13)
+#define PDC_CONV_ENCERROR    (1<<14)
+#define PDC_CONV_KEEPLBCHAR  (1<<15)
+#define PDC_CONV_LOGGING     (1<<16)
+#define PDC_CONV_ISUTF8      (1<<17)
+
+/* DON'T change the order */
+typedef enum
+{
+    pdc_auto    = 1,
+    pdc_auto2   = 2,
+    pdc_bytes   = 3,
+    pdc_bytes2  = 4,
+    pdc_utf8    = 5,    /* UTF-8 */
+                        /* the value 6 is not assigned */
+    pdc_utf16   = 7,    /* UTF-16 */
+    pdc_utf16be = 8,    /* UTF-16 big endian */
+    pdc_utf16le = 9,    /* UTF-16 little endian */
+    pdc_utf32   = 10    /* UTF-32 */
+}
+pdc_text_format;
+
+/* copy for pdflib in p_keyconn.h; compiled only if PC_UNICODE_C is defined */
+#if defined(PC_UNICODE_C)
+static const pdc_keyconn pdc_textformat_keylist[] =
+{
+    {"auto",     pdc_auto},
+    {"auto2",    pdc_auto2},
+    {"bytes",    pdc_bytes},
+    {"bytes2",   pdc_bytes2},
+    {"utf8",     pdc_utf8},
+    {"utf16",    pdc_utf16},
+    {"utf16be",  pdc_utf16be},
+    {"utf16le",  pdc_utf16le},
+    {NULL, 0}           /* note: the list has no entry for pdc_utf32 */
+};
+#endif /* PC_UNICODE_C */
+
+int pdc_convert_string(pdc_core *pdc,
+    pdc_text_format inutf, int codepage, pdc_encodingvector *inev,
+    pdc_byte *instring, int inlen, pdc_text_format *oututf_p,
+    pdc_encodingvector *outev, pdc_byte **outstring, int *outlen, int flags,
+    pdc_bool verbose);
+
+int pdc_convert_textstring(pdc_core *pdc,
+    pdc_text_format inutf, int codepage, pdc_encodingvector *inev,
+    const pdc_glyph_tab *glyphtab, int tabsize, int replchar,
+    pdc_byte *instring, int inlen,
+    pdc_text_format *oututf_p, pdc_encodingvector *outev,
+    pdc_byte **outstring, int *outlen, int flags,
+    pdc_bool verbose);
+
+char *pdc_convert_name(pdc_core *pdc, const char *name, int len, int flags);
+char *pdc_convert_name_ext(pdc_core *pdc, const char *name, int len,
+    pdc_encoding enc, int codepage, int flags);
+
+char *pdc_utf8_to_hostbytes(pdc_core *pdc, pdc_bool honorlang, char *name);
+char *pdc_hostbytes_to_utf8(pdc_core *pdc, pdc_bool honorlang, char *name);
+
+char *pdc_utf16_to_utf8(pdc_core *pdc, const char *utf16string, int len,
+    int flags, int *size);
+char *pdc_utf8_to_utf16(pdc_core *pdc, const char *utf8string,
+    const char *format, int flags, int *size);
+char *pdc_utf16_to_utf32(pdc_core *pdc, const char *utf16string, int len,
+    int *size);
+char *pdc_utf32_to_utf8(pdc_core *pdc, const char *utf32string, int len,
+    int flags, int *size);
+char *pdc_utf32_to_utf16(pdc_core *pdc, const char *utf32string, int len,
+    const char *format, int flags, int *size);
+int pdc_char16_to_char32(pdc_core *pdc, const pdc_ushort *ustext, int *ic,
+    int len, pdc_bool verbose);
+int pdc_char32_to_char16(pdc_core *pdc, int usv, pdc_ushort *uvlist,
+    pdc_bool verbose);
+
+#endif /* PC_UNICODE_H */
-- 
cgit v1.2.3