summaryrefslogtreecommitdiff
path: root/src/pdflib/pdcore/pc_unicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/pdflib/pdcore/pc_unicode.h')
-rw-r--r--src/pdflib/pdcore/pc_unicode.h283
1 files changed, 283 insertions, 0 deletions
diff --git a/src/pdflib/pdcore/pc_unicode.h b/src/pdflib/pdcore/pc_unicode.h
new file mode 100644
index 0000000..c5c1354
--- /dev/null
+++ b/src/pdflib/pdcore/pc_unicode.h
@@ -0,0 +1,283 @@
+/*---------------------------------------------------------------------------*
+ | PDFlib - A library for generating PDF on the fly |
+ +---------------------------------------------------------------------------+
+ | Copyright (c) 1997-2006 Thomas Merz and PDFlib GmbH. All rights reserved. |
+ +---------------------------------------------------------------------------+
+ | |
+ | This software is subject to the PDFlib license. It is NOT in the |
+ | public domain. Extended versions and commercial licenses are |
+ | available, please check http://www.pdflib.com. |
+ | |
+ *---------------------------------------------------------------------------*/
+
+/* $Id: pc_unicode.h,v 1.1 2008/10/17 06:10:43 scuri Exp $
+ *
+ * Unicode glyph name conversion routines
+ *
+ */
+
+#ifndef PC_UNICODE_H
+#define PC_UNICODE_H
+
+#define PDC_NUM_BMPVAL 0x10000
+#define PDC_NUM_UNIVAL 0x110000
+#define PDC_MAX_UNIVAL 0x10FFFF
+
+#define PDC_UNICODE_HT 0x0009
+#define PDC_UNICODE_LF 0x000A
+#define PDC_UNICODE_VT 0x000B
+#define PDC_UNICODE_FF 0x000C
+#define PDC_UNICODE_CR 0x000D
+#define PDC_UNICODE_ETB 0x0017
+#define PDC_UNICODE_ESC 0x001B
+#define PDC_UNICODE_SPACE 0x0020
+#define PDC_UNICODE_QUOTMARK 0x0022
+#define PDC_UNICODE_AMPERSAND 0x0026
+#define PDC_UNICODE_APOSTROPHE 0x0027
+#define PDC_UNICODE_HYPHEN 0x002D
+#define PDC_UNICODE_PERIOD 0x002E
+#define PDC_UNICODE_SEMICOLON 0x003B
+#define PDC_UNICODE_LESS_THAN 0x003C
+#define PDC_UNICODE_GREATER_THAN 0x003E
+#define PDC_UNICODE_BACKSLASH 0x005C
+#define PDC_UNICODE_LEFT_CURLY 0x007B
+#define PDC_UNICODE_RIGHT_CURLY 0x007D
+#define PDC_UNICODE_DELETE 0x007F
+#define PDC_UNICODE_NEL 0x0085
+#define PDC_UNICODE_NBSP 0x00A0
+#define PDC_UNICODE_SHY 0x00AD
+#define PDC_UNICODE_MACRON 0x00AF
+#define PDC_UNICODE_MICRO 0x00B5
+#define PDC_UNICODE_MIDDLEDOT 0x00B7
+#define PDC_UNICODE_MODMACRON 0x02C9
+#define PDC_UNICODE_CAPDELTA 0x0394
+#define PDC_UNICODE_CAPOMEGA 0x03A9
+#define PDC_UNICODE_SMALLMU 0x03BC
+#define PDC_UNICODE_LS 0x2028
+#define PDC_UNICODE_PS 0x2029
+#define PDC_UNICODE_NNBSP 0x202F
+#define PDC_UNICODE_FRACSLASH 0x2044
+#define PDC_UNICODE_MMSPACE 0x205F
+#define PDC_UNICODE_EURO 0x20AC
+#define PDC_UNICODE_OHMSIGN 0x2126
+#define PDC_UNICODE_INCREMENT 0x2206
+#define PDC_UNICODE_DIVSLASH 0x2215
+#define PDC_UNICODE_BULLETOP 0x2219
+#define PDC_UNICODE_IDEOSPACE 0x3000
+
+/* maximal values of ASCII and Latin-1 characters */
+#define PDC_UNICODE_MAXASCII 0x007F
+#define PDC_UNICODE_MAXLATIN1 0x00FF
+
+/* maximal resp. single value of Japanese HW characters */
+#define PDC_UNICODE_MAXHW 0x007E
+#define PDC_UNICODE_SINGHW 0x00A5
+
+/* Unicode borders of fullwidth forms of ASCII characters */
+#define PDC_UNICODE_MINFWASCII 0xFF00
+#define PDC_UNICODE_MAXFWASCII 0xFF5E
+#define PDC_UNICODE_DIFFWASCII 0xFEE0
+ /* PDC_UNICODE_MINFWASCII - PDC_UNICODE_SPACE */
+
+/* Unicode borders of fullwidth forms of Symbol characters */
+#define PDC_UNICODE_MINFWSYMBOL 0xFFE0
+#define PDC_UNICODE_MAXFWSYMBOL 0xFFE6
+
+/* Unicode borders of Private Use Area (PUA) */
+#define PDC_UNICODE_MINPUA 0xE000
+#define PDC_UNICODE_MAXPUA 0xF8FF
+
+/* Begin of PDFlib PUA */
+#define PDC_UNICODE_PDFPUA 0xF200
+
+/* Unicode borders of Unicode Corporate Use Subarea as used by Adobe Systems */
+#define PDC_UNICODE_MINCUS 0xF600
+#define PDC_UNICODE_MAXCUS 0xF8FF
+
+/* Unicode Surrogate ranges */
+#define PDC_UNICODE_MINHIGHSUR 0xD800
+#define PDC_UNICODE_MAXHIGHSUR 0xDBFF
+#define PDC_UNICODE_MINLOWSUR 0xDC00
+#define PDC_UNICODE_MAXLOWSUR 0xDFFF
+
+/* Unicode borders of higher Unicode spaces */
+#define PDC_UNICODE_MINSPACE 0x2000
+#define PDC_UNICODE_MAXSPACE 0x200B
+
+/* Unicode borders of CJK compatibility forms and small form variants */
+#define PDC_UNICODE_MINCJKFORMS 0xFE30
+#define PDC_UNICODE_MIDCJKFORMS 0xFE48
+#define PDC_UNICODE_MAXCJKFORMS 0xFE6F
+
+/* replacement character */
+#define PDC_UNICODE_REPLCHAR 0xFFFD
+
+/* special character for CRLF */
+#define PDF_UNICODE_CRLF 0xFDD0
+
+/* not a character */
+#define PDC_UNICODE_NOTCHAR 0xFFFF
+
+/* Latin and Armenian ligatures */
+#define PDC_UNICODE_CAPLIGATIJ 0x0132
+#define PDC_UNICODE_SMALLLIGATIJ 0x0133
+#define PDC_UNICODE_MINLIGAT 0xFB00
+#define PDC_UNICODE_MAXLIGAT 0xFB17
+
+
+/* Unicode byte order mark (BOM): BOM0-1 in UTF-16BE, BOM2-4 in UTF-8 */
+#define PDC_UNICODE_BOM 0xFEFF
+#define PDF_BOM0 0xFE
+#define PDF_BOM1 0xFF
+#define PDF_BOM2 0xEF
+#define PDF_BOM3 0xBB
+#define PDF_BOM4 0xBF
+
+/*
+ * Tell plain C strings from UTF-16 ones: true if s starts with the BOM in
+ * big-endian (FE FF) or little-endian (FF FE) byte order, respectively.
+ * s must not be NULL; s appears twice in the expansion (no side effects).
+ */
+#define pdc_is_utf16be_unicode(s) \
+ (((pdc_byte *)(s))[0] == PDF_BOM0 && \
+ ((pdc_byte *)(s))[1] == PDF_BOM1)
+
+#define pdc_is_utf16le_unicode(s) \
+ (((pdc_byte *)(s))[0] == PDF_BOM1 && \
+ ((pdc_byte *)(s))[1] == PDF_BOM0)
+
+/*
+ * Tell plain C strings from UTF-8 ones: true if s starts with the
+ * three-byte UTF-8 BOM (EF BB BF), false otherwise.
+ * s must not be NULL; s appears three times in the expansion.
+ */
+#define pdc_is_utf8_unicode(s) \
+ (((pdc_byte *)(s))[0] == PDF_BOM2 && \
+ ((pdc_byte *)(s))[1] == PDF_BOM3 && \
+ ((pdc_byte *)(s))[2] == PDF_BOM4)
+
+
+#define PDC_UTF8_STRING "\xEF\xBB\xBF"
+#define pdc_is_utf8_bytecode(s) \
+ (((pdc_byte *)(s))[0] == PDF_BOM2 && \
+ ((pdc_byte *)(s))[1] == PDF_BOM3 && \
+ ((pdc_byte *)(s))[2] == PDF_BOM4)
+#define pdc_copy_utf8_bom(s) /* store UTF-8 BOM (EF BB BF) in s[0..2] */ \
+ (((pdc_byte *)(s))[0] = PDF_BOM2, \
+ ((pdc_byte *)(s))[1] = PDF_BOM3, \
+ ((pdc_byte *)(s))[2] = PDF_BOM4) /* one expression; caller adds the ';' */
+#define PDC_UTF8 pdc_utf8
+#define PDC_UTF8_STRG "utf8"
+#define PDC_UTF8_FLAG pdc_false
+
+
+#define PDC_HTML_CTRLCHAR '&'
+#define PDC_HTML_DELIMITCHAR ';'
+
+typedef enum /* outcome of a conversion; values are 0..3 in this order */
+{
+ conversionOK, /* conversion successful */
+ sourceExhausted, /* source ended in the middle of a character */
+ targetExhausted, /* insufficient room in target for conversion */
+ sourceIllegal /* source sequence is illegal/malformed */
+}
+pdc_convers_result;
+
+typedef enum /* strictness selector for a conversion */
+{
+ strictConversion = 0,
+ lenientConversion /* implicitly 1 */
+}
+pdc_convers_flags;
+
+/* single-bit flags for pdc_convert_string(), pdc_strdup_ext(),
+ * pdc_utfxx6_to_utfxx(), pdc_convert_name_ext(); each uses a distinct bit
+ */
+#define PDC_CONV_FORCEUTF16 (1<<0)
+#define PDC_CONV_TRY7BYTES (1<<1)
+#define PDC_CONV_TRYBYTES (1<<2)
+#define PDC_CONV_WITHBOM (1<<3)
+#define PDC_CONV_NOBOM (1<<4)
+#define PDC_CONV_AUTOBOM (1<<5)
+#define PDC_CONV_ANALYZE (1<<6)
+#define PDC_CONV_TMPALLOC (1<<7)
+#define PDC_CONV_HTMLCHAR (1<<8)
+#define PDC_CONV_NEWALLOC (1<<9)
+#define PDC_CONV_INFLATE (1<<10)
+#define PDC_CONV_ESCSEQU (1<<11)
+#define PDC_CONV_BSSEQU (1<<12)
+#define PDC_CONV_EBCDIC (1<<13)
+#define PDC_CONV_ENCERROR (1<<14)
+#define PDC_CONV_KEEPLBCHAR (1<<15)
+#define PDC_CONV_LOGGING (1<<16)
+#define PDC_CONV_ISUTF8 (1<<17)
+
+/* Text format identifiers. DON'T change the order */
+typedef enum
+{
+ pdc_auto = 1,
+ pdc_auto2 = 2,
+ pdc_bytes = 3,
+ pdc_bytes2 = 4,
+ pdc_utf8 = 5, /* UTF-8 */
+ /* value 6 is not assigned */
+ pdc_utf16 = 7, /* UTF-16 */
+ pdc_utf16be = 8, /* UTF-16 big endian */
+ pdc_utf16le = 9, /* UTF-16 little endian */
+ pdc_utf32 = 10 /* UTF-32 */
+}
+pdc_text_format;
+
+/* copy for pdflib in p_keyconn.h; note: pdc_utf32 has no keyword entry */
+#if defined(PC_UNICODE_C) /* table is only emitted where PC_UNICODE_C is set */
+static const pdc_keyconn pdc_textformat_keylist[] =
+{
+ {"auto", pdc_auto},
+ {"auto2", pdc_auto2},
+ {"bytes", pdc_bytes},
+ {"bytes2", pdc_bytes2},
+ {"utf8", pdc_utf8},
+ {"utf16", pdc_utf16},
+ {"utf16be", pdc_utf16be},
+ {"utf16le", pdc_utf16le},
+ {NULL, 0} /* last entry; presumably the list terminator - confirm */
+};
+#endif /* PC_UNICODE_C */
+
+int pdc_convert_string(pdc_core *pdc,
+ pdc_text_format inutf, int codepage, pdc_encodingvector *inev,
+ pdc_byte *instring, int inlen, pdc_text_format *oututf_p,
+ pdc_encodingvector *outev, pdc_byte **outstring, int *outlen, int flags,
+ pdc_bool verbose);
+
+int pdc_convert_textstring(pdc_core *pdc,
+ pdc_text_format inutf, int codepage, pdc_encodingvector *inev,
+ const pdc_glyph_tab *glyphtab, int tabsize, int replchar,
+ pdc_byte *instring, int inlen,
+ pdc_text_format *oututf_p, pdc_encodingvector *outev,
+ pdc_byte **outstring, int *outlen, int flags,
+ pdc_bool verbose);
+
+char *pdc_convert_name(pdc_core *pdc, const char *name, int len, int flags);
+char *pdc_convert_name_ext(pdc_core *pdc, const char *name, int len,
+ pdc_encoding enc, int codepage, int flags);
+
+char *pdc_utf8_to_hostbytes(pdc_core *pdc, pdc_bool honorlang, char *name);
+char *pdc_hostbytes_to_utf8(pdc_core *pdc, pdc_bool honorlang, char *name);
+
+char *pdc_utf16_to_utf8(pdc_core *pdc, const char *utf16string, int len,
+ int flags, int *size);
+char *pdc_utf8_to_utf16(pdc_core *pdc, const char *utf8string,
+ const char *format, int flags, int *size);
+char *pdc_utf16_to_utf32(pdc_core *pdc, const char *utf16string, int len,
+ int *size);
+char *pdc_utf32_to_utf8(pdc_core *pdc, const char *utf32string, int len,
+ int flags, int *size);
+char *pdc_utf32_to_utf16(pdc_core *pdc, const char *utf32string, int len,
+ const char *format, int flags, int *size);
+int pdc_char16_to_char32(pdc_core *pdc, const pdc_ushort *ustext, int *ic,
+ int len, pdc_bool verbose);
+int pdc_char32_to_char16(pdc_core *pdc, int usv, pdc_ushort *uvlist,
+ pdc_bool verbose);
+
+#endif /* PC_UNICODE_H */