From 7b52cc13af4e85f1ca2deb6b6c77de9c95ea0dcf Mon Sep 17 00:00:00 2001 From: scuri Date: Fri, 17 Oct 2008 06:10:33 +0000 Subject: First commit - moving from LuaForge to SourceForge --- src/pdflib/pdcore/pc_chartabs.c | 613 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 613 insertions(+) create mode 100644 src/pdflib/pdcore/pc_chartabs.c (limited to 'src/pdflib/pdcore/pc_chartabs.c') diff --git a/src/pdflib/pdcore/pc_chartabs.c b/src/pdflib/pdcore/pc_chartabs.c new file mode 100644 index 0000000..3799d45 --- /dev/null +++ b/src/pdflib/pdcore/pc_chartabs.c @@ -0,0 +1,613 @@ +/*---------------------------------------------------------------------------* + | PDFlib - A library for generating PDF on the fly | + +---------------------------------------------------------------------------+ + | Copyright (c) 1997-2006 Thomas Merz and PDFlib GmbH. All rights reserved. | + +---------------------------------------------------------------------------+ + | | + | This software is subject to the PDFlib license. It is NOT in the | + | public domain. Extended versions and commercial licenses are | + | available, please check http://www.pdflib.com. | + | | + *---------------------------------------------------------------------------*/ + +/* $Id: pc_chartabs.c,v 1.1 2008/10/17 06:10:43 scuri Exp $ + * + * PDFlib routines for converting glyph or character names to Unicode + * and vice versa + * + */ + +#define PC_CHARTABS_C + +#include "pc_util.h" +#include "pc_chartabs.h" +#include "pc_ctype.h" + + +/* ---------------- general character search functions ------------------- */ + +/* + * Binary search for list of codes in a pdc_glyph_tab array sorted by glyphname + */ +int +pdc_glyphname2codelist(const char *glyphname, const pdc_glyph_tab *glyphtab, + int tabsize, pdc_ushort *codelist) +{ + int lo = 0; + int hi = glyphname ? tabsize : lo; + int nv = 0; + + while (lo < hi) + { + int i = (lo + hi) / 2; + int cmp = strcmp(glyphname, glyphtab[i].name); + + if (cmp == 0) + { + for (; i >= 1; i--) + { + if (strcmp(glyphname, glyphtab[i-1].name)) + break; + } + for (; i < tabsize; i++) + { + if (strcmp(glyphname, glyphtab[i].name)) + break; + codelist[nv] = glyphtab[i].code; + nv++; + } + return nv; + } + + if (cmp < 0) + hi = i; + else + lo = i + 1; + } + + return nv; +} + +/* + * Binary search for code in a pdc_glyph_tab array sorted by glyphname + */ +int +pdc_glyphname2code(const char *glyphname, const pdc_glyph_tab *glyphtab, + int tabsize) +{ + int lo = 0; + int hi = glyphname ? tabsize : lo; + + while (lo < hi) + { + int i = (lo + hi) / 2; + int cmp = strcmp(glyphname, glyphtab[i].name); + + if (cmp == 0) + return (int) glyphtab[i].code; + + if (cmp < 0) + hi = i; + else + lo = i + 1; + } + + return -1; +} + +/* + * Binary search for glyphname in a pdc_glyph_tab array sorted by code + */ +const char * +pdc_code2glyphname(pdc_ushort code, const pdc_glyph_tab *glyphtab, int tabsize) +{ + int lo = 0; + int hi = tabsize; + + while (lo < hi) + { + int i = (lo + hi) / 2; + + if (code == glyphtab[i].code) + return glyphtab[i].name; + + if (code < glyphtab[i].code) + hi = i; + else + lo = i + 1; + } + + return NULL; +} + +/* + * Binary search for list of codes in a pdc_code_map array sorted by source code + */ +int +pdc_code2codelist(pdc_core *pdc, pdc_ushort code, + const pdc_code_map *codemap, int tabsize, + pdc_ushort *codelist, int listsize) +{ + int lo = 0; + int hi = tabsize; + int nv = 0; + + while (lo < hi) + { + int i = (lo + hi) / 2; + + if (codemap[i].src == code) + { + for (; i >= 1; i--) + { + if (codemap[i-1].src != code) + break; + } + + for (; i < tabsize; i++) + { + if (codemap[i].src != code) + break; + + if (nv >= listsize) + pdc_error(pdc, PDC_E_CONV_LIST_MEMOVERFLOW, 0, 0, 0, 0); + + codelist[nv] = codemap[i].dst; + nv++; + } + + return nv; + } + if (codemap[i].src > code) + hi = i; + else + lo = i + 1; + } + + return nv; +} + +/* + * Binary search for glyphname in a pdc_glyph_tab array sorted by glyphname + * to get the static pointer for the glyphname. + */ +const char * +pdc_glyphname2glyphname(const char *glyphname, + const pdc_glyph_tab *glyphtab, int tabsize) +{ + int lo = 0; + int hi = tabsize; + + while (lo < hi) + { + int i = (lo + hi) / 2; + int cmp = strcmp(glyphname, glyphtab[i].name); + + if (cmp == 0) + return glyphtab[i].name; + + if (cmp < 0) + hi = i; + else + lo = i + 1; + } + + return NULL; +} + + +/* ---------------- special character search functions ------------------- */ + +/* + * Returns the Unicode value of a glyph name in Adobe Glyph List 1.2'. + * If the name is not contained in AGL, -1 will be returned. + */ +int +pdc_adobe2unicode(const char *glyphname) +{ + return pdc_glyphname2code(glyphname, tab_agl2uni, + (sizeof (tab_agl2uni)) / (sizeof (pdc_glyph_tab))); +} + +/* + * Returns the name in AGL 1.2' or ZapfDingbats font, + * which corresponds to the supplied Unicode value. + * If the value doesn't have a corresponding glyph name, + * NULL will be returned. + * For control codes ".notdef" will be returned. + * But this is not compatibel with AGL 2.0! + */ +const char * +pdc_unicode2adobe(pdc_ushort uv) +{ + const char *glyphname; + + /* AGL 1.2' glyphname */ + glyphname = pdc_code2glyphname(uv, tab_uni2agl, + (sizeof tab_uni2agl) / (sizeof (pdc_glyph_tab))); + if (glyphname != NULL) + return glyphname; + + /* C0 and C1 control characters. + * They have never a graphical representation but are defined. + */ + if (uv < PDC_UNICODE_SPACE || + (uv >= PDC_UNICODE_DELETE && uv < PDC_UNICODE_NBSP)) + return glyph__notdef; + + return NULL; +} + +const char * +pdc_get_notdef_glyphname(void) +{ + return (char *) glyph__notdef; +} + +/* + * Returns the Unicode value of a ZapfDingbats glyph name. + * If the name is not contained in the ZapfDingbats list + * -1 will be returned. + */ +int +pdc_zadb2unicode(const char *glyphname) +{ + return pdc_glyphname2code(glyphname, tab_zadb2uni, + (sizeof (tab_zadb2uni)) / (sizeof (pdc_glyph_tab))); +} + +/* + * Returns the glyph name in the ZapfDingbats font which corresponds + * to the supplied Unicode value. If the value doesn't have a + * corresponding glyph name NULL will be returned. + */ +const char * +pdc_unicode2zadb(pdc_ushort uv) +{ + return pdc_code2glyphname(uv, tab_uni2zadb, + (sizeof tab_uni2zadb) / (sizeof (pdc_glyph_tab))); +} + +/* + * Returns the Unicode values of a glyph name in Adobe Glyph List 2.0 + * which is not contained in AGL-1.2'. + * + * The function presupposes that uvlist is an array of PDC_MAX_UVLIST. + * + * Return value is the number of Unicodes. + */ +int +pdc_newadobe2unicodelist(const char *glyphname, pdc_ushort *uvlist) +{ + return pdc_glyphname2codelist(glyphname, tab_diffagl2uni, + (sizeof tab_diffagl2uni) / (sizeof (pdc_glyph_tab)), + uvlist); +} + +/* + * Returns the glyph name in Adobe Glyph List 2.0 + * which is not contained in AGL-1.2' corresponding + * to the supplied Unicode value. Ambiguous Unicode + * values or glyph names are not supported! + * If the value doesn't have a corresponding glyph name + * NULL will be returned. + */ +const char * +pdc_unicode2newadobe(pdc_ushort uv) +{ + return pdc_code2glyphname(uv, tab_uni2diffagl, + (sizeof tab_uni2diffagl) / (sizeof (pdc_glyph_tab))); +} + +/* + * Returns the glyph name in Adobe Glyph List 2.0 + * which is not contained in AGL-1.2' and which matches + * the supplied glyph name. + * If no match is found NULL will be returned. + */ +const char * +pdc_get_newadobe_glyphname(const char *glyphname) +{ + return pdc_glyphname2glyphname(glyphname, tab_diffagl2uni, + (sizeof tab_diffagl2uni) / (sizeof (pdc_glyph_tab))); +} + + +/* + * Returns the alternative Unicode value of a double-mapped + * AGL-1.2 glyph name. If the name is not double-mapped, + * -1 will be returned. + */ +int +pdc_glyphname2altunicode(const char *glyphname) +{ + return pdc_glyphname2code(glyphname, tab_double_mappping, + (sizeof (tab_double_mappping)) / (sizeof (pdc_glyph_tab))); +} + +/* + * Returns true if a character name is contained in pc_standard_latin_charset. + * Otherwise false will be returned. + */ +pdc_bool +pdc_is_std_charname(const char *glyphname) +{ + int lo = 0; + int hi = ((sizeof pc_standard_latin_charset) / (sizeof (char *))); + + if (glyphname) + { + while (lo < hi) + { + int i = (lo + hi) / 2; + int cmp = strcmp(glyphname, pc_standard_latin_charset[i]); + + if (cmp == 0) + return pdc_true; + + if (cmp < 0) + hi = i; + else + lo = i + 1; + } + } + + return pdc_false; +} + + + +/* -------------- special character mapping for Type1 fonts --------------- */ + +/* + * Deletes a bit in a bit mask. The bit indicates that + * the respective glyph name of AGL 2.0 is not available + * in a PostScript font. The glyph name is used to avoid + * ambiguities (see comment in pc_chartabs.h) + * + */ + +#define PDC_BIT_NBSP (1L<<0) +#define PDC_BIT_SHY (1L<<1) +#define PDC_BIT_MODMACRON (1L<<2) +#define PDC_BIT_CAPDELTA (1L<<3) +#define PDC_BIT_CAPOMEGA (1L<<4) +#define PDC_BIT_DIVSLASH (1L<<5) +#define PDC_BIT_BULLETOP (1L<<6) +#define PDC_BIT_SMALLMU (1L<<7) + +void +pdc_delete_missingglyph_bit(pdc_ushort uv, pdc_ulong *bmask) +{ + switch(uv) + { + case PDC_UNICODE_NBSP: + *bmask &= ~PDC_BIT_NBSP; + return; + + case PDC_UNICODE_SHY: + *bmask &= ~PDC_BIT_SHY; + return; + + case PDC_UNICODE_MODMACRON: + *bmask &= ~PDC_BIT_MODMACRON; + return; + + case PDC_UNICODE_CAPDELTA: + *bmask &= ~PDC_BIT_CAPDELTA; + return; + + case PDC_UNICODE_CAPOMEGA: + *bmask &= ~PDC_BIT_CAPOMEGA; + return; + + case PDC_UNICODE_DIVSLASH: + *bmask &= ~PDC_BIT_DIVSLASH; + return; + + case PDC_UNICODE_BULLETOP: + *bmask &= ~PDC_BIT_BULLETOP; + return; + + case PDC_UNICODE_SMALLMU: + *bmask &= ~PDC_BIT_SMALLMU; + return; + + default: + return; + } +} + +/* + * Returnes an alternative Unicode value and/or glyph name for an + * AGL 2.0 glyph name which is not available in a PostScript font. + * + */ + +pdc_ushort +pdc_get_alter_glyphname(pdc_ushort uv, pdc_ulong bmask, char **glyphname) +{ + switch(uv) + { + case PDC_UNICODE_NBSP: + if (bmask & PDC_BIT_NBSP) + { + if (glyphname) + *glyphname = (char *) glyph_space; + return PDC_UNICODE_SPACE; + } + break; + + case PDC_UNICODE_SHY: + if (bmask & PDC_BIT_SHY) + { + if (glyphname) + *glyphname = (char *) glyph_hyphen; + return PDC_UNICODE_HYPHEN; + } + break; + + case PDC_UNICODE_MODMACRON: + if (bmask & PDC_BIT_MODMACRON) + { + if (glyphname) + *glyphname = (char *) glyph_macron; + return PDC_UNICODE_MACRON; + } + break; + + case PDC_UNICODE_CAPDELTA: + if (bmask & PDC_BIT_CAPDELTA) + { + if (glyphname) + *glyphname = (char *) glyph_Delta; + return PDC_UNICODE_INCREMENT; + } + break; + + case PDC_UNICODE_CAPOMEGA: + if (bmask & PDC_BIT_CAPOMEGA) + { + if (glyphname) + *glyphname = (char *) glyph_Omega; + return PDC_UNICODE_OHMSIGN; + } + break; + + case PDC_UNICODE_DIVSLASH: + if (bmask & PDC_BIT_DIVSLASH) + { + if (glyphname) + *glyphname = (char *) glyph_fraction; + return PDC_UNICODE_FRACSLASH; + } + + case PDC_UNICODE_BULLETOP: + if (bmask & PDC_BIT_BULLETOP) + { + if (glyphname) + *glyphname = (char *) glyph_periodcentered; + return PDC_UNICODE_MIDDLEDOT; + } + + case PDC_UNICODE_SMALLMU: + if (bmask & PDC_BIT_SMALLMU) + { + if (glyphname) + *glyphname = (char *) glyph_mu; + return PDC_UNICODE_MICRO; + } + + default: + if (glyphname) + { + if (*glyphname == NULL) + *glyphname = (char *) pdc_get_notdef_glyphname(); + return 0; + } + } + + return uv; +} + +/* + * Returns the Unicode value for a given string Unicode expression: + * + * - Byte 1...255 -> U0001...U00FF + * - U+XXXXX + * - 0xXXXXX + * - HTML character reference without frame syntax &...; + * + * If no conversion is possible -1 will be returned. + */ +int +pdc_string2unicode(pdc_core *pdc, const char *text, int i_flags, + const pdc_keyconn *keyconn, pdc_bool verbose) +{ + int iz = PDC_KEY_NOTFOUND, usv = -1; + pdc_bool seterr = pdc_false; + int flags = PDC_INT_UNSIGNED; + int i = 0; + + (void) verbose; + + /* single byte as Unicode value */ + if (strlen(text) == 1) + { + char c = text[0]; + usv = (pdc_byte) c; + } + else + { + /* keyword */ + if (keyconn) + { + if (i_flags & PDC_INT_CASESENS) + iz = pdc_get_keycode(text, keyconn); + else + iz = pdc_get_keycode_ci(text, keyconn); + } + if (iz != PDC_KEY_NOTFOUND) + { + usv = iz; + } + else + { + /* Unicode value */ + if (!pdc_strincmp(text, "U+", 2)) + { + flags |= PDC_INT_HEXADEC; + i = 2; + } + if (!pdc_str2integer(&text[i], flags, &iz)) + { + seterr = pdc_true; + } + else if (iz >= PDC_NUM_UNIVAL || + (iz >= PDC_UNICODE_MINHIGHSUR && + iz <= PDC_UNICODE_MAXLOWSUR)) + { + seterr = pdc_true; + } + else + { + usv = iz; + } + } + } + + if (seterr) + { + pdc_set_errmsg(pdc, PDC_E_CONV_ILLUTF32, &text[i], 0, 0, 0); + if (verbose) + pdc_error(pdc, -1, 0, 0, 0, 0); + } + + return usv; +} + +/* + * Returns true if Unicode character is a character relevant for line breaking + * + */ +pdc_bool +pdc_is_linebreaking_relchar(pdc_ushort uv) +{ + switch (uv) + { + case PDC_UNICODE_HT: + case PDC_UNICODE_LF: + case PDC_UNICODE_VT: + case PDC_UNICODE_FF: + case PDC_UNICODE_CR: + case PDC_UNICODE_NEL: + case PDC_UNICODE_SHY: + case PDC_UNICODE_LS: + case PDC_UNICODE_PS: + return pdc_true; + } + + return pdc_false; +} + + -- cgit v1.2.3