*** empty log message ***

author: scuri <scuri> 2010-06-21 22:55:10 +0000
committer: scuri <scuri> 2010-06-21 22:55:10 +0000
commit: 37a92b86d13e89d0dcec92be6d23ceced29dbc36 (patch)
tree: faff61ef668379212b4ed948934533c3f2fe308b /src/ftgl/FTUnicode.h
parent: a124216ee05a5d63ea8fcdafcd050ad1fadf0b09 (diff)
1 files changed, 237 insertions, 0 deletions
diff --git a/src/ftgl/FTUnicode.h b/src/ftgl/FTUnicode.h
new file mode 100644
index 0000000..6c74100
--- /dev/null
+++ b/src/ftgl/FTUnicode.h
@@ -0,0 +1,237 @@
+/*
+ * FTGL - OpenGL font library
+ *
+ * Copyright (c) 2008 Daniel Remenak <dtremenak@users.sourceforge.net>
+ *
+ * Portions derived from ConvertUTF.c Copyright (C) 2001-2004 Unicode, Inc
+ *   Unicode, Inc. hereby grants the right to freely use the information
+ *   supplied in this file in the creation of products supporting the
+ *   Unicode Standard, and to make copies of this file in any form
+ *   for internal or external distribution as long as this notice
+ *   remains attached.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef    __FTUnicode__
+#define    __FTUnicode__
+
+/**
+ * Provides a way to easily walk multibyte unicode strings in the various
+ * Unicode encodings (UTF-8, UTF-16, UTF-32, UCS-2, and UCS-4).  Encodings
+ * with elements larger than one byte must already be in the correct endian
+ * order for the current architecture.
+ */
+template <typename T>
+class FTUnicodeStringItr
+{
+public:
+    /**
+     * Constructor.  Also reads the first character and stores it.
+     *
+     * @param string  The buffer to iterate.  No copy is made; only the
+     *                pointer is stored.
+     */
+    FTUnicodeStringItr(const T* string)
+    :   curPos(string),
+        curChar(0),
+        nextPos(string)
+    {
+        // Pre-increment on purpose: the post-increment form would first
+        // copy this object while curChar is still uninitialised.
+        ++(*this);
+    }
+
+    /**
+     * Pre-increment operator.  Reads the next unicode character and sets
+     * the state appropriately.  The size of T selects the decoder:
+     * 1 -> UTF-8, 2 -> UTF-16, anything else is read as one element per
+     * character.
+     * Note - not protected against overruns.
+     *
+     * @return  A reference to this iterator, now at the next character.
+     */
+    FTUnicodeStringItr& operator++()
+    {
+        curPos = nextPos;
+        // unicode handling
+        switch (sizeof(T))
+        {
+            case 1: // UTF-8
+                readUTF8();
+                break;
+            case 2: // UTF-16
+                readUTF16();
+                break;
+            case 4: // UTF-32: one element is one character
+                // fall through
+            default: // error condition really, but give it a shot anyway
+                curChar = *nextPos++;
+                break;
+        }
+        return *this;
+    }
+
+    /**
+     * Post-increment operator.  Reads the next character and sets
+     * the state appropriately.
+     * Note - not protected against overruns.
+     *
+     * @return  A copy of the iterator as it was before advancing.
+     */
+    FTUnicodeStringItr operator++(int)
+    {
+        FTUnicodeStringItr temp = *this;
+        ++*this;
+        return temp;
+    }
+
+    /**
+     * Equality operator.  Two FTUnicodeStringItrs are considered equal
+     * if they point at the same buffer position; the cached character is
+     * not compared.
+     */
+    bool operator==(const FTUnicodeStringItr& right) const
+    {
+        return curPos == right.curPos;
+    }
+
+    /**
+     * Dereference operator.
+     *
+     * @return  The unicode codepoint of the character currently pointed
+     * to by the FTUnicodeStringItr.
+     */
+    unsigned int operator*() const
+    {
+        return curChar;
+    }
+
+    /**
+     * Buffer-fetching getter.  You can use this to retrieve the buffer
+     * starting at the currently-iterated character for functions which
+     * require a Unicode string as input.
+     *
+     * @return  Pointer to the first element of the current character.
+     */
+    const T* getBufferFromHere() const { return curPos; }
+
+private:
+    /**
+     * Helper function for reading a single UTF8 character from the string.
+     * Updates internal state appropriately.
+     */
+    void readUTF8();
+
+    /**
+     * Helper function for reading a single UTF16 character from the string.
+     * Updates internal state appropriately.
+     */
+    void readUTF16();
+
+    /**
+     * The buffer position of the first element in the current character.
+     */
+    const T* curPos;
+
+    /**
+     * The character stored at the current buffer position (prefetched on
+     * increment, so there's no penalty for dereferencing more than once).
+     */
+    unsigned int curChar;
+
+    /**
+     * The buffer position of the first element in the next character.
+     */
+    const T* nextPos;
+
+    // unicode magic numbers
+    // sequence length for each possible UTF-8 lead byte
+    static const char utf8bytes[256];
+    // value subtracted after accumulating a UTF-8 sequence of a given length
+    static const unsigned long offsetsFromUTF8[6];
+    // bounds of the UTF-16 high and low surrogate ranges
+    static const unsigned long highSurrogateStart;
+    static const unsigned long highSurrogateEnd;
+    static const unsigned long lowSurrogateStart;
+    static const unsigned long lowSurrogateEnd;
+    // shift and base used when combining a surrogate pair
+    static const unsigned long highSurrogateShift;
+    static const unsigned long lowSurrogateBase;
+};
+
+/* Total length, in bytes, of a UTF-8 sequence, indexed by the value of
+ * its first byte.  readUTF8() uses this to decide how many bytes to
+ * consume.  Bytes 0x80-0xBF are also mapped to 1, so a stray byte in that
+ * range is consumed as a one-byte character.  Lengths 5 and 6 belong to
+ * the old, now illegal, long forms. */
+template <typename T>
+const char FTUnicodeStringItr<T>::utf8bytes[256] = {
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x00-0x1F
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x20-0x3F
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x40-0x5F
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x60-0x7F
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x80-0x9F
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0xA0-0xBF
+  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xC0-0xDF
+  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,6,6  // 0xE0-0xFF
+};
+
+/* Correction values subtracted from the accumulated value at the end of
+ * UTF-8 decoding.  Indexed by (sequence length - 1), i.e. by the number
+ * of trailing bytes, so there is one entry for each of the six possible
+ * sequence lengths.  Each entry is the sum of the marker bits contributed
+ * by the lead byte and the trailing bytes of a sequence of that length. */
+template <typename T>
+const unsigned long FTUnicodeStringItr<T>::offsetsFromUTF8[6] = {
+    0x00000000UL,   // 1 byte
+    0x00003080UL,   // 2 bytes
+    0x000E2080UL,   // 3 bytes
+    0x03C82080UL,   // 4 bytes
+    0xFA082080UL,   // 5 bytes
+    0x82082080UL    // 6 bytes
+};
+
+// Decode one UTF-8 character starting at nextPos, store its code point in
+// curChar and leave nextPos at the start of the next character.
+// Not protected against invalid UTF-8: the lead byte alone decides how
+// many bytes are consumed and the trailing bytes are not validated.
+template <typename T>
+inline void FTUnicodeStringItr<T>::readUTF8()
+{
+    unsigned int ch = 0;
+    // total length of the sequence (1..6), looked up from the lead byte
+    const unsigned int bytesToRead = utf8bytes[(unsigned char)(*nextPos)];
+    // Every unit is read through unsigned char so that bytes >= 0x80 are
+    // not sign-extended when T is a signed type such as plain char.
+    // Each case deliberately falls through to the one below it.
+    switch (bytesToRead)
+    {
+          case 6: ch += (unsigned char)(*nextPos++); ch <<= 6; /* remember, illegal UTF-8 */
+          case 5: ch += (unsigned char)(*nextPos++); ch <<= 6; /* remember, illegal UTF-8 */
+          case 4: ch += (unsigned char)(*nextPos++); ch <<= 6;
+          case 3: ch += (unsigned char)(*nextPos++); ch <<= 6;
+          case 2: ch += (unsigned char)(*nextPos++); ch <<= 6;
+          case 1: ch += (unsigned char)(*nextPos++);
+    }
+    // remove the marker bits that were accumulated along with the payload
+    ch -= offsetsFromUTF8[bytesToRead-1];
+    curChar = ch;
+}
+
+// Constants for decoding UTF-16 surrogate pairs (see readUTF16()).
+// First and last code unit of the high (leading) surrogate range:
+template <typename T>
+const unsigned long FTUnicodeStringItr<T>::highSurrogateStart = 0xD800UL;
+template <typename T>
+const unsigned long FTUnicodeStringItr<T>::highSurrogateEnd = 0xDBFFUL;
+// First and last code unit of the low (trailing) surrogate range:
+template <typename T>
+const unsigned long FTUnicodeStringItr<T>::lowSurrogateStart = 0xDC00UL;
+template <typename T>
+const unsigned long FTUnicodeStringItr<T>::lowSurrogateEnd = 0xDFFFUL;
+// Number of bits the high surrogate's payload is shifted left by:
+template <typename T>
+const unsigned long FTUnicodeStringItr<T>::highSurrogateShift = 10UL;
+// Value added to the combined payload to form the final code point:
+template <typename T>
+const unsigned long FTUnicodeStringItr<T>::lowSurrogateBase = 0x10000UL;
+
+// Decode one UTF-16 character starting at nextPos, store its code point in
+// curChar and leave nextPos at the start of the next character.
+// A high surrogate followed by a low surrogate is combined into a single
+// code point; a high surrogate without a matching low surrogate is stored
+// unchanged and only that one unit is consumed.
+// Not protected against overruns.
+template <typename T>
+inline void FTUnicodeStringItr<T>::readUTF16()
+{
+    unsigned int ch = *nextPos++;
+    // if we have the first half of the surrogate pair
+    if (ch >= highSurrogateStart && ch <= highSurrogateEnd)
+    {
+        // Peek at the unit after the high surrogate.  nextPos already
+        // points there; curPos still points at the high surrogate itself,
+        // so reading through it could never match the low range.
+        const unsigned int ch2 = *nextPos;
+        // complete the surrogate pair
+        if (ch2 >= lowSurrogateStart && ch2 <= lowSurrogateEnd)
+        {
+            ch = ((ch - highSurrogateStart) << highSurrogateShift)
+                + (ch2 - lowSurrogateStart) + lowSurrogateBase;
+            ++nextPos;
+        }
+    }
+    curChar = ch;
+}
+
+#endif
author	scuri <scuri>	2010-06-21 22:55:10 +0000
committer	scuri <scuri>	2010-06-21 22:55:10 +0000
commit	37a92b86d13e89d0dcec92be6d23ceced29dbc36 (patch)
tree	faff61ef668379212b4ed948934533c3f2fe308b /src/ftgl/FTUnicode.h
parent	a124216ee05a5d63ea8fcdafcd050ad1fadf0b09 (diff)