Hash :
cdc9b560
Author :
Date :
2008-06-03T12:23:07
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
/*
* FTGL - OpenGL font library
*
* Copyright (c) 2008 Daniel Remenak <dtremenak@users.sourceforge.net>
*
* Portions derived from ConvertUTF.c Copyright (C) 2001-2004 Unicode, Inc
* Unicode, Inc. hereby grants the right to freely use the information
* supplied in this file in the creation of products supporting the
* Unicode Standard, and to make copies of this file in any form
* for internal or external distribution as long as this notice
* remains attached.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __FTUnicode__
#define __FTUnicode__
/**
* Provides a way to easily walk multibyte unicode strings in the various
* Unicode encodings (UTF-8, UTF-16, UTF-32, UCS-2, and UCS-4). Encodings
* with elements larger than one byte must already be in the correct endian
* order for the current architecture.
*/
template <typename T>
class FTUnicodeStringItr
{
public:
/**
* Constructor. Also reads the first character and stores it.
*
* @param string The buffer to iterate. No copy is made.
*/
FTUnicodeStringItr(const T* string) : curPos(string), nextPos(string)
{
(*this)++;
};
/**
* Pre-increment operator. Reads the next unicode character and sets
* the state appropriately.
* Note - not protected against overruns.
*/
FTUnicodeStringItr& operator++()
{
curPos = nextPos;
// unicode handling
switch (sizeof(T))
{
case 1: // UTF-8
// get this character
readUTF8(); break;
case 2: // UTF-16
readUTF16(); break;
case 4: // UTF-32
// fall through
default: // error condition really, but give it a shot anyway
curChar = *nextPos++;
}
return *this;
}
/**
* Post-increment operator. Reads the next character and sets
* the state appropriately.
* Note - not protected against overruns.
*/
FTUnicodeStringItr operator++(int)
{
FTUnicodeStringItr temp = *this;
++*this;
return temp;
}
/**
* Equality operator. Two FTUnicodeStringItrs are considered equal
* if they have the same current buffer and buffer position.
*/
bool operator==(const FTUnicodeStringItr& right) const
{
if (curPos == right.getBufferFromHere())
return true;
return false;
}
/**
* Dereference operator.
*
* @return The unicode codepoint of the character currently pointed
* to by the FTUnicodeStringItr.
*/
unsigned int operator*() const
{
return curChar;
}
/**
* Buffer-fetching getter. You can use this to retreive the buffer
* starting at the currently-iterated character for functions which
* require a Unicode string as input.
*/
const T* getBufferFromHere() const { return curPos; }
private:
/**
* Helper function for reading a single UTF8 character from the string.
* Updates internal state appropriately.
*/
void readUTF8();
/**
* Helper function for reading a single UTF16 character from the string.
* Updates internal state appropriately.
*/
void readUTF16();
/**
* The buffer position of the first element in the current character.
*/
const T* curPos;
/**
* The character stored at the current buffer position (prefetched on
* increment, so there's no penalty for dereferencing more than once).
*/
unsigned int curChar;
/**
* The buffer position of the first element in the next character.
*/
const T* nextPos;
// unicode magic numbers
static const char utf8bytes[256];
static const unsigned long offsetsFromUTF8[6];
static const unsigned long highSurrogateStart;
static const unsigned long highSurrogateEnd;
static const unsigned long lowSurrogateStart;
static const unsigned long lowSurrogateEnd;
static const unsigned long highSurrogateShift;
static const unsigned long lowSurrogateBase;
};
/* The first character in a UTF8 sequence indicates how many bytes
* to read (among other things) */
template <typename T>
const char FTUnicodeStringItr<T>::utf8bytes[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,6,6
};
/* Magic values subtracted from a buffer value during UTF8 conversion.
* This table contains as many values as there might be trailing bytes
* in a UTF-8 sequence. */
template <typename T>
const unsigned long FTUnicodeStringItr<T>::offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
// get a UTF8 character; leave the tracking pointer at the start of the
// next character
// not protected against invalid UTF8
template <typename T>
inline void FTUnicodeStringItr<T>::readUTF8()
{
unsigned int ch = 0;
unsigned int extraBytesToRead = utf8bytes[(unsigned char)(*nextPos)];
// falls through
switch (extraBytesToRead)
{
case 6: ch += *nextPos++; ch <<= 6; /* remember, illegal UTF-8 */
case 5: ch += *nextPos++; ch <<= 6; /* remember, illegal UTF-8 */
case 4: ch += *nextPos++; ch <<= 6;
case 3: ch += *nextPos++; ch <<= 6;
case 2: ch += *nextPos++; ch <<= 6;
case 1: ch += *nextPos++;
}
ch -= offsetsFromUTF8[extraBytesToRead-1];
curChar = ch;
}
// Magic numbers for UTF-16 conversions
template <typename T>
const unsigned long FTUnicodeStringItr<T>::highSurrogateStart = 0xD800;
template <typename T>
const unsigned long FTUnicodeStringItr<T>::highSurrogateEnd = 0xDBFF;
template <typename T>
const unsigned long FTUnicodeStringItr<T>::lowSurrogateStart = 0xDC00;
template <typename T>
const unsigned long FTUnicodeStringItr<T>::lowSurrogateEnd = 0xDFFF;
template <typename T>
const unsigned long FTUnicodeStringItr<T>::highSurrogateShift = 10;
template <typename T>
const unsigned long FTUnicodeStringItr<T>::lowSurrogateBase = 0x0010000UL;
template <typename T>
inline void FTUnicodeStringItr<T>::readUTF16()
{
unsigned int ch = *nextPos++;
// if we have the first half of the surrogate pair
if (ch >= highSurrogateStart && ch <= highSurrogateEnd)
{
unsigned int ch2 = *curPos;
// complete the surrogate pair
if (ch2 >= lowSurrogateStart && ch2 <= lowSurrogateEnd)
{
ch = ((ch - highSurrogateStart) << highSurrogateShift)
+ (ch2 - lowSurrogateStart) + lowSurrogateBase;
++nextPos;
}
}
curChar = ch;
}
#endif