encodings.h
Engine/source/persistence/rapidjson/encodings.h
Classes:
class
Dynamically select encoding according to stream's runtime-specified UTF encoding type.
class
Encoding conversion.
class
Specialization of Transcoder with same source and target encoding.
class
UTF-16 encoding.
class
UTF-16 big endian encoding.
class
UTF-16 little endian encoding.
class
UTF-32 encoding.
class
UTF-32 big endian encoding.
class
UTF-32 little endian encoding.
class
UTF-8 encoding.
Public Defines
define
RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
define
RAPIDJSON_COPY() os.Put(c = is.Take())
define
RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
define
RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
define
RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
define
RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
define
RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
Public Enumerations
enum
UTFType { kUTF8 = 0, kUTF16LE = 1, kUTF16BE = 2, kUTF32LE = 3, kUTF32BE = 4 }
Runtime-specified UTF encoding type of a stream.
Public Functions
Detailed Description
Public Defines
RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
RAPIDJSON_COPY() os.Put(c = is.Take())
RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
Public Enumerations
UTFType
Enumerator
- kUTF8 = 0
UTF-8.
- kUTF16LE = 1
UTF-16 little endian.
- kUTF16BE = 2
UTF-16 big endian.
- kUTF32LE = 3
UTF-32 little endian.
- kUTF32BE = 4
UTF-32 big endian.
Runtime-specified UTF encoding type of a stream.
Public Functions
PutUnsafe(Stream & stream, typename Stream::Ch c)
Write character to a stream, presuming buffer is reserved.
1 2// Tencent is pleased to support the open source community by making RapidJSON available. 3// 4// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 5// 6// Licensed under the MIT License (the "License"); you may not use this file except 7// in compliance with the License. You may obtain a copy of the License at 8// 9// http://opensource.org/licenses/MIT 10// 11// Unless required by applicable law or agreed to in writing, software distributed 12// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 13// CONDITIONS OF ANY KIND, either express or implied. See the License for the 14// specific language governing permissions and limitations under the License. 15 16#ifndef RAPIDJSON_ENCODINGS_H_ 17#define RAPIDJSON_ENCODINGS_H_ 18 19#include "rapidjson.h" 20 21#if defined(_MSC_VER) && !defined(__clang__) 22RAPIDJSON_DIAG_PUSH 23RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data 24RAPIDJSON_DIAG_OFF(4702) // unreachable code 25#elif defined(__GNUC__) 26RAPIDJSON_DIAG_PUSH 27RAPIDJSON_DIAG_OFF(effc++) 28RAPIDJSON_DIAG_OFF(overflow) 29#endif 30 31RAPIDJSON_NAMESPACE_BEGIN 32 33/////////////////////////////////////////////////////////////////////////////// 34// Encoding 35 36/*! \class rapidjson::Encoding 37 \brief Concept for encoding of Unicode characters. 38 39\code 40concept Encoding { 41 typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition. 42 43 enum { supportUnicode = 1 }; // or 0 if not supporting unicode 44 45 //! \brief Encode a Unicode codepoint to an output stream. 46 //! \param os Output stream. 47 //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. 48 template<typename OutputStream> 49 static void Encode(OutputStream& os, unsigned codepoint); 50 51 //! \brief Decode a Unicode codepoint from an input stream. 52 //! \param is Input stream. 53 //! 
\param codepoint Output of the unicode codepoint. 54 //! \return true if a valid codepoint can be decoded from the stream. 55 template <typename InputStream> 56 static bool Decode(InputStream& is, unsigned* codepoint); 57 58 //! \brief Validate one Unicode codepoint from an encoded stream. 59 //! \param is Input stream to obtain codepoint. 60 //! \param os Output for copying one codepoint. 61 //! \return true if it is valid. 62 //! \note This function just validating and copying the codepoint without actually decode it. 63 template <typename InputStream, typename OutputStream> 64 static bool Validate(InputStream& is, OutputStream& os); 65 66 // The following functions are deal with byte streams. 67 68 //! Take a character from input byte stream, skip BOM if exist. 69 template <typename InputByteStream> 70 static CharType TakeBOM(InputByteStream& is); 71 72 //! Take a character from input byte stream. 73 template <typename InputByteStream> 74 static Ch Take(InputByteStream& is); 75 76 //! Put BOM to output byte stream. 77 template <typename OutputByteStream> 78 static void PutBOM(OutputByteStream& os); 79 80 //! Put a character to output byte stream. 81 template <typename OutputByteStream> 82 static void Put(OutputByteStream& os, Ch c); 83}; 84\endcode 85*/ 86 87/////////////////////////////////////////////////////////////////////////////// 88// UTF8 89 90//! UTF-8 encoding. 91/*! http://en.wikipedia.org/wiki/UTF-8 92 http://tools.ietf.org/html/rfc3629 93 \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char. 
94 \note implements Encoding concept 95*/ 96template<typename CharType = char> 97struct UTF8 { 98 typedef CharType Ch; 99 100 enum { supportUnicode = 1 }; 101 102 template<typename OutputStream> 103 static void Encode(OutputStream& os, unsigned codepoint) { 104 if (codepoint <= 0x7F) 105 os.Put(static_cast<Ch>(codepoint & 0xFF)); 106 else if (codepoint <= 0x7FF) { 107 os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); 108 os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); 109 } 110 else if (codepoint <= 0xFFFF) { 111 os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); 112 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); 113 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); 114 } 115 else { 116 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 117 os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); 118 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); 119 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); 120 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); 121 } 122 } 123 124 template<typename OutputStream> 125 static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { 126 if (codepoint <= 0x7F) 127 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); 128 else if (codepoint <= 0x7FF) { 129 PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); 130 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); 131 } 132 else if (codepoint <= 0xFFFF) { 133 PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); 134 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); 135 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); 136 } 137 else { 138 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 139 PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); 140 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); 141 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); 142 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); 143 } 144 } 145 146 
template <typename InputStream> 147 static bool Decode(InputStream& is, unsigned* codepoint) { 148#define RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu) 149#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) 150#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70) 151 typename InputStream::Ch c = is.Take(); 152 if (!(c & 0x80)) { 153 *codepoint = static_cast<unsigned char>(c); 154 return true; 155 } 156 157 unsigned char type = GetRange(static_cast<unsigned char>(c)); 158 if (type >= 32) { 159 *codepoint = 0; 160 } else { 161 *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c); 162 } 163 bool result = true; 164 switch (type) { 165 case 2: RAPIDJSON_TAIL(); return result; 166 case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; 167 case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result; 168 case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; 169 case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; 170 case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result; 171 case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; 172 default: return false; 173 } 174#undef RAPIDJSON_COPY 175#undef RAPIDJSON_TRANS 176#undef RAPIDJSON_TAIL 177 } 178 179 template <typename InputStream, typename OutputStream> 180 static bool Validate(InputStream& is, OutputStream& os) { 181#define RAPIDJSON_COPY() os.Put(c = is.Take()) 182#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) 183#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70) 184 Ch c; 185 RAPIDJSON_COPY(); 186 if (!(c & 0x80)) 187 return true; 188 189 bool result = true; 190 switch (GetRange(static_cast<unsigned char>(c))) { 191 case 2: RAPIDJSON_TAIL(); return result; 192 case 3: 
RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; 193 case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result; 194 case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; 195 case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; 196 case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result; 197 case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; 198 default: return false; 199 } 200#undef RAPIDJSON_COPY 201#undef RAPIDJSON_TRANS 202#undef RAPIDJSON_TAIL 203 } 204 205 static unsigned char GetRange(unsigned char c) { 206 // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ 207 // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types. 208 static const unsigned char type[] = { 209 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 210 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 211 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 212 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 213 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, 214 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, 215 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, 216 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, 217 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 218 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, 219 }; 220 return type[c]; 221 } 222 223 template <typename InputByteStream> 224 static CharType TakeBOM(InputByteStream& is) { 225 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 226 typename InputByteStream::Ch c = Take(is); 227 if (static_cast<unsigned char>(c) != 0xEFu) return c; 228 c = is.Take(); 229 if 
(static_cast<unsigned char>(c) != 0xBBu) return c; 230 c = is.Take(); 231 if (static_cast<unsigned char>(c) != 0xBFu) return c; 232 c = is.Take(); 233 return c; 234 } 235 236 template <typename InputByteStream> 237 static Ch Take(InputByteStream& is) { 238 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 239 return static_cast<Ch>(is.Take()); 240 } 241 242 template <typename OutputByteStream> 243 static void PutBOM(OutputByteStream& os) { 244 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 245 os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu)); 246 os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu)); 247 os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu)); 248 } 249 250 template <typename OutputByteStream> 251 static void Put(OutputByteStream& os, Ch c) { 252 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 253 os.Put(static_cast<typename OutputByteStream::Ch>(c)); 254 } 255}; 256 257/////////////////////////////////////////////////////////////////////////////// 258// UTF16 259 260//! UTF-16 encoding. 261/*! http://en.wikipedia.org/wiki/UTF-16 262 http://tools.ietf.org/html/rfc2781 263 \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. 264 \note implements Encoding concept 265 266 \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. 267 For streaming, use UTF16LE and UTF16BE, which handle endianness. 
268*/ 269template<typename CharType = wchar_t> 270struct UTF16 { 271 typedef CharType Ch; 272 RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2); 273 274 enum { supportUnicode = 1 }; 275 276 template<typename OutputStream> 277 static void Encode(OutputStream& os, unsigned codepoint) { 278 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); 279 if (codepoint <= 0xFFFF) { 280 RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 281 os.Put(static_cast<typename OutputStream::Ch>(codepoint)); 282 } 283 else { 284 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 285 unsigned v = codepoint - 0x10000; 286 os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); 287 os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00)); 288 } 289 } 290 291 292 template<typename OutputStream> 293 static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { 294 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); 295 if (codepoint <= 0xFFFF) { 296 RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 297 PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint)); 298 } 299 else { 300 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 301 unsigned v = codepoint - 0x10000; 302 PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); 303 PutUnsafe(os, static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00)); 304 } 305 } 306 307 template <typename InputStream> 308 static bool Decode(InputStream& is, unsigned* codepoint) { 309 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); 310 typename InputStream::Ch c = is.Take(); 311 if (c < 0xD800 || c > 0xDFFF) { 312 *codepoint = static_cast<unsigned>(c); 313 return true; 314 } 315 else if (c <= 0xDBFF) { 316 *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10; 317 c = is.Take(); 318 *codepoint |= (static_cast<unsigned>(c) & 0x3FF); 319 *codepoint += 0x10000; 320 return c >= 0xDC00 
&& c <= 0xDFFF; 321 } 322 return false; 323 } 324 325 template <typename InputStream, typename OutputStream> 326 static bool Validate(InputStream& is, OutputStream& os) { 327 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); 328 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); 329 typename InputStream::Ch c; 330 os.Put(static_cast<typename OutputStream::Ch>(c = is.Take())); 331 if (c < 0xD800 || c > 0xDFFF) 332 return true; 333 else if (c <= 0xDBFF) { 334 os.Put(c = is.Take()); 335 return c >= 0xDC00 && c <= 0xDFFF; 336 } 337 return false; 338 } 339}; 340 341//! UTF-16 little endian encoding. 342template<typename CharType = wchar_t> 343struct UTF16LE : UTF16<CharType> { 344 template <typename InputByteStream> 345 static CharType TakeBOM(InputByteStream& is) { 346 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 347 CharType c = Take(is); 348 return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; 349 } 350 351 template <typename InputByteStream> 352 static CharType Take(InputByteStream& is) { 353 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 354 unsigned c = static_cast<uint8_t>(is.Take()); 355 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; 356 return static_cast<CharType>(c); 357 } 358 359 template <typename OutputByteStream> 360 static void PutBOM(OutputByteStream& os) { 361 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 362 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); 363 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); 364 } 365 366 template <typename OutputByteStream> 367 static void Put(OutputByteStream& os, CharType c) { 368 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 369 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); 370 os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); 371 } 372}; 373 374//! 
UTF-16 big endian encoding. 375template<typename CharType = wchar_t> 376struct UTF16BE : UTF16<CharType> { 377 template <typename InputByteStream> 378 static CharType TakeBOM(InputByteStream& is) { 379 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 380 CharType c = Take(is); 381 return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; 382 } 383 384 template <typename InputByteStream> 385 static CharType Take(InputByteStream& is) { 386 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 387 unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; 388 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())); 389 return static_cast<CharType>(c); 390 } 391 392 template <typename OutputByteStream> 393 static void PutBOM(OutputByteStream& os) { 394 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 395 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); 396 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); 397 } 398 399 template <typename OutputByteStream> 400 static void Put(OutputByteStream& os, CharType c) { 401 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 402 os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); 403 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); 404 } 405}; 406 407/////////////////////////////////////////////////////////////////////////////// 408// UTF32 409 410//! UTF-32 encoding. 411/*! http://en.wikipedia.org/wiki/UTF-32 412 \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. 413 \note implements Encoding concept 414 415 \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. 416 For streaming, use UTF32LE and UTF32BE, which handle endianness. 
417*/ 418template<typename CharType = unsigned> 419struct UTF32 { 420 typedef CharType Ch; 421 RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4); 422 423 enum { supportUnicode = 1 }; 424 425 template<typename OutputStream> 426 static void Encode(OutputStream& os, unsigned codepoint) { 427 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); 428 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 429 os.Put(codepoint); 430 } 431 432 template<typename OutputStream> 433 static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { 434 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); 435 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 436 PutUnsafe(os, codepoint); 437 } 438 439 template <typename InputStream> 440 static bool Decode(InputStream& is, unsigned* codepoint) { 441 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); 442 Ch c = is.Take(); 443 *codepoint = c; 444 return c <= 0x10FFFF; 445 } 446 447 template <typename InputStream, typename OutputStream> 448 static bool Validate(InputStream& is, OutputStream& os) { 449 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); 450 Ch c; 451 os.Put(c = is.Take()); 452 return c <= 0x10FFFF; 453 } 454}; 455 456//! UTF-32 little endian enocoding. 457template<typename CharType = unsigned> 458struct UTF32LE : UTF32<CharType> { 459 template <typename InputByteStream> 460 static CharType TakeBOM(InputByteStream& is) { 461 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 462 CharType c = Take(is); 463 return static_cast<uint32_t>(c) == 0x0000FEFFu ? 
Take(is) : c; 464 } 465 466 template <typename InputByteStream> 467 static CharType Take(InputByteStream& is) { 468 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 469 unsigned c = static_cast<uint8_t>(is.Take()); 470 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; 471 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; 472 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; 473 return static_cast<CharType>(c); 474 } 475 476 template <typename OutputByteStream> 477 static void PutBOM(OutputByteStream& os) { 478 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 479 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); 480 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); 481 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); 482 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); 483 } 484 485 template <typename OutputByteStream> 486 static void Put(OutputByteStream& os, CharType c) { 487 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 488 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); 489 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); 490 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); 491 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); 492 } 493}; 494 495//! UTF-32 big endian encoding. 496template<typename CharType = unsigned> 497struct UTF32BE : UTF32<CharType> { 498 template <typename InputByteStream> 499 static CharType TakeBOM(InputByteStream& is) { 500 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 501 CharType c = Take(is); 502 return static_cast<uint32_t>(c) == 0x0000FEFFu ? 
Take(is) : c; 503 } 504 505 template <typename InputByteStream> 506 static CharType Take(InputByteStream& is) { 507 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 508 unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; 509 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; 510 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; 511 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())); 512 return static_cast<CharType>(c); 513 } 514 515 template <typename OutputByteStream> 516 static void PutBOM(OutputByteStream& os) { 517 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 518 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); 519 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); 520 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); 521 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); 522 } 523 524 template <typename OutputByteStream> 525 static void Put(OutputByteStream& os, CharType c) { 526 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 527 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); 528 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); 529 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); 530 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); 531 } 532}; 533 534/////////////////////////////////////////////////////////////////////////////// 535// ASCII 536 537//! ASCII encoding. 538/*! http://en.wikipedia.org/wiki/ASCII 539 \tparam CharType Code unit for storing 7-bit ASCII data. Default is char. 
540 \note implements Encoding concept 541*/ 542template<typename CharType = char> 543struct ASCII { 544 typedef CharType Ch; 545 546 enum { supportUnicode = 0 }; 547 548 template<typename OutputStream> 549 static void Encode(OutputStream& os, unsigned codepoint) { 550 RAPIDJSON_ASSERT(codepoint <= 0x7F); 551 os.Put(static_cast<Ch>(codepoint & 0xFF)); 552 } 553 554 template<typename OutputStream> 555 static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { 556 RAPIDJSON_ASSERT(codepoint <= 0x7F); 557 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); 558 } 559 560 template <typename InputStream> 561 static bool Decode(InputStream& is, unsigned* codepoint) { 562 uint8_t c = static_cast<uint8_t>(is.Take()); 563 *codepoint = c; 564 return c <= 0X7F; 565 } 566 567 template <typename InputStream, typename OutputStream> 568 static bool Validate(InputStream& is, OutputStream& os) { 569 uint8_t c = static_cast<uint8_t>(is.Take()); 570 os.Put(static_cast<typename OutputStream::Ch>(c)); 571 return c <= 0x7F; 572 } 573 574 template <typename InputByteStream> 575 static CharType TakeBOM(InputByteStream& is) { 576 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 577 uint8_t c = static_cast<uint8_t>(Take(is)); 578 return static_cast<Ch>(c); 579 } 580 581 template <typename InputByteStream> 582 static Ch Take(InputByteStream& is) { 583 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 584 return static_cast<Ch>(is.Take()); 585 } 586 587 template <typename OutputByteStream> 588 static void PutBOM(OutputByteStream& os) { 589 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 590 (void)os; 591 } 592 593 template <typename OutputByteStream> 594 static void Put(OutputByteStream& os, Ch c) { 595 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 596 os.Put(static_cast<typename OutputByteStream::Ch>(c)); 597 } 598}; 599 600/////////////////////////////////////////////////////////////////////////////// 601// 
AutoUTF 602 603//! Runtime-specified UTF encoding type of a stream. 604enum UTFType { 605 kUTF8 = 0, //!< UTF-8. 606 kUTF16LE = 1, //!< UTF-16 little endian. 607 kUTF16BE = 2, //!< UTF-16 big endian. 608 kUTF32LE = 3, //!< UTF-32 little endian. 609 kUTF32BE = 4 //!< UTF-32 big endian. 610}; 611 612//! Dynamically select encoding according to stream's runtime-specified UTF encoding type. 613/*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType(). 614*/ 615template<typename CharType> 616struct AutoUTF { 617 typedef CharType Ch; 618 619 enum { supportUnicode = 1 }; 620 621#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x 622 623 template<typename OutputStream> 624 static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint) { 625 typedef void (*EncodeFunc)(OutputStream&, unsigned); 626 static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) }; 627 (*f[os.GetType()])(os, codepoint); 628 } 629 630 template<typename OutputStream> 631 static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream& os, unsigned codepoint) { 632 typedef void (*EncodeFunc)(OutputStream&, unsigned); 633 static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) }; 634 (*f[os.GetType()])(os, codepoint); 635 } 636 637 template <typename InputStream> 638 static RAPIDJSON_FORCEINLINE bool Decode(InputStream& is, unsigned* codepoint) { 639 typedef bool (*DecodeFunc)(InputStream&, unsigned*); 640 static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) }; 641 return (*f[is.GetType()])(is, codepoint); 642 } 643 644 template <typename InputStream, typename OutputStream> 645 static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) { 646 typedef bool (*ValidateFunc)(InputStream&, OutputStream&); 647 static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) }; 648 return (*f[is.GetType()])(is, os); 649 } 650 
651#undef RAPIDJSON_ENCODINGS_FUNC 652}; 653 654/////////////////////////////////////////////////////////////////////////////// 655// Transcoder 656 657//! Encoding conversion. 658template<typename SourceEncoding, typename TargetEncoding> 659struct Transcoder { 660 //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream. 661 template<typename InputStream, typename OutputStream> 662 static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) { 663 unsigned codepoint; 664 if (!SourceEncoding::Decode(is, &codepoint)) 665 return false; 666 TargetEncoding::Encode(os, codepoint); 667 return true; 668 } 669 670 template<typename InputStream, typename OutputStream> 671 static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) { 672 unsigned codepoint; 673 if (!SourceEncoding::Decode(is, &codepoint)) 674 return false; 675 TargetEncoding::EncodeUnsafe(os, codepoint); 676 return true; 677 } 678 679 //! Validate one Unicode codepoint from an encoded stream. 680 template<typename InputStream, typename OutputStream> 681 static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) { 682 return Transcode(is, os); // Since source/target encoding is different, must transcode. 683 } 684}; 685 686// Forward declaration. 687template<typename Stream> 688inline void PutUnsafe(Stream& stream, typename Stream::Ch c); 689 690//! Specialization of Transcoder with same source and target encoding. 691template<typename Encoding> 692struct Transcoder<Encoding, Encoding> { 693 template<typename InputStream, typename OutputStream> 694 static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) { 695 os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary template class. 
696 return true; 697 } 698 699 template<typename InputStream, typename OutputStream> 700 static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) { 701 PutUnsafe(os, is.Take()); // Just copy one code unit. This semantic is different from primary template class. 702 return true; 703 } 704 705 template<typename InputStream, typename OutputStream> 706 static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) { 707 return Encoding::Validate(is, os); // source/target encoding are the same 708 } 709}; 710 711RAPIDJSON_NAMESPACE_END 712 713#if defined(__GNUC__) || (defined(_MSC_VER) && !defined(__clang__)) 714RAPIDJSON_DIAG_POP 715#endif 716 717#endif // RAPIDJSON_ENCODINGS_H_ 718