encodings.h

Engine/source/persistence/rapidjson/encodings.h

More...

Classes:

class ASCII

ASCII encoding.

class AutoUTF

Dynamically select encoding according to stream's runtime-specified UTF encoding type.

class Transcoder

Encoding conversion.

class Transcoder< Encoding, Encoding >

Specialization of Transcoder with same source and target encoding.

class UTF16

UTF-16 encoding.

class UTF16BE

UTF-16 big endian encoding.

class UTF16LE

UTF-16 little endian encoding.

class UTF32

UTF-32 encoding.

class UTF32BE

UTF-32 big endian encoding.

class UTF32LE

UTF-32 little endian encoding.

class UTF8

UTF-8 encoding.

Public Defines

define
RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
define
RAPIDJSON_COPY() os.Put(c = is.Take())
define
RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
define
RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
define
RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
define
RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
define
RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)

Public Enumerations

enum
UTFType {
  kUTF8 = 0
  kUTF16LE = 1
  kUTF16BE = 2
  kUTF32LE = 3
  kUTF32BE = 4
}

Runtime-specified UTF encoding type of a stream.

Public Functions

PutUnsafe(Stream & stream, typename Stream::Ch c)

Write character to a stream, presuming buffer is reserved.

Detailed Description

Public Defines

RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
RAPIDJSON_COPY() os.Put(c = is.Take())
RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)

Public Enumerations

UTFType

Enumerator

kUTF8 = 0

UTF-8.

kUTF16LE = 1

UTF-16 little endian.

kUTF16BE = 2

UTF-16 big endian.

kUTF32LE = 3

UTF-32 little endian.

kUTF32BE = 4

UTF-32 big endian.

Runtime-specified UTF encoding type of a stream.

Public Functions

PutUnsafe(Stream & stream, typename Stream::Ch c)

Write character to a stream, presuming buffer is reserved.

  1
  2// Tencent is pleased to support the open source community by making RapidJSON available.
  3// 
  4// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
  5//
  6// Licensed under the MIT License (the "License"); you may not use this file except
  7// in compliance with the License. You may obtain a copy of the License at
  8//
  9// http://opensource.org/licenses/MIT
 10//
 11// Unless required by applicable law or agreed to in writing, software distributed 
 12// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
 13// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
 14// specific language governing permissions and limitations under the License.
 15
 16#ifndef RAPIDJSON_ENCODINGS_H_
 17#define RAPIDJSON_ENCODINGS_H_
 18
 19#include "rapidjson.h"
 20
 21#if defined(_MSC_VER) && !defined(__clang__)
 22RAPIDJSON_DIAG_PUSH
 23RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
 24RAPIDJSON_DIAG_OFF(4702)  // unreachable code
 25#elif defined(__GNUC__)
 26RAPIDJSON_DIAG_PUSH
 27RAPIDJSON_DIAG_OFF(effc++)
 28RAPIDJSON_DIAG_OFF(overflow)
 29#endif
 30
 31RAPIDJSON_NAMESPACE_BEGIN
 32
 33///////////////////////////////////////////////////////////////////////////////
 34// Encoding
 35
 36/*! \class rapidjson::Encoding
 37    \brief Concept for encoding of Unicode characters.
 38
 39\code
 40concept Encoding {
 41    typename Ch;    //! Type of character. A "character" is actually a code unit in unicode's definition.
 42
 43    enum { supportUnicode = 1 }; // or 0 if not supporting unicode
 44
 45    //! \brief Encode a Unicode codepoint to an output stream.
 46    //! \param os Output stream.
 47    //! \param codepoint A Unicode codepoint, ranging from 0x0 to 0x10FFFF inclusive.
 48    template<typename OutputStream>
 49    static void Encode(OutputStream& os, unsigned codepoint);
 50
 51    //! \brief Decode a Unicode codepoint from an input stream.
 52    //! \param is Input stream.
 53    //! \param codepoint Output of the unicode codepoint.
 54    //! \return true if a valid codepoint can be decoded from the stream.
 55    template <typename InputStream>
 56    static bool Decode(InputStream& is, unsigned* codepoint);
 57
 58    //! \brief Validate one Unicode codepoint from an encoded stream.
 59    //! \param is Input stream to obtain codepoint.
 60    //! \param os Output for copying one codepoint.
 61    //! \return true if it is valid.
 62    //! \note This function only validates and copies the codepoint without actually decoding it.
 63    template <typename InputStream, typename OutputStream>
 64    static bool Validate(InputStream& is, OutputStream& os);
 65
 66    // The following functions deal with byte streams.
 67
 68    //! Take a character from input byte stream, skipping the BOM if it exists.
 69    template <typename InputByteStream>
 70    static CharType TakeBOM(InputByteStream& is);
 71
 72    //! Take a character from input byte stream.
 73    template <typename InputByteStream>
 74    static Ch Take(InputByteStream& is);
 75
 76    //! Put BOM to output byte stream.
 77    template <typename OutputByteStream>
 78    static void PutBOM(OutputByteStream& os);
 79
 80    //! Put a character to output byte stream.
 81    template <typename OutputByteStream>
 82    static void Put(OutputByteStream& os, Ch c);
 83};
 84\endcode
 85*/
 86
 87///////////////////////////////////////////////////////////////////////////////
 88// UTF8
 89
 90//! UTF-8 encoding.
 91/*! http://en.wikipedia.org/wiki/UTF-8
 92    http://tools.ietf.org/html/rfc3629
 93    \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char.
 94    \note implements Encoding concept
       \note Decode() and Validate() classify every byte through GetRange(); EncodeUnsafe()
             writes through the free function PutUnsafe() instead of OutputStream::Put().
 95*/
 96template<typename CharType = char>
 97struct UTF8 {
 98    typedef CharType Ch;
 99
100    enum { supportUnicode = 1 };
101
       //! Write \c codepoint to \c os as a sequence of one to four UTF-8 code units via os.Put().
       /*! \param os Output stream receiving the code units.
           \param codepoint Code point to encode; values above 0xFFFF are asserted to be <= 0x10FFFF.
       */
102    template<typename OutputStream>
103    static void Encode(OutputStream& os, unsigned codepoint) {
104        if (codepoint <= 0x7F) // one unit: 0xxxxxxx
105            os.Put(static_cast<Ch>(codepoint & 0xFF));
106        else if (codepoint <= 0x7FF) { // two units: 0xC0 lead + one 0x80 continuation
107            os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
108            os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
109        }
110        else if (codepoint <= 0xFFFF) { // three units: 0xE0 lead + two continuations
111            os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
112            os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
113            os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
114        }
115        else { // four units: 0xF0 lead + three continuations
116            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
117            os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
118            os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
119            os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
120            os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
121        }
122    }
123
       //! Same encoding as Encode(), but every unit is written with PutUnsafe(os, ...).
       /*! \param os Output stream receiving the code units.
           \param codepoint Code point to encode; values above 0xFFFF are asserted to be <= 0x10FFFF.
       */
124    template<typename OutputStream>
125    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
126        if (codepoint <= 0x7F) // one unit
127            PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
128        else if (codepoint <= 0x7FF) { // two units
129            PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
130            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
131        }
132        else if (codepoint <= 0xFFFF) { // three units
133            PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
134            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
135            PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
136        }
137        else { // four units
138            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
139            PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
140            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
141            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
142            PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
143        }
144    }
145
       //! Decode one UTF-8 sequence from \c is into \c *codepoint.
       /*! \param is Input stream; the lead unit is always taken, followed by the tail units its range class calls for.
           \param codepoint Receives the accumulated value (it is written even when the sequence is rejected).
           \return true if every unit matched the range class expected at its position, false otherwise.
           \note After a mismatch the remaining tail units of the case are still taken from \c is;
                 only \c result records the failure.
       */
146    template <typename InputStream>
147    static bool Decode(InputStream& is, unsigned* codepoint) {
       // RAPIDJSON_COPY: take the next unit and append its low six bits to *codepoint.
       // RAPIDJSON_TRANS(mask): clear `result` unless the unit's range class shares a bit with `mask`.
       // RAPIDJSON_TAIL: ordinary tail unit, i.e. any of the classes 0x10, 0x20, 0x40 (bytes 0x80-0xBF).
148#define RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
149#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
150#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
151        typename InputStream::Ch c = is.Take();
152        if (!(c & 0x80)) { // high bit clear: single-unit sequence
153            *codepoint = static_cast<unsigned char>(c);
154            return true;
155        }
156
157        unsigned char type = GetRange(static_cast<unsigned char>(c));
158        if (type >= 32) { // classes 0x20/0x40 (a tail byte used as lead): skip the shift below, whose count would be >= 32
159            *codepoint = 0;
160        } else {
161            *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c); // the class number doubles as the shift that masks off the lead's prefix bits
162        }
163        bool result = true;
164        switch (type) {
165        case 2: RAPIDJSON_TAIL(); return result; // lead 0xC2-0xDF: one tail
166        case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; // lead 0xE1-0xEC, 0xEE, 0xEF: two tails
167        case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result; // lead 0xED: second unit limited to 0x80-0x9F (result stays below U+D800)
168        case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; // lead 0xF4: second unit limited to 0x80-0x8F (result <= U+10FFFF)
169        case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; // lead 0xF1-0xF3: three tails
170        case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result; // lead 0xE0: second unit limited to 0xA0-0xBF
171        case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; // lead 0xF0: second unit limited to 0x90-0xBF
172        default: return false; // class 8 (0xC0, 0xC1, 0xF5-0xFF) or a tail byte in lead position
173        }
174#undef RAPIDJSON_COPY
175#undef RAPIDJSON_TRANS
176#undef RAPIDJSON_TAIL
177    }
178
       //! Copy one UTF-8 sequence from \c is to \c os while checking it with the same class rules as Decode().
       /*! \param is Input stream to take code units from.
           \param os Output stream; every unit taken from \c is is also put here, valid or not.
           \return true if the copied sequence is valid, false otherwise.
       */
179    template <typename InputStream, typename OutputStream>
180    static bool Validate(InputStream& is, OutputStream& os) {
       // Same macro names as in Decode(), but RAPIDJSON_COPY forwards the unit to os instead of
       // accumulating a code point.
181#define RAPIDJSON_COPY() os.Put(c = is.Take())
182#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
183#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
184        Ch c;
185        RAPIDJSON_COPY();
186        if (!(c & 0x80)) // high bit clear: single-unit sequence
187            return true;
188
189        bool result = true;
190        switch (GetRange(static_cast<unsigned char>(c))) { // case table mirrors Decode()
191        case 2: RAPIDJSON_TAIL(); return result;
192        case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
193        case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result;
194        case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
195        case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
196        case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result;
197        case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
198        default: return false;
199        }
200#undef RAPIDJSON_COPY
201#undef RAPIDJSON_TRANS
202#undef RAPIDJSON_TAIL
203    }
204
       //! Return the range class of byte \c c, looked up in a 256-entry table.
       /*! Classes 0x10, 0x20 and 0x40 mark tail bytes and are single bits so that one AND can test
           for several of them; the small integers (2-6, 8, 10, 11) mark lead bytes and select the
           switch case in Decode()/Validate().
       */
205    static unsigned char GetRange(unsigned char c) {
206        // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
207        // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
208        static const unsigned char type[] = {
209            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00-0x1F
210            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20-0x3F
211            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40-0x5F
212            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60-0x7F
213            0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, // 0x80-0x8F
214            0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, // 0x90-0x9F
215            0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, // 0xA0-0xAF
216            0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, // 0xB0-0xBF
217            8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xC0-0xDF
218            10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, // 0xE0-0xFF
219        };
220        return type[c];
221    }
222
       //! Take the first character from a byte stream, stepping over a leading EF BB BF byte order mark.
       /*! \param is Input byte stream (its Ch must be one byte wide).
           \return The unit following a complete BOM, or the unit at which the BOM match stopped.
           \note NOTE(review): when only a prefix of the BOM matches (e.g. EF followed by a non-BB byte),
                 the already-taken prefix bytes are not returned to the caller.
       */
223    template <typename InputByteStream>
224    static CharType TakeBOM(InputByteStream& is) {
225        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
226        typename InputByteStream::Ch c = Take(is);
227        if (static_cast<unsigned char>(c) != 0xEFu) return c;
228        c = is.Take();
229        if (static_cast<unsigned char>(c) != 0xBBu) return c;
230        c = is.Take();
231        if (static_cast<unsigned char>(c) != 0xBFu) return c;
232        c = is.Take(); // full BOM seen: the next unit is the first real character
233        return c;
234    }
235
       //! Take one byte from \c is and return it converted to Ch.
236    template <typename InputByteStream>
237    static Ch Take(InputByteStream& is) {
238        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
239        return static_cast<Ch>(is.Take());
240    }
241
       //! Put the three-byte mark EF BB BF to \c os.
242    template <typename OutputByteStream>
243    static void PutBOM(OutputByteStream& os) {
244        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
245        os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
246        os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
247        os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
248    }
249
       //! Put code unit \c c to \c os as a single byte.
250    template <typename OutputByteStream>
251    static void Put(OutputByteStream& os, Ch c) {
252        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
253        os.Put(static_cast<typename OutputByteStream::Ch>(c));
254    }
255};
256
257///////////////////////////////////////////////////////////////////////////////
258// UTF16
259
260//! UTF-16 encoding.
261/*! http://en.wikipedia.org/wiki/UTF-16
262    http://tools.ietf.org/html/rfc2781
263    \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
264    \note implements Encoding concept
265
266    \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
267    For streaming, use UTF16LE and UTF16BE, which handle endianness.
268*/
269template<typename CharType = wchar_t>
270struct UTF16 {
271    typedef CharType Ch;
272    RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);
273
274    enum { supportUnicode = 1 };
275
       //! Write \c codepoint to \c os as one code unit or as a surrogate pair, using os.Put().
       /*! \param os Output stream whose Ch is at least two bytes wide.
           \param codepoint Code point to encode; asserted not to lie in 0xD800-0xDFFF and to be <= 0x10FFFF.
       */
276    template<typename OutputStream>
277    static void Encode(OutputStream& os, unsigned codepoint) {
278        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
279        if (codepoint <= 0xFFFF) {
280            RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 
281            os.Put(static_cast<typename OutputStream::Ch>(codepoint));
282        }
283        else {
284            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
285            unsigned v = codepoint - 0x10000; // 20-bit offset split across the two surrogates
286            os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); // high surrogate: upper 10 bits
287            os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00)); // low surrogate: lower 10 bits
288        }
289    }
290
291
       //! Same encoding as Encode(), but every unit is written with PutUnsafe(os, ...).
292    template<typename OutputStream>
293    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
294        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
295        if (codepoint <= 0xFFFF) {
296            RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 
297            PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint));
298        }
299        else {
300            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
301            unsigned v = codepoint - 0x10000;
302            PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
303            PutUnsafe(os, static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
304        }
305    }
306
       //! Decode one code point (one unit or a surrogate pair) from \c is.
       /*! \param is Input stream whose Ch is at least two bytes wide.
           \param codepoint Receives the decoded value; not written when the first unit is a low surrogate.
           \return true for a non-surrogate unit or a high surrogate followed by a low surrogate;
                   false for a leading low surrogate or a high surrogate followed by anything else.
           \note After a high surrogate the second unit is always taken, even if it turns out not to be a low surrogate.
       */
307    template <typename InputStream>
308    static bool Decode(InputStream& is, unsigned* codepoint) {
309        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
310        typename InputStream::Ch c = is.Take();
311        if (c < 0xD800 || c > 0xDFFF) { // not a surrogate: the unit is the code point
312            *codepoint = static_cast<unsigned>(c);
313            return true;
314        }
315        else if (c <= 0xDBFF) { // high surrogate: combine with the following unit
316            *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10;
317            c = is.Take();
318            *codepoint |= (static_cast<unsigned>(c) & 0x3FF);
319            *codepoint += 0x10000;
320            return c >= 0xDC00 && c <= 0xDFFF;
321        }
322        return false; // low surrogate in lead position
323    }
324
       //! Copy one code point (one unit or a surrogate pair) from \c is to \c os and report whether it is well formed.
       /*! \param is Input stream whose Ch is at least two bytes wide.
           \param os Output stream whose Ch is at least two bytes wide; every unit taken is also put here.
           \return Same acceptance rule as Decode().
       */
325    template <typename InputStream, typename OutputStream>
326    static bool Validate(InputStream& is, OutputStream& os) {
327        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
328        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
329        typename InputStream::Ch c;
330        os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
331        if (c < 0xD800 || c > 0xDFFF)
332            return true;
333        else if (c <= 0xDBFF) {
334            os.Put(static_cast<typename OutputStream::Ch>(c = is.Take())); // explicit cast, as for the first unit: the two streams' Ch types may differ
335            return c >= 0xDC00 && c <= 0xDFFF;
336        }
337        return false;
338    }
339};
340
341//! UTF-16 little endian encoding.
   /*! Adds byte-stream access to UTF16: each code unit is read or written as two bytes, low byte first. */
342template<typename CharType = wchar_t>
343struct UTF16LE : UTF16<CharType> {
       //! Take the first code unit from \c is; if it equals 0xFEFF (the BOM), return the unit after it instead.
344    template <typename InputByteStream>
345    static CharType TakeBOM(InputByteStream& is) {
346        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
347        CharType c = Take(is);
348        return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
349    }
350
       //! Assemble one code unit from two bytes of \c is: first byte is bits 0-7, second byte is bits 8-15.
351    template <typename InputByteStream>
352    static CharType Take(InputByteStream& is) {
353        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
354        unsigned c = static_cast<uint8_t>(is.Take());
355        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
356        return static_cast<CharType>(c);
357    }
358
       //! Put the BOM to \c os as the bytes FF FE (0xFEFF with the low byte first).
359    template <typename OutputByteStream>
360    static void PutBOM(OutputByteStream& os) {
361        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
362        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
363        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
364    }
365
       //! Put code unit \c c to \c os as two bytes: bits 0-7 first, then bits 8-15.
366    template <typename OutputByteStream>
367    static void Put(OutputByteStream& os, CharType c) {
368        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
369        os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
370        os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
371    }
372};
373
374//! UTF-16 big endian encoding.
   /*! Adds byte-stream access to UTF16: each code unit is read or written as two bytes, high byte first. */
375template<typename CharType = wchar_t>
376struct UTF16BE : UTF16<CharType> {
       //! Take the first code unit from \c is; if it equals 0xFEFF (the BOM), return the unit after it instead.
377    template <typename InputByteStream>
378    static CharType TakeBOM(InputByteStream& is) {
379        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
380        CharType c = Take(is);
381        return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
382    }
383
       //! Assemble one code unit from two bytes of \c is: first byte is bits 8-15, second byte is bits 0-7.
384    template <typename InputByteStream>
385    static CharType Take(InputByteStream& is) {
386        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
387        unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
388        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
389        return static_cast<CharType>(c);
390    }
391
       //! Put the BOM to \c os as the bytes FE FF (0xFEFF with the high byte first).
392    template <typename OutputByteStream>
393    static void PutBOM(OutputByteStream& os) {
394        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
395        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
396        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
397    }
398
       //! Put code unit \c c to \c os as two bytes: bits 8-15 first, then bits 0-7.
399    template <typename OutputByteStream>
400    static void Put(OutputByteStream& os, CharType c) {
401        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
402        os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
403        os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
404    }
405};
406
407///////////////////////////////////////////////////////////////////////////////
408// UTF32
409
410//! UTF-32 encoding. 
411/*! http://en.wikipedia.org/wiki/UTF-32
412    \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
413    \note implements Encoding concept
414
415    \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
416    For streaming, use UTF32LE and UTF32BE, which handle endianness.
       \note NOTE(review): Decode() and Validate() only check the 0x10FFFF upper bound; values in
       0xD800-0xDFFF are accepted here, whereas UTF16::Encode() asserts against them. Confirm whether
       this is intended.
417*/
418template<typename CharType = unsigned>
419struct UTF32 {
420    typedef CharType Ch;
421    RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);
422
423    enum { supportUnicode = 1 };
424
       //! Put \c codepoint to \c os as a single code unit, after asserting it is <= 0x10FFFF.
425    template<typename OutputStream>
426    static void Encode(OutputStream& os, unsigned codepoint) {
427        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
428        RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
429        os.Put(codepoint);
430    }
431
       //! Same as Encode(), but the unit is written with PutUnsafe(os, ...).
432    template<typename OutputStream>
433    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
434        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
435        RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
436        PutUnsafe(os, codepoint);
437    }
438
       //! Take one code unit from \c is and store it in \c *codepoint.
       /*! \return true if the unit is <= 0x10FFFF. \c *codepoint is written in either case. */
439    template <typename InputStream>
440    static bool Decode(InputStream& is, unsigned* codepoint) {
441        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
442        Ch c = is.Take();
443        *codepoint = c;
444        return c <= 0x10FFFF;
445    }
446
       //! Copy one code unit from \c is to \c os.
       /*! \return true if the unit is <= 0x10FFFF. The unit is put to \c os in either case. */
447    template <typename InputStream, typename OutputStream>
448    static bool Validate(InputStream& is, OutputStream& os) {
449        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
450        Ch c;
451        os.Put(c = is.Take());
452        return c <= 0x10FFFF;
453    }
454};
455
456//! UTF-32 little endian encoding.
   /*! Adds byte-stream access to UTF32: each code unit is read or written as four bytes, lowest byte first. */
457template<typename CharType = unsigned>
458struct UTF32LE : UTF32<CharType> {
       //! Take the first code unit from \c is; if it equals 0x0000FEFF (the BOM), return the unit after it instead.
459    template <typename InputByteStream>
460    static CharType TakeBOM(InputByteStream& is) {
461        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
462        CharType c = Take(is);
463        return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
464    }
465
       //! Assemble one code unit from four bytes of \c is; the first byte supplies bits 0-7, the last bits 24-31.
466    template <typename InputByteStream>
467    static CharType Take(InputByteStream& is) {
468        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
469        unsigned c = static_cast<uint8_t>(is.Take());
470        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
471        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
472        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
473        return static_cast<CharType>(c);
474    }
475
       //! Put the BOM to \c os as the bytes FF FE 00 00 (0x0000FEFF with the lowest byte first).
476    template <typename OutputByteStream>
477    static void PutBOM(OutputByteStream& os) {
478        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
479        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
480        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
481        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
482        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
483    }
484
       //! Put code unit \c c to \c os as four bytes, bits 0-7 first and bits 24-31 last.
485    template <typename OutputByteStream>
486    static void Put(OutputByteStream& os, CharType c) {
487        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
488        os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
489        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
490        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
491        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
492    }
493};
494
495//! UTF-32 big endian encoding.
   /*! Adds byte-stream access to UTF32: each code unit is read or written as four bytes, highest byte first. */
496template<typename CharType = unsigned>
497struct UTF32BE : UTF32<CharType> {
       //! Take the first code unit from \c is; if it equals 0x0000FEFF (the BOM), return the unit after it instead.
498    template <typename InputByteStream>
499    static CharType TakeBOM(InputByteStream& is) {
500        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
501        CharType c = Take(is);
502        return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; 
503    }
504
       //! Assemble one code unit from four bytes of \c is; the first byte supplies bits 24-31, the last bits 0-7.
505    template <typename InputByteStream>
506    static CharType Take(InputByteStream& is) {
507        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
508        unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
509        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
510        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
511        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
512        return static_cast<CharType>(c);
513    }
514
       //! Put the BOM to \c os as the bytes 00 00 FE FF (0x0000FEFF with the highest byte first).
515    template <typename OutputByteStream>
516    static void PutBOM(OutputByteStream& os) {
517        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
518        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
519        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
520        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
521        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
522    }
523
       //! Put code unit \c c to \c os as four bytes, bits 24-31 first and bits 0-7 last.
524    template <typename OutputByteStream>
525    static void Put(OutputByteStream& os, CharType c) {
526        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
527        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
528        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
529        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
530        os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
531    }
532};
533
534///////////////////////////////////////////////////////////////////////////////
535// ASCII
536
537//! ASCII encoding.
538/*! http://en.wikipedia.org/wiki/ASCII
539    \tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
540    \note implements Encoding concept
       \note supportUnicode is 0 for this encoding; Encode() asserts that the code point is <= 0x7F.
541*/
542template<typename CharType = char>
543struct ASCII {
544    typedef CharType Ch;
545
546    enum { supportUnicode = 0 };
547
       //! Put \c codepoint to \c os as one code unit, after asserting it is <= 0x7F.
548    template<typename OutputStream>
549    static void Encode(OutputStream& os, unsigned codepoint) {
550        RAPIDJSON_ASSERT(codepoint <= 0x7F);
551        os.Put(static_cast<Ch>(codepoint & 0xFF));
552    }
553
       //! Same as Encode(), but the unit is written with PutUnsafe(os, ...).
554    template<typename OutputStream>
555    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
556        RAPIDJSON_ASSERT(codepoint <= 0x7F);
557        PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
558    }
559
       //! Take one unit from \c is, narrow it to uint8_t and store it in \c *codepoint.
       /*! \return true if the narrowed value is <= 0x7F. \c *codepoint is written in either case.
           \note NOTE(review): the cast to uint8_t drops any higher bits if the stream's Ch is wider
                 than one byte - confirm that such streams are never used with this encoding.
       */
560    template <typename InputStream>
561    static bool Decode(InputStream& is, unsigned* codepoint) {
562        uint8_t c = static_cast<uint8_t>(is.Take());
563        *codepoint = c;
564        return c <= 0X7F;
565    }
566
       //! Copy one unit from \c is to \c os (narrowed to uint8_t on the way).
       /*! \return true if the narrowed value is <= 0x7F. The unit is put to \c os in either case. */
567    template <typename InputStream, typename OutputStream>
568    static bool Validate(InputStream& is, OutputStream& os) {
569        uint8_t c = static_cast<uint8_t>(is.Take());
570        os.Put(static_cast<typename OutputStream::Ch>(c));
571        return c <= 0x7F;
572    }
573
       //! Take the first character from \c is; no byte order mark is looked for.
574    template <typename InputByteStream>
575    static CharType TakeBOM(InputByteStream& is) {
576        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
577        uint8_t c = static_cast<uint8_t>(Take(is));
578        return static_cast<Ch>(c);
579    }
580
       //! Take one byte from \c is and return it converted to Ch.
581    template <typename InputByteStream>
582    static Ch Take(InputByteStream& is) {
583        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
584        return static_cast<Ch>(is.Take());
585    }
586
       //! Writes nothing: this encoding emits no byte order mark.
587    template <typename OutputByteStream>
588    static void PutBOM(OutputByteStream& os) {
589        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
590        (void)os; // parameter intentionally unused
591    }
592
       //! Put code unit \c c to \c os as a single byte.
593    template <typename OutputByteStream>
594    static void Put(OutputByteStream& os, Ch c) {
595        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
596        os.Put(static_cast<typename OutputByteStream::Ch>(c));
597    }
598};
599
600///////////////////////////////////////////////////////////////////////////////
601// AutoUTF
602
603//! Runtime-specified UTF encoding type of a stream.
   /*! \note AutoUTF indexes its function tables with the stream's GetType() result (presumably one
             of these enumerators - confirm against the stream classes), so the numeric values here
             must stay in the same order as the entries of RAPIDJSON_ENCODINGS_FUNC.
   */
604enum UTFType {
605    kUTF8 = 0,      //!< UTF-8.
606    kUTF16LE = 1,   //!< UTF-16 little endian.
607    kUTF16BE = 2,   //!< UTF-16 big endian.
608    kUTF32LE = 3,   //!< UTF-32 little endian.
609    kUTF32BE = 4    //!< UTF-32 big endian.
610};
611
612//! Dynamically select encoding according to stream's runtime-specified UTF encoding type.
613/*! \note This class can be used with AutoUTFInputStream and AutoUTFOutputStream, which provides GetType().
       \note Each member builds a five-entry table of function pointers (UTF8, UTF16LE, UTF16BE,
             UTF32LE, UTF32BE, all instantiated with Ch) and calls the entry selected by GetType().
       \note NOTE(review): the GetType() result is used as an array index without a range check.
614*/
615template<typename CharType>
616struct AutoUTF {
617    typedef CharType Ch;
618
619    enum { supportUnicode = 1 };
620
   // Expands to the member function x of the five concrete encodings, in table order.
621#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
622
       //! Forward to the Encode() of the encoding selected by os.GetType().
623    template<typename OutputStream>
624    static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint) {
625        typedef void (*EncodeFunc)(OutputStream&, unsigned);
626        static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
627        (*f[os.GetType()])(os, codepoint);
628    }
629
       //! Forward to the EncodeUnsafe() of the encoding selected by os.GetType().
630    template<typename OutputStream>
631    static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
632        typedef void (*EncodeFunc)(OutputStream&, unsigned);
633        static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) };
634        (*f[os.GetType()])(os, codepoint);
635    }
636
       //! Forward to the Decode() of the encoding selected by is.GetType() and return its result.
637    template <typename InputStream>
638    static RAPIDJSON_FORCEINLINE bool Decode(InputStream& is, unsigned* codepoint) {
639        typedef bool (*DecodeFunc)(InputStream&, unsigned*);
640        static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
641        return (*f[is.GetType()])(is, codepoint);
642    }
643
       //! Forward to the Validate() of the encoding selected by is.GetType() and return its result.
       /*! Only the input stream's type is consulted; os.GetType() is not used here. */
644    template <typename InputStream, typename OutputStream>
645    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
646        typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
647        static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
648        return (*f[is.GetType()])(is, os);
649    }
650
651#undef RAPIDJSON_ENCODINGS_FUNC
652};
653
654///////////////////////////////////////////////////////////////////////////////
655// Transcoder
656
657//! Encoding conversion.
   /*! Primary template: converts by decoding a code point with SourceEncoding and re-encoding it
       with TargetEncoding.
       \tparam SourceEncoding Encoding whose Decode() reads the input stream.
       \tparam TargetEncoding Encoding whose Encode()/EncodeUnsafe() writes the output stream.
   */
658template<typename SourceEncoding, typename TargetEncoding>
659struct Transcoder {
660    //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
       /*! \return false if SourceEncoding::Decode() fails (nothing is written to \c os in that case), true otherwise. */
661    template<typename InputStream, typename OutputStream>
662    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) {
663        unsigned codepoint;
664        if (!SourceEncoding::Decode(is, &codepoint))
665            return false;
666        TargetEncoding::Encode(os, codepoint);
667        return true;
668    }
669
       //! Same as Transcode(), but the output is written with TargetEncoding::EncodeUnsafe().
670    template<typename InputStream, typename OutputStream>
671    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
672        unsigned codepoint;
673        if (!SourceEncoding::Decode(is, &codepoint))
674            return false;
675        TargetEncoding::EncodeUnsafe(os, codepoint);
676        return true;
677    }
678
679    //! Validate one Unicode codepoint from an encoded stream.
680    template<typename InputStream, typename OutputStream>
681    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
682        return Transcode(is, os);   // Since source/target encoding is different, must transcode.
683    }
684};
685
686// Forward declaration.
   // Declares the free function used by the EncodeUnsafe()/TranscodeUnsafe() members in this
   // header; its definition is not part of this header.
687template<typename Stream>
688inline void PutUnsafe(Stream& stream, typename Stream::Ch c);
689
690//! Specialization of Transcoder with same source and target encoding.
   /*! Transcode()/TranscodeUnsafe() copy a single code unit without inspecting it and always
       return true; only Validate() checks the data, by delegating to Encoding::Validate().
   */
691template<typename Encoding>
692struct Transcoder<Encoding, Encoding> {
       //! Copy one code unit from \c is to \c os with os.Put(); always returns true.
693    template<typename InputStream, typename OutputStream>
694    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) {
695        os.Put(is.Take());  // Just copy one code unit. This semantic is different from primary template class.
696        return true;
697    }
698    
       //! Copy one code unit from \c is to \c os with PutUnsafe(); always returns true.
699    template<typename InputStream, typename OutputStream>
700    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
701        PutUnsafe(os, is.Take());  // Just copy one code unit. This semantic is different from primary template class.
702        return true;
703    }
704    
       //! Validate (and copy) one code point by calling Encoding::Validate(); returns its result.
705    template<typename InputStream, typename OutputStream>
706    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
707        return Encoding::Validate(is, os);  // source/target encoding are the same
708    }
709};
710
711RAPIDJSON_NAMESPACE_END
712
713#if defined(__GNUC__) || (defined(_MSC_VER) && !defined(__clang__))
714RAPIDJSON_DIAG_POP
715#endif
716
717#endif // RAPIDJSON_ENCODINGS_H_
718