encodedstream.h
Engine/source/persistence/rapidjson/encodedstream.h
Classes:
class AutoUTFInputStream
Input stream wrapper with dynamically bound encoding and automatic encoding detection.
class AutoUTFOutputStream
Output stream wrapper with dynamically bound encoding and automatic encoding detection.
class EncodedInputStream
Input byte stream wrapper with a statically bound encoding.
class EncodedInputStream< UTF8<>, MemoryStream >
Specialized for UTF8 MemoryStream.
class EncodedOutputStream
Output byte stream wrapper with statically bound encoding.
Public Defines
define
RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
Detailed Description
Public Defines
RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
1 2// Tencent is pleased to support the open source community by making RapidJSON available. 3// 4// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 5// 6// Licensed under the MIT License (the "License"); you may not use this file except 7// in compliance with the License. You may obtain a copy of the License at 8// 9// http://opensource.org/licenses/MIT 10// 11// Unless required by applicable law or agreed to in writing, software distributed 12// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 13// CONDITIONS OF ANY KIND, either express or implied. See the License for the 14// specific language governing permissions and limitations under the License. 15 16#ifndef RAPIDJSON_ENCODEDSTREAM_H_ 17#define RAPIDJSON_ENCODEDSTREAM_H_ 18 19#include "stream.h" 20#include "memorystream.h" 21 22#ifdef __GNUC__ 23RAPIDJSON_DIAG_PUSH 24RAPIDJSON_DIAG_OFF(effc++) 25#endif 26 27#ifdef __clang__ 28RAPIDJSON_DIAG_PUSH 29RAPIDJSON_DIAG_OFF(padded) 30#endif 31 32RAPIDJSON_NAMESPACE_BEGIN 33 34//! Input byte stream wrapper with a statically bound encoding. 35/*! 36 \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. 37 \tparam InputByteStream Type of input byte stream. For example, FileReadStream. 
38*/ 39template <typename Encoding, typename InputByteStream> 40class EncodedInputStream { 41 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 42public: 43 typedef typename Encoding::Ch Ch; 44 45 EncodedInputStream(InputByteStream& is) : is_(is) { 46 current_ = Encoding::TakeBOM(is_); 47 } 48 49 Ch Peek() const { return current_; } 50 Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; } 51 size_t Tell() const { return is_.Tell(); } 52 53 // Not implemented 54 void Put(Ch) { RAPIDJSON_ASSERT(false); } 55 void Flush() { RAPIDJSON_ASSERT(false); } 56 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } 57 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } 58 59private: 60 EncodedInputStream(const EncodedInputStream&); 61 EncodedInputStream& operator=(const EncodedInputStream&); 62 63 InputByteStream& is_; 64 Ch current_; 65}; 66 67//! Specialized for UTF8 MemoryStream. 68template <> 69class EncodedInputStream<UTF8<>, MemoryStream> { 70public: 71 typedef UTF8<>::Ch Ch; 72 73 EncodedInputStream(MemoryStream& is) : is_(is) { 74 if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take(); 75 if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take(); 76 if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take(); 77 } 78 Ch Peek() const { return is_.Peek(); } 79 Ch Take() { return is_.Take(); } 80 size_t Tell() const { return is_.Tell(); } 81 82 // Not implemented 83 void Put(Ch) {} 84 void Flush() {} 85 Ch* PutBegin() { return 0; } 86 size_t PutEnd(Ch*) { return 0; } 87 88 MemoryStream& is_; 89 90private: 91 EncodedInputStream(const EncodedInputStream&); 92 EncodedInputStream& operator=(const EncodedInputStream&); 93}; 94 95//! Output byte stream wrapper with statically bound encoding. 96/*! 97 \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. 98 \tparam OutputByteStream Type of input byte stream. For example, FileWriteStream. 
99*/ 100template <typename Encoding, typename OutputByteStream> 101class EncodedOutputStream { 102 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 103public: 104 typedef typename Encoding::Ch Ch; 105 106 EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { 107 if (putBOM) 108 Encoding::PutBOM(os_); 109 } 110 111 void Put(Ch c) { Encoding::Put(os_, c); } 112 void Flush() { os_.Flush(); } 113 114 // Not implemented 115 Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} 116 Ch Take() { RAPIDJSON_ASSERT(false); return 0;} 117 size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } 118 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } 119 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } 120 121private: 122 EncodedOutputStream(const EncodedOutputStream&); 123 EncodedOutputStream& operator=(const EncodedOutputStream&); 124 125 OutputByteStream& os_; 126}; 127 128#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x 129 130//! Input stream wrapper with dynamically bound encoding and automatic encoding detection. 131/*! 132 \tparam CharType Type of character for reading. 133 \tparam InputByteStream type of input byte stream to be wrapped. 134*/ 135template <typename CharType, typename InputByteStream> 136class AutoUTFInputStream { 137 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 138public: 139 typedef CharType Ch; 140 141 //! Constructor. 142 /*! 143 \param is input stream to be wrapped. 144 \param type UTF encoding type if it is not detected from the stream. 
145 */ 146 AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) { 147 RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); 148 DetectType(); 149 static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; 150 takeFunc_ = f[type_]; 151 current_ = takeFunc_(*is_); 152 } 153 154 UTFType GetType() const { return type_; } 155 bool HasBOM() const { return hasBOM_; } 156 157 Ch Peek() const { return current_; } 158 Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } 159 size_t Tell() const { return is_->Tell(); } 160 161 // Not implemented 162 void Put(Ch) { RAPIDJSON_ASSERT(false); } 163 void Flush() { RAPIDJSON_ASSERT(false); } 164 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } 165 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } 166 167private: 168 AutoUTFInputStream(const AutoUTFInputStream&); 169 AutoUTFInputStream& operator=(const AutoUTFInputStream&); 170 171 // Detect encoding type with BOM or RFC 4627 172 void DetectType() { 173 // BOM (Byte Order Mark): 174 // 00 00 FE FF UTF-32BE 175 // FF FE 00 00 UTF-32LE 176 // FE FF UTF-16BE 177 // FF FE UTF-16LE 178 // EF BB BF UTF-8 179 180 const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4()); 181 if (!c) 182 return; 183 184 unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24)); 185 hasBOM_ = false; 186 if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } 187 else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } 188 else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); } 189 else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); } 190 else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); } 191 192 // RFC 4627: Section 
3 193 // "Since the first two characters of a JSON text will always be ASCII 194 // characters [RFC0020], it is possible to determine whether an octet 195 // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking 196 // at the pattern of nulls in the first four octets." 197 // 00 00 00 xx UTF-32BE 198 // 00 xx 00 xx UTF-16BE 199 // xx 00 00 00 UTF-32LE 200 // xx 00 xx 00 UTF-16LE 201 // xx xx xx xx UTF-8 202 203 if (!hasBOM_) { 204 int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); 205 switch (pattern) { 206 case 0x08: type_ = kUTF32BE; break; 207 case 0x0A: type_ = kUTF16BE; break; 208 case 0x01: type_ = kUTF32LE; break; 209 case 0x05: type_ = kUTF16LE; break; 210 case 0x0F: type_ = kUTF8; break; 211 default: break; // Use type defined by user. 212 } 213 } 214 215 // Runtime check whether the size of character type is sufficient. It only perform checks with assertion. 216 if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); 217 if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); 218 } 219 220 typedef Ch (*TakeFunc)(InputByteStream& is); 221 InputByteStream* is_; 222 UTFType type_; 223 Ch current_; 224 TakeFunc takeFunc_; 225 bool hasBOM_; 226}; 227 228//! Output stream wrapper with dynamically bound encoding and automatic encoding detection. 229/*! 230 \tparam CharType Type of character for writing. 231 \tparam OutputByteStream type of output byte stream to be wrapped. 232*/ 233template <typename CharType, typename OutputByteStream> 234class AutoUTFOutputStream { 235 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 236public: 237 typedef CharType Ch; 238 239 //! Constructor. 240 /*! 241 \param os output stream to be wrapped. 242 \param type UTF encoding type. 243 \param putBOM Whether to write BOM at the beginning of the stream. 
244 */ 245 AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) { 246 RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); 247 248 // Runtime check whether the size of character type is sufficient. It only perform checks with assertion. 249 if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); 250 if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); 251 252 static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) }; 253 putFunc_ = f[type_]; 254 255 if (putBOM) 256 PutBOM(); 257 } 258 259 UTFType GetType() const { return type_; } 260 261 void Put(Ch c) { putFunc_(*os_, c); } 262 void Flush() { os_->Flush(); } 263 264 // Not implemented 265 Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} 266 Ch Take() { RAPIDJSON_ASSERT(false); return 0;} 267 size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } 268 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } 269 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } 270 271private: 272 AutoUTFOutputStream(const AutoUTFOutputStream&); 273 AutoUTFOutputStream& operator=(const AutoUTFOutputStream&); 274 275 void PutBOM() { 276 typedef void (*PutBOMFunc)(OutputByteStream&); 277 static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; 278 f[type_](*os_); 279 } 280 281 typedef void (*PutFunc)(OutputByteStream&, Ch); 282 283 OutputByteStream* os_; 284 UTFType type_; 285 PutFunc putFunc_; 286}; 287 288#undef RAPIDJSON_ENCODINGS_FUNC 289 290RAPIDJSON_NAMESPACE_END 291 292#ifdef __clang__ 293RAPIDJSON_DIAG_POP 294#endif 295 296#ifdef __GNUC__ 297RAPIDJSON_DIAG_POP 298#endif 299 300#endif // RAPIDJSON_FILESTREAM_H_ 301