diff options
Diffstat (limited to 'include/rapidjson/encodings.h')
-rw-r--r-- | include/rapidjson/encodings.h | 716 |
1 files changed, 716 insertions, 0 deletions
diff --git a/include/rapidjson/encodings.h b/include/rapidjson/encodings.h new file mode 100644 index 0000000..0b24467 --- /dev/null +++ b/include/rapidjson/encodings.h @@ -0,0 +1,716 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ENCODINGS_H_ +#define RAPIDJSON_ENCODINGS_H_ + +#include "rapidjson.h" + +#if defined(_MSC_VER) && !defined(__clang__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data +RAPIDJSON_DIAG_OFF(4702) // unreachable code +#elif defined(__GNUC__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +RAPIDJSON_DIAG_OFF(overflow) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Encoding + +/*! \class rapidjson::Encoding + \brief Concept for encoding of Unicode characters. + +\code +concept Encoding { + typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition. + + enum { supportUnicode = 1 }; // or 0 if not supporting unicode + + //! \brief Encode a Unicode codepoint to an output stream. + //! \param os Output stream. + //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. + template<typename OutputStream> + static void Encode(OutputStream& os, unsigned codepoint); + + //! \brief Decode a Unicode codepoint from an input stream. + //! \param is Input stream. + //! \param codepoint Output of the unicode codepoint. + //! \return true if a valid codepoint can be decoded from the stream. + template <typename InputStream> + static bool Decode(InputStream& is, unsigned* codepoint); + + //! \brief Validate one Unicode codepoint from an encoded stream. + //! \param is Input stream to obtain codepoint. + //! \param os Output for copying one codepoint. + //! \return true if it is valid. + //! \note This function just validating and copying the codepoint without actually decode it. + template <typename InputStream, typename OutputStream> + static bool Validate(InputStream& is, OutputStream& os); + + // The following functions are deal with byte streams. + + //! Take a character from input byte stream, skip BOM if exist. + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is); + + //! Take a character from input byte stream. + template <typename InputByteStream> + static Ch Take(InputByteStream& is); + + //! Put BOM to output byte stream. + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os); + + //! Put a character to output byte stream. + template <typename OutputByteStream> + static void Put(OutputByteStream& os, Ch c); +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// UTF8 + +//! UTF-8 encoding. +/*! http://en.wikipedia.org/wiki/UTF-8 + http://tools.ietf.org/html/rfc3629 + \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char. + \note implements Encoding concept +*/ +template<typename CharType = char> +struct UTF8 { + typedef CharType Ch; + + enum { supportUnicode = 1 }; + + template<typename OutputStream> + static void Encode(OutputStream& os, unsigned codepoint) { + if (codepoint <= 0x7F) + os.Put(static_cast<Ch>(codepoint & 0xFF)); + else if (codepoint <= 0x7FF) { + os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); + os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); + } + else if (codepoint <= 0xFFFF) { + os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); + os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); + os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); + os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); + os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); + os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); + } + } + + template<typename OutputStream> + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + if (codepoint <= 0x7F) + PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); + else if (codepoint <= 0x7FF) { + PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); + PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); + } + else if (codepoint <= 0xFFFF) { + PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); + PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); + PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); + PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); + PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); + PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); + } + } + + template <typename InputStream> + static bool Decode(InputStream& is, unsigned* codepoint) { +#define RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu) +#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) +#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70) + typename InputStream::Ch c = is.Take(); + if (!(c & 0x80)) { + *codepoint = static_cast<unsigned char>(c); + return true; + } + + unsigned char type = GetRange(static_cast<unsigned char>(c)); + if (type >= 32) { + *codepoint = 0; + } else { + *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c); + } + bool result = true; + switch (type) { + case 2: RAPIDJSON_TAIL(); return result; + case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; + case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result; + case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; + case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; + case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result; + case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; + default: return false; + } +#undef RAPIDJSON_COPY +#undef RAPIDJSON_TRANS +#undef RAPIDJSON_TAIL + } + + template <typename InputStream, typename OutputStream> + static bool Validate(InputStream& is, OutputStream& os) { +#define RAPIDJSON_COPY() os.Put(c = is.Take()) +#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) +#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70) + Ch c; + RAPIDJSON_COPY(); + if (!(c & 0x80)) + return true; + + bool result = true; + switch (GetRange(static_cast<unsigned char>(c))) { + case 2: RAPIDJSON_TAIL(); return result; + case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; + case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result; + case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; + case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; + case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result; + case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; + default: return false; + } +#undef RAPIDJSON_COPY +#undef RAPIDJSON_TRANS +#undef RAPIDJSON_TAIL + } + + static unsigned char GetRange(unsigned char c) { + // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types. + static const unsigned char type[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, + 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, + 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, + 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + }; + return type[c]; + } + + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + typename InputByteStream::Ch c = Take(is); + if (static_cast<unsigned char>(c) != 0xEFu) return c; + c = is.Take(); + if (static_cast<unsigned char>(c) != 0xBBu) return c; + c = is.Take(); + if (static_cast<unsigned char>(c) != 0xBFu) return c; + c = is.Take(); + return c; + } + + template <typename InputByteStream> + static Ch Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + return static_cast<Ch>(is.Take()); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu)); + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, Ch c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(c)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF16 + +//! UTF-16 encoding. +/*! http://en.wikipedia.org/wiki/UTF-16 + http://tools.ietf.org/html/rfc2781 + \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. + \note implements Encoding concept + + \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. + For streaming, use UTF16LE and UTF16BE, which handle endianness. +*/ +template<typename CharType = wchar_t> +struct UTF16 { + typedef CharType Ch; + RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2); + + enum { supportUnicode = 1 }; + + template<typename OutputStream> + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + if (codepoint <= 0xFFFF) { + RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair + os.Put(static_cast<typename OutputStream::Ch>(codepoint)); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + unsigned v = codepoint - 0x10000; + os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); + os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00)); + } + } + + + template<typename OutputStream> + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + if (codepoint <= 0xFFFF) { + RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair + PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint)); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + unsigned v = codepoint - 0x10000; + PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); + PutUnsafe(os, static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00)); + } + } + + template <typename InputStream> + static bool Decode(InputStream& is, unsigned* codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); + typename InputStream::Ch c = is.Take(); + if (c < 0xD800 || c > 0xDFFF) { + *codepoint = static_cast<unsigned>(c); + return true; + } + else if (c <= 0xDBFF) { + *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10; + c = is.Take(); + *codepoint |= (static_cast<unsigned>(c) & 0x3FF); + *codepoint += 0x10000; + return c >= 0xDC00 && c <= 0xDFFF; + } + return false; + } + + template <typename InputStream, typename OutputStream> + static bool Validate(InputStream& is, OutputStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + typename InputStream::Ch c; + os.Put(static_cast<typename OutputStream::Ch>(c = is.Take())); + if (c < 0xD800 || c > 0xDFFF) + return true; + else if (c <= 0xDBFF) { + os.Put(c = is.Take()); + return c >= 0xDC00 && c <= 0xDFFF; + } + return false; + } +}; + +//! UTF-16 little endian encoding. +template<typename CharType = wchar_t> +struct UTF16LE : UTF16<CharType> { + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; + } + + template <typename InputByteStream> + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast<uint8_t>(is.Take()); + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; + return static_cast<CharType>(c); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); + } +}; + +//! UTF-16 big endian encoding. +template<typename CharType = wchar_t> +struct UTF16BE : UTF16<CharType> { + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; + } + + template <typename InputByteStream> + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())); + return static_cast<CharType>(c); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF32 + +//! UTF-32 encoding. +/*! http://en.wikipedia.org/wiki/UTF-32 + \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. + \note implements Encoding concept + + \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. + For streaming, use UTF32LE and UTF32BE, which handle endianness. +*/ +template<typename CharType = unsigned> +struct UTF32 { + typedef CharType Ch; + RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4); + + enum { supportUnicode = 1 }; + + template<typename OutputStream> + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + os.Put(codepoint); + } + + template<typename OutputStream> + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + PutUnsafe(os, codepoint); + } + + template <typename InputStream> + static bool Decode(InputStream& is, unsigned* codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); + Ch c = is.Take(); + *codepoint = c; + return c <= 0x10FFFF; + } + + template <typename InputStream, typename OutputStream> + static bool Validate(InputStream& is, OutputStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); + Ch c; + os.Put(c = is.Take()); + return c <= 0x10FFFF; + } +}; + +//! UTF-32 little endian enocoding. +template<typename CharType = unsigned> +struct UTF32LE : UTF32<CharType> { + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; + } + + template <typename InputByteStream> + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast<uint8_t>(is.Take()); + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; + return static_cast<CharType>(c); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); + os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); + } +}; + +//! UTF-32 big endian encoding. +template<typename CharType = unsigned> +struct UTF32BE : UTF32<CharType> { + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; + } + + template <typename InputByteStream> + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())); + return static_cast<CharType>(c); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); + os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// ASCII + +//! ASCII encoding. +/*! http://en.wikipedia.org/wiki/ASCII + \tparam CharType Code unit for storing 7-bit ASCII data. Default is char. + \note implements Encoding concept +*/ +template<typename CharType = char> +struct ASCII { + typedef CharType Ch; + + enum { supportUnicode = 0 }; + + template<typename OutputStream> + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_ASSERT(codepoint <= 0x7F); + os.Put(static_cast<Ch>(codepoint & 0xFF)); + } + + template<typename OutputStream> + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_ASSERT(codepoint <= 0x7F); + PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); + } + + template <typename InputStream> + static bool Decode(InputStream& is, unsigned* codepoint) { + uint8_t c = static_cast<uint8_t>(is.Take()); + *codepoint = c; + return c <= 0X7F; + } + + template <typename InputStream, typename OutputStream> + static bool Validate(InputStream& is, OutputStream& os) { + uint8_t c = static_cast<uint8_t>(is.Take()); + os.Put(static_cast<typename OutputStream::Ch>(c)); + return c <= 0x7F; + } + + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + uint8_t c = static_cast<uint8_t>(Take(is)); + return static_cast<Ch>(c); + } + + template <typename InputByteStream> + static Ch Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + return static_cast<Ch>(is.Take()); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + (void)os; + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, Ch c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(c)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// AutoUTF + +//! Runtime-specified UTF encoding type of a stream. +enum UTFType { + kUTF8 = 0, //!< UTF-8. + kUTF16LE = 1, //!< UTF-16 little endian. + kUTF16BE = 2, //!< UTF-16 big endian. + kUTF32LE = 3, //!< UTF-32 little endian. + kUTF32BE = 4 //!< UTF-32 big endian. +}; + +//! Dynamically select encoding according to stream's runtime-specified UTF encoding type. +/*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType(). +*/ +template<typename CharType> +struct AutoUTF { + typedef CharType Ch; + + enum { supportUnicode = 1 }; + +#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x + + template<typename OutputStream> + static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint) { + typedef void (*EncodeFunc)(OutputStream&, unsigned); + static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) }; + (*f[os.GetType()])(os, codepoint); + } + + template<typename OutputStream> + static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + typedef void (*EncodeFunc)(OutputStream&, unsigned); + static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) }; + (*f[os.GetType()])(os, codepoint); + } + + template <typename InputStream> + static RAPIDJSON_FORCEINLINE bool Decode(InputStream& is, unsigned* codepoint) { + typedef bool (*DecodeFunc)(InputStream&, unsigned*); + static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) }; + return (*f[is.GetType()])(is, codepoint); + } + + template <typename InputStream, typename OutputStream> + static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) { + typedef bool (*ValidateFunc)(InputStream&, OutputStream&); + static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) }; + return (*f[is.GetType()])(is, os); + } + +#undef RAPIDJSON_ENCODINGS_FUNC +}; + +/////////////////////////////////////////////////////////////////////////////// +// Transcoder + +//! Encoding conversion. +template<typename SourceEncoding, typename TargetEncoding> +struct Transcoder { + //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream. + template<typename InputStream, typename OutputStream> + static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) { + unsigned codepoint; + if (!SourceEncoding::Decode(is, &codepoint)) + return false; + TargetEncoding::Encode(os, codepoint); + return true; + } + + template<typename InputStream, typename OutputStream> + static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) { + unsigned codepoint; + if (!SourceEncoding::Decode(is, &codepoint)) + return false; + TargetEncoding::EncodeUnsafe(os, codepoint); + return true; + } + + //! Validate one Unicode codepoint from an encoded stream. + template<typename InputStream, typename OutputStream> + static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) { + return Transcode(is, os); // Since source/target encoding is different, must transcode. + } +}; + +// Forward declaration. +template<typename Stream> +inline void PutUnsafe(Stream& stream, typename Stream::Ch c); + +//! Specialization of Transcoder with same source and target encoding. +template<typename Encoding> +struct Transcoder<Encoding, Encoding> { + template<typename InputStream, typename OutputStream> + static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) { + os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary template class. + return true; + } + + template<typename InputStream, typename OutputStream> + static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) { + PutUnsafe(os, is.Take()); // Just copy one code unit. This semantic is different from primary template class. + return true; + } + + template<typename InputStream, typename OutputStream> + static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) { + return Encoding::Validate(is, os); // source/target encoding are the same + } +}; + +RAPIDJSON_NAMESPACE_END + +#if defined(__GNUC__) || (defined(_MSC_VER) && !defined(__clang__)) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_ENCODINGS_H_ |