summaryrefslogtreecommitdiff
path: root/include/rapidjson/encodedstream.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/rapidjson/encodedstream.h')
-rw-r--r--include/rapidjson/encodedstream.h299
1 files changed, 299 insertions, 0 deletions
diff --git a/include/rapidjson/encodedstream.h b/include/rapidjson/encodedstream.h
new file mode 100644
index 0000000..223601c
--- /dev/null
+++ b/include/rapidjson/encodedstream.h
@@ -0,0 +1,299 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_ENCODEDSTREAM_H_
+#define RAPIDJSON_ENCODEDSTREAM_H_
+
+#include "stream.h"
+#include "memorystream.h"
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Input byte stream wrapper with a statically bound encoding.
+/*!
+ \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
+ \tparam InputByteStream Type of input byte stream. For example, FileReadStream.
+*/
+template <typename Encoding, typename InputByteStream>
+class EncodedInputStream {
+ RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+public:
+ typedef typename Encoding::Ch Ch;
+
+ EncodedInputStream(InputByteStream& is) : is_(is) {
+ current_ = Encoding::TakeBOM(is_);
+ }
+
+ Ch Peek() const { return current_; }
+ Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; }
+ size_t Tell() const { return is_.Tell(); }
+
+ // Not implemented
+ void Put(Ch) { RAPIDJSON_ASSERT(false); }
+ void Flush() { RAPIDJSON_ASSERT(false); }
+ Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+ size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+private:
+ EncodedInputStream(const EncodedInputStream&);
+ EncodedInputStream& operator=(const EncodedInputStream&);
+
+ InputByteStream& is_;
+ Ch current_;
+};
+
+//! Specialized for UTF8 MemoryStream.
+template <>
+class EncodedInputStream<UTF8<>, MemoryStream> {
+public:
+ typedef UTF8<>::Ch Ch;
+
+ EncodedInputStream(MemoryStream& is) : is_(is) {
+ if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take();
+ if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take();
+ if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take();
+ }
+ Ch Peek() const { return is_.Peek(); }
+ Ch Take() { return is_.Take(); }
+ size_t Tell() const { return is_.Tell(); }
+
+ // Not implemented
+ void Put(Ch) {}
+ void Flush() {}
+ Ch* PutBegin() { return 0; }
+ size_t PutEnd(Ch*) { return 0; }
+
+ MemoryStream& is_;
+
+private:
+ EncodedInputStream(const EncodedInputStream&);
+ EncodedInputStream& operator=(const EncodedInputStream&);
+};
+
+//! Output byte stream wrapper with statically bound encoding.
+/*!
+ \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
+ \tparam OutputByteStream Type of input byte stream. For example, FileWriteStream.
+*/
+template <typename Encoding, typename OutputByteStream>
+class EncodedOutputStream {
+ RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+public:
+ typedef typename Encoding::Ch Ch;
+
+ EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) {
+ if (putBOM)
+ Encoding::PutBOM(os_);
+ }
+
+ void Put(Ch c) { Encoding::Put(os_, c); }
+ void Flush() { os_.Flush(); }
+
+ // Not implemented
+ Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
+ Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
+ size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
+ Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+ size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+private:
+ EncodedOutputStream(const EncodedOutputStream&);
+ EncodedOutputStream& operator=(const EncodedOutputStream&);
+
+ OutputByteStream& os_;
+};
+
+#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
+
+//! Input stream wrapper with dynamically bound encoding and automatic encoding detection.
+/*!
+ \tparam CharType Type of character for reading.
+ \tparam InputByteStream type of input byte stream to be wrapped.
+*/
+template <typename CharType, typename InputByteStream>
+class AutoUTFInputStream {
+ RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+public:
+ typedef CharType Ch;
+
+ //! Constructor.
+ /*!
+ \param is input stream to be wrapped.
+ \param type UTF encoding type if it is not detected from the stream.
+ */
+ AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) {
+ RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
+ DetectType();
+ static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
+ takeFunc_ = f[type_];
+ current_ = takeFunc_(*is_);
+ }
+
+ UTFType GetType() const { return type_; }
+ bool HasBOM() const { return hasBOM_; }
+
+ Ch Peek() const { return current_; }
+ Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; }
+ size_t Tell() const { return is_->Tell(); }
+
+ // Not implemented
+ void Put(Ch) { RAPIDJSON_ASSERT(false); }
+ void Flush() { RAPIDJSON_ASSERT(false); }
+ Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+ size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+private:
+ AutoUTFInputStream(const AutoUTFInputStream&);
+ AutoUTFInputStream& operator=(const AutoUTFInputStream&);
+
+ // Detect encoding type with BOM or RFC 4627
+ void DetectType() {
+ // BOM (Byte Order Mark):
+ // 00 00 FE FF UTF-32BE
+ // FF FE 00 00 UTF-32LE
+ // FE FF UTF-16BE
+ // FF FE UTF-16LE
+ // EF BB BF UTF-8
+
+ const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4());
+ if (!c)
+ return;
+
+ unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
+ hasBOM_ = false;
+ if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
+ else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
+ else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); }
+ else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); }
+ else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); }
+
+ // RFC 4627: Section 3
+ // "Since the first two characters of a JSON text will always be ASCII
+ // characters [RFC0020], it is possible to determine whether an octet
+ // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
+ // at the pattern of nulls in the first four octets."
+ // 00 00 00 xx UTF-32BE
+ // 00 xx 00 xx UTF-16BE
+ // xx 00 00 00 UTF-32LE
+ // xx 00 xx 00 UTF-16LE
+ // xx xx xx xx UTF-8
+
+ if (!hasBOM_) {
+ int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
+ switch (pattern) {
+ case 0x08: type_ = kUTF32BE; break;
+ case 0x0A: type_ = kUTF16BE; break;
+ case 0x01: type_ = kUTF32LE; break;
+ case 0x05: type_ = kUTF16LE; break;
+ case 0x0F: type_ = kUTF8; break;
+ default: break; // Use type defined by user.
+ }
+ }
+
+ // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
+ if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
+ if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
+ }
+
+ typedef Ch (*TakeFunc)(InputByteStream& is);
+ InputByteStream* is_;
+ UTFType type_;
+ Ch current_;
+ TakeFunc takeFunc_;
+ bool hasBOM_;
+};
+
+//! Output stream wrapper with dynamically bound encoding and automatic encoding detection.
+/*!
+ \tparam CharType Type of character for writing.
+ \tparam OutputByteStream type of output byte stream to be wrapped.
+*/
+template <typename CharType, typename OutputByteStream>
+class AutoUTFOutputStream {
+ RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+public:
+ typedef CharType Ch;
+
+ //! Constructor.
+ /*!
+ \param os output stream to be wrapped.
+ \param type UTF encoding type.
+ \param putBOM Whether to write BOM at the beginning of the stream.
+ */
+ AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) {
+ RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
+
+ // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
+ if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
+ if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
+
+ static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) };
+ putFunc_ = f[type_];
+
+ if (putBOM)
+ PutBOM();
+ }
+
+ UTFType GetType() const { return type_; }
+
+ void Put(Ch c) { putFunc_(*os_, c); }
+ void Flush() { os_->Flush(); }
+
+ // Not implemented
+ Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
+ Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
+ size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
+ Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+ size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+private:
+ AutoUTFOutputStream(const AutoUTFOutputStream&);
+ AutoUTFOutputStream& operator=(const AutoUTFOutputStream&);
+
+ void PutBOM() {
+ typedef void (*PutBOMFunc)(OutputByteStream&);
+ static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) };
+ f[type_](*os_);
+ }
+
+ typedef void (*PutFunc)(OutputByteStream&, Ch);
+
+ OutputByteStream* os_;
+ UTFType type_;
+ PutFunc putFunc_;
+};
+
+#undef RAPIDJSON_ENCODINGS_FUNC
+
+RAPIDJSON_NAMESPACE_END
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_FILESTREAM_H_