Commit dfcc46a6, authored by Johannes Henkel, committed by Commit Bot

[DevTools] Roll inspector_protocol (v8).

New Revision: 8b7ea912e516a6daa61487c700687a9426e3a396

Update v8 files / build config accordingly.
- There's now a new library in third_party/inspector_protocol,
  bindings/bindings.h, which is configured much like encoding/encoding.h.
  It doesn't have much stuff in it yet, but will soon get more code
  that would otherwise need to go into jinja templates.
  It also comes with a new test, only a smoke test thus far.

Change-Id: I9c00a54a840c214b4bb744a3b272e5ce221954fc
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1678273
Reviewed-by: Alexei Filippov <alph@chromium.org>
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Johannes Henkel <johannes@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62442}
parent 3c4f9bfa
......@@ -67,9 +67,6 @@ config("inspector_config") {
configs = [ "../../:internal_config" ]
include_dirs = [
"../../include",
# For including encoding/encoding.h.
"../../third_party/inspector_protocol",
]
}
......@@ -101,6 +98,7 @@ v8_source_set("inspector") {
":inspector_string_conversions",
"../..:v8_version",
"../../third_party/inspector_protocol:encoding",
"../../third_party/inspector_protocol:bindings",
]
public_deps = [
......
......@@ -47,7 +47,10 @@
},
"encoding_lib": {
"header": "encoding/encoding.h",
"namespace": ["v8_inspector_protocol_encoding"]
"namespace": "v8_inspector_protocol_encoding"
},
"bindings_lib": {
"namespace": "v8_inspector_protocol_bindings"
}
}
......@@ -301,6 +301,7 @@ v8_source_set("unittests_sources") {
"../..:v8_libbase",
"../..:v8_libplatform",
"../../third_party/inspector_protocol:encoding_test",
"../../third_party/inspector_protocol:bindings_test",
"//build/win:default_exe_manifest",
"//testing/gmock",
"//testing/gtest",
......
......@@ -9,6 +9,13 @@ static_library("encoding") {
]
}
# Static library built from the hand-written bindings support code in
# bindings/bindings.{h,cc}.
static_library("bindings") {
sources = [
"bindings/bindings.cc",
"bindings/bindings.h",
]
}
# encoding_test is part of the unittests, defined in
# test/unittests/BUILD.gn.
......@@ -32,3 +39,23 @@ v8_source_set("encoding_test") {
]
testonly = true
}
# Test-only source set for the bindings library. It compiles
# bindings/bindings_test.cc together with its helper header and links against
# the ":bindings" target plus gmock / gtest.
v8_source_set("bindings_test") {
sources = [
"bindings/bindings_test.cc",
"bindings/bindings_test_helper.h",
]
configs = [
"../..:external_config",
"../..:internal_config_base",
]
deps = [
":bindings",
"../..:v8_libbase",
"../../src/inspector:inspector_string_conversions",
"//testing/gmock",
"//testing/gtest",
]
# Restricts this target to test-only dependents.
testonly = true
}
......@@ -2,7 +2,7 @@ Name: inspector protocol
Short Name: inspector_protocol
URL: https://chromium.googlesource.com/deps/inspector_protocol/
Version: 0
Revision: 83b1154a9661d22bba9a368d368214cc20880419
Revision: 8b7ea912e516a6daa61487c700687a9426e3a396
License: BSD
License File: LICENSE
Security Critical: no
......
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "bindings.h"
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_INSPECTOR_PROTOCOL_BINDINGS_BINDINGS_H_
#define V8_INSPECTOR_PROTOCOL_BINDINGS_BINDINGS_H_
#include <cassert>
#include <memory>
namespace v8_inspector_protocol_bindings {
namespace glue {
// =============================================================================
// glue::detail::PtrMaybe, glue::detail::ValueMaybe, templates for optional
// pointers / values which are used in ../lib/Forward_h.template.
// =============================================================================
namespace detail {
// Optional, owning pointer to a T: either empty or holding exactly one
// object through a std::unique_ptr.
template <typename T>
class PtrMaybe {
 public:
  // Starts out empty.
  PtrMaybe() = default;
  // Takes ownership of |value| (which may itself be null).
  PtrMaybe(std::unique_ptr<T> value) : ptr_(std::move(value)) {}
  PtrMaybe(PtrMaybe&& other) noexcept : ptr_(std::move(other.ptr_)) {}

  // Replaces whatever is currently held with |value|.
  void operator=(std::unique_ptr<T> value) { ptr_ = std::move(value); }

  // Returns a borrowed pointer to the held object. Asserts that a value is
  // present.
  T* fromJust() const {
    assert(ptr_);
    return ptr_.get();
  }

  // Returns the held object if there is one, |default_value| otherwise.
  T* fromMaybe(T* default_value) const {
    if (ptr_) return ptr_.get();
    return default_value;
  }

  // True iff an object is currently held.
  bool isJust() const { return static_cast<bool>(ptr_); }

  // Transfers ownership of the held object to the caller, leaving this
  // instance empty. Asserts that a value is present.
  std::unique_ptr<T> takeJust() {
    assert(ptr_);
    std::unique_ptr<T> taken = std::move(ptr_);
    return taken;
  }

 private:
  std::unique_ptr<T> ptr_;
};
// Optional value of type T stored inline, together with a flag recording
// whether a value has been set.
template <typename T>
class ValueMaybe {
 public:
  // Starts out without a value; |value_| is value-initialized.
  ValueMaybe() : is_just_(false), value_() {}
  // Holds |value|.
  ValueMaybe(T value) : is_just_(true), value_(std::move(value)) {}
  ValueMaybe(ValueMaybe&& other) noexcept
      : is_just_(other.is_just_), value_(std::move(other.value_)) {}

  // Stores |value| and marks this instance as holding a value. The by-value
  // parameter is moved (not copied) into place, matching the constructor;
  // this also makes assignment usable with move-only T.
  void operator=(T value) {
    value_ = std::move(value);
    is_just_ = true;
  }

  // Returns the held value. Asserts that a value is present.
  const T& fromJust() const {
    assert(is_just_);
    return value_;
  }

  // Returns the held value if there is one, |default_value| otherwise.
  const T& fromMaybe(const T& default_value) const {
    return is_just_ ? value_ : default_value;
  }

  // True iff a value has been set.
  bool isJust() const { return is_just_; }

  // Moves the held value out. Asserts that a value is present.
  // TODO(johannes): |is_just_| isn't reset by this operation -
  // introduce && to ensure avoiding continued usage of |this|?
  T takeJust() {
    assert(is_just_);
    return std::move(value_);
  }

 private:
  bool is_just_;
  T value_;
};
} // namespace detail
} // namespace glue
} // namespace v8_inspector_protocol_bindings
// Declares the copy constructor and copy assignment operator of |ClassName|
// as deleted. The expansion begins with "private:", so any members declared
// after the macro are private unless another access specifier follows. The
// expansion has no trailing semicolon; the use site supplies it.
#define PROTOCOL_DISALLOW_COPY(ClassName) \
private: \
ClassName(const ClassName&) = delete; \
ClassName& operator=(const ClassName&) = delete
#endif // V8_INSPECTOR_PROTOCOL_BINDINGS_BINDINGS_H_
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "bindings.h"
#include <string>
#include <vector>
#include "bindings_test_helper.h"
namespace v8_inspector_protocol_bindings {
namespace glue {
// =============================================================================
// glue::detail::PtrMaybe, glue::detail::ValueMaybe, templates for optional
// pointers / values which are used in ../lib/Forward_h.template.
// =============================================================================
// Walks a PtrMaybe through its life cycle: empty, assigned, then taken.
TEST(PtrMaybeTest, SmokeTest) {
  detail::PtrMaybe<std::vector<uint32_t>> maybe_vector;
  // A default-constructed instance holds nothing.
  EXPECT_FALSE(maybe_vector.isJust());
  EXPECT_TRUE(nullptr == maybe_vector.fromMaybe(nullptr));

  std::unique_ptr<std::vector<uint32_t>> numbers(
      new std::vector<uint32_t>{42, 21});
  maybe_vector = std::move(numbers);
  EXPECT_TRUE(maybe_vector.isJust());
  EXPECT_THAT(*maybe_vector.fromJust(), testing::ElementsAre(42, 21));

  // Taking the value hands over ownership and leaves the instance empty.
  std::unique_ptr<std::vector<uint32_t>> taken = maybe_vector.takeJust();
  EXPECT_FALSE(maybe_vector.isJust());
  EXPECT_THAT(*taken, testing::ElementsAre(42, 21));
}
// Walks a ValueMaybe<int32_t> through: empty, assigned, then taken.
TEST(PtrValueTest, SmokeTest) {
  detail::ValueMaybe<int32_t> maybe_number;
  // Without a value, fromMaybe falls back to the supplied default.
  EXPECT_FALSE(maybe_number.isJust());
  EXPECT_EQ(-1, maybe_number.fromMaybe(-1));

  maybe_number = 42;
  EXPECT_TRUE(maybe_number.isJust());
  EXPECT_EQ(42, maybe_number.fromJust());

  const int32_t taken = maybe_number.takeJust();
  EXPECT_EQ(taken, 42);
}
} // namespace glue
} // namespace v8_inspector_protocol_bindings
// Copyright 2019 The V8 Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// This file is V8 specific, to make bindings_test.cc work.
// It is not rolled from the upstream project.
#ifndef V8_INSPECTOR_PROTOCOL_BINDINGS_BINDINGS_TEST_HELPER_H_
#define V8_INSPECTOR_PROTOCOL_BINDINGS_BINDINGS_TEST_HELPER_H_
#include <string>
#include <vector>
#include "src/base/logging.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#endif // V8_INSPECTOR_PROTOCOL_BINDINGS_BINDINGS_TEST_HELPER_H_
......@@ -65,12 +65,14 @@ def read_config():
cmdline_parser.add_argument("--jinja_dir", type=unicode, required=True)
cmdline_parser.add_argument("--config", type=unicode, required=True)
cmdline_parser.add_argument("--config_value", default=[], action="append")
cmdline_parser.add_argument("--inspector_protocol_dir", type=unicode, required=True)
arg_options = cmdline_parser.parse_args()
jinja_dir = arg_options.jinja_dir
output_base = arg_options.output_base
config_file = arg_options.config
config_base = os.path.dirname(config_file)
config_values = arg_options.config_value
inspector_protocol_dir = arg_options.inspector_protocol_dir.lstrip('/')
except Exception:
# Work with python 2 and 3 http://docs.python.org/py3k/howto/pyporting.html
exc = sys.exc_info()[1]
......@@ -103,15 +105,12 @@ def read_config():
".lib.export_header": False,
# The encoding lib consists of encoding/encoding.h and
# encoding/encoding.cc in its subdirectory, which binaries
# may link / depend on, instead of relying on the
# JINJA2 templates lib/encoding_{h,cc}.template.
# In that case, |header| identifies the include file
# and |namespace| is the namespace it's using. Usually
# inspector_protocol_encoding but for v8's copy it's
# v8_inspector_protocol_encoding.
# TODO(johannes): Migrate away from lib/encoding_{h,cc}.template
# in favor of this.
".encoding_lib": { "header": "", "namespace": []},
# must link / depend on.
".encoding_lib.header": os.path.join(inspector_protocol_dir, "encoding/encoding.h"),
".encoding_lib.namespace": "",
# Ditto for bindings, see bindings/bindings.h.
".bindings_lib.header": os.path.join(inspector_protocol_dir, "bindings/bindings.h"),
".bindings_lib.namespace": ""
}
for key_value in config_values:
parts = key_value.split("=")
......@@ -635,10 +634,8 @@ def main():
"Values_h.template",
"Object_h.template",
"ValueConversions_h.template",
"Maybe_h.template",
"DispatcherBase_h.template",
"Parser_h.template",
"encoding_h.template",
]
protocol_cpp_templates = [
......@@ -648,12 +645,10 @@ def main():
"Object_cpp.template",
"DispatcherBase_cpp.template",
"Parser_cpp.template",
"encoding_cpp.template",
]
forward_h_templates = [
"Forward_h.template",
"Allocator_h.template",
"FrontendChannel_h.template",
]
......
......@@ -33,16 +33,12 @@ template("inspector_protocol_generate") {
invoker.config_file,
"$inspector_protocol_dir/lib/base_string_adapter_cc.template",
"$inspector_protocol_dir/lib/base_string_adapter_h.template",
"$inspector_protocol_dir/lib/encoding_h.template",
"$inspector_protocol_dir/lib/encoding_cpp.template",
"$inspector_protocol_dir/lib/Allocator_h.template",
"$inspector_protocol_dir/lib/DispatcherBase_cpp.template",
"$inspector_protocol_dir/lib/DispatcherBase_h.template",
"$inspector_protocol_dir/lib/ErrorSupport_cpp.template",
"$inspector_protocol_dir/lib/ErrorSupport_h.template",
"$inspector_protocol_dir/lib/Forward_h.template",
"$inspector_protocol_dir/lib/FrontendChannel_h.template",
"$inspector_protocol_dir/lib/Maybe_h.template",
"$inspector_protocol_dir/lib/Object_cpp.template",
"$inspector_protocol_dir/lib/Object_h.template",
"$inspector_protocol_dir/lib/Parser_cpp.template",
......@@ -67,6 +63,8 @@ template("inspector_protocol_generate") {
rebase_path(invoker.out_dir, root_build_dir),
"--config",
rebase_path(invoker.config_file, root_build_dir),
"--inspector_protocol_dir",
"$inspector_protocol_dir",
]
if (defined(invoker.config_values)) {
......
......@@ -5,16 +5,12 @@
{
'variables': {
'inspector_protocol_files': [
'lib/encoding_h.template',
'lib/encoding_cpp.template',
'lib/Allocator_h.template',
'lib/DispatcherBase_cpp.template',
'lib/DispatcherBase_h.template',
'lib/ErrorSupport_cpp.template',
'lib/ErrorSupport_h.template',
'lib/Forward_h.template',
'lib/FrontendChannel_h.template',
'lib/Maybe_h.template',
'lib/Object_cpp.template',
'lib/Object_h.template',
'lib/Parser_cpp.template',
......
// This file is generated by Allocator_h.template.
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef {{"_".join(config.protocol.namespace)}}_Allocator_h
#define {{"_".join(config.protocol.namespace)}}_Allocator_h
{% for namespace in config.protocol.namespace %}
namespace {{namespace}} {
{% endfor %}
// Declares the copy constructor and copy assignment operator of |ClassName|
// as deleted. The expansion begins with "private:", so members declared after
// the macro are private unless another access specifier follows.
#define PROTOCOL_DISALLOW_COPY(ClassName) \
private: \
ClassName(const ClassName&) = delete; \
ClassName& operator=(const ClassName&) = delete
{% for namespace in config.protocol.namespace %}
} // namespace {{namespace}}
{% endfor %}
#endif // !defined({{"_".join(config.protocol.namespace)}}_Allocator_h)
......@@ -18,6 +18,8 @@
#include <unordered_map>
#include <unordered_set>
#include "{{config.bindings_lib.header}}"
{% for namespace in config.protocol.namespace %}
namespace {{namespace}} {
{% endfor %}
......@@ -54,6 +56,32 @@ struct ArrayTypedef<bool> { typedef std::vector<bool> type; };
template <typename T>
using Array = typename detail::ArrayTypedef<T>::type;
namespace detail {
using {{config.bindings_lib.namespace}}::glue::detail::PtrMaybe;
using {{config.bindings_lib.namespace}}::glue::detail::ValueMaybe;
// Selects the optional wrapper for a type T: PtrMaybe<T> in general, and
// ValueMaybe<T> for the types specialized below.
template <typename T>
struct MaybeTypedef { using type = PtrMaybe<T>; };

template <>
struct MaybeTypedef<bool> { using type = ValueMaybe<bool>; };

template <>
struct MaybeTypedef<int> { using type = ValueMaybe<int>; };

template <>
struct MaybeTypedef<double> { using type = ValueMaybe<double>; };

template <>
struct MaybeTypedef<String> { using type = ValueMaybe<String>; };

template <>
struct MaybeTypedef<Binary> { using type = ValueMaybe<Binary>; };
} // namespace detail
template <typename T>
using Maybe = typename detail::MaybeTypedef<T>::type;
{% for namespace in config.protocol.namespace %}
} // namespace {{namespace}}
{% endfor %}
......
// This file is generated by Maybe_h.template.
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef {{"_".join(config.protocol.namespace)}}_Maybe_h
#define {{"_".join(config.protocol.namespace)}}_Maybe_h
//#include "Forward.h"
{% for namespace in config.protocol.namespace %}
namespace {{namespace}} {
{% endfor %}
namespace detail {
// Optional, owning pointer to a T, backed by a std::unique_ptr.
template<typename T>
class PtrMaybe {
public:
    PtrMaybe() = default;
    PtrMaybe(std::unique_ptr<T> value) : m_ptr(std::move(value)) { }
    PtrMaybe(PtrMaybe&& other) noexcept : m_ptr(std::move(other.m_ptr)) {}

    // Replaces whatever is currently held with |value|.
    void operator=(std::unique_ptr<T> value) { m_ptr = std::move(value); }

    // Borrowed pointer to the held object; DCHECKs that one is present.
    T* fromJust() const
    {
        DCHECK(m_ptr);
        return m_ptr.get();
    }

    // The held object if there is one, |defaultValue| otherwise.
    T* fromMaybe(T* defaultValue) const
    {
        if (m_ptr)
            return m_ptr.get();
        return defaultValue;
    }

    bool isJust() const { return m_ptr != nullptr; }

    // Hands ownership of the held object to the caller; DCHECKs that one is
    // present.
    std::unique_ptr<T> takeJust()
    {
        DCHECK(m_ptr);
        std::unique_ptr<T> taken = std::move(m_ptr);
        return taken;
    }

private:
    std::unique_ptr<T> m_ptr;
};
// Optional value of type T stored inline, together with a flag recording
// whether a value has been set.
template<typename T>
class ValueMaybe {
public:
    // Starts out without a value; |m_value| is value-initialized.
    ValueMaybe() : m_isJust(false), m_value() { }
    ValueMaybe(T value) : m_isJust(true), m_value(std::move(value)) { }
    ValueMaybe(ValueMaybe&& other) noexcept
        : m_isJust(other.m_isJust),
          m_value(std::move(other.m_value)) {}

    // Stores |value| and marks this instance as holding a value. The by-value
    // parameter is moved (not copied) into place, matching the constructor.
    void operator=(T value) { m_value = std::move(value); m_isJust = true; }

    // The held value; DCHECKs that one is present.
    const T& fromJust() const { DCHECK(m_isJust); return m_value; }

    // The held value if there is one, |defaultValue| otherwise.
    const T& fromMaybe(const T& defaultValue) const { return m_isJust ? m_value : defaultValue; }

    bool isJust() const { return m_isJust; }

    // Moves the held value out; DCHECKs that one is present. Note that
    // |m_isJust| is not reset by this operation.
    T takeJust() { DCHECK(m_isJust); return std::move(m_value); }

private:
    bool m_isJust;
    T m_value;
};
// Selects the optional wrapper for a type T: PtrMaybe<T> in general, and
// ValueMaybe<T> for the types specialized below.
template <typename T>
struct MaybeTypedef { using type = PtrMaybe<T>; };

template <>
struct MaybeTypedef<bool> { using type = ValueMaybe<bool>; };

template <>
struct MaybeTypedef<int> { using type = ValueMaybe<int>; };

template <>
struct MaybeTypedef<double> { using type = ValueMaybe<double>; };

template <>
struct MaybeTypedef<String> { using type = ValueMaybe<String>; };

template <>
struct MaybeTypedef<Binary> { using type = ValueMaybe<Binary>; };
} // namespace detail
template <typename T>
using Maybe = typename detail::MaybeTypedef<T>::type;
{% for namespace in config.protocol.namespace %}
} // namespace {{namespace}}
{% endfor %}
#endif // !defined({{"_".join(config.protocol.namespace)}}_Maybe_h)
......@@ -6,9 +6,7 @@
//#include "Values.h"
{% if config.encoding_lib.header %}
#include "{{config.encoding_lib.header}}"
{% endif %}
{% for namespace in config.protocol.namespace %}
namespace {{namespace}} {
......@@ -68,29 +66,27 @@ void escapeStringForJSONInternal(const Char* str, unsigned len,
// to this constant.
static constexpr int kStackLimitValues = 1000;
{% if config.encoding_lib.namespace %}
using {{"::".join(config.encoding_lib.namespace)}}::Error;
using {{"::".join(config.encoding_lib.namespace)}}::Status;
using {{"::".join(config.encoding_lib.namespace)}}::span;
using {{config.encoding_lib.namespace}}::Error;
using {{config.encoding_lib.namespace}}::Status;
using {{config.encoding_lib.namespace}}::span;
namespace cbor {
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::CBORTokenTag;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::CBORTokenizer;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeBinary;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeDouble;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeFalse;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeFromLatin1;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeFromUTF16;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeIndefiniteLengthArrayStart;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeIndefiniteLengthMapStart;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeInt32;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeNull;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeStop;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeString8;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeTrue;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EnvelopeEncoder;
using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::InitialByteForEnvelope;
using {{config.encoding_lib.namespace}}::cbor::CBORTokenTag;
using {{config.encoding_lib.namespace}}::cbor::CBORTokenizer;
using {{config.encoding_lib.namespace}}::cbor::EncodeBinary;
using {{config.encoding_lib.namespace}}::cbor::EncodeDouble;
using {{config.encoding_lib.namespace}}::cbor::EncodeFalse;
using {{config.encoding_lib.namespace}}::cbor::EncodeFromLatin1;
using {{config.encoding_lib.namespace}}::cbor::EncodeFromUTF16;
using {{config.encoding_lib.namespace}}::cbor::EncodeIndefiniteLengthArrayStart;
using {{config.encoding_lib.namespace}}::cbor::EncodeIndefiniteLengthMapStart;
using {{config.encoding_lib.namespace}}::cbor::EncodeInt32;
using {{config.encoding_lib.namespace}}::cbor::EncodeNull;
using {{config.encoding_lib.namespace}}::cbor::EncodeStop;
using {{config.encoding_lib.namespace}}::cbor::EncodeString8;
using {{config.encoding_lib.namespace}}::cbor::EncodeTrue;
using {{config.encoding_lib.namespace}}::cbor::EnvelopeEncoder;
using {{config.encoding_lib.namespace}}::cbor::InitialByteForEnvelope;
} // namespace cbor
{% endif %}
// Below are three parsing routines for CBOR, which cover enough
// to roundtrip JSON messages.
......
{# This template is generated by gen_cbor_templates.py. #}
// Generated by lib/encoding_cpp.template.
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
{% if config.encoding_lib.header == "" %}
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <limits>
#include <stack>
{% for namespace in config.protocol.namespace %}
namespace {{namespace}} {
{% endfor %}
// ===== encoding/encoding.cc =====
// =============================================================================
// Status and Error codes
// =============================================================================
std::string Status::ToASCIIString() const {
switch (error) {
case Error::OK:
return "OK";
case Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS:
return ToASCIIString("JSON: unprocessed input remains");
case Error::JSON_PARSER_STACK_LIMIT_EXCEEDED:
return ToASCIIString("JSON: stack limit exceeded");
case Error::JSON_PARSER_NO_INPUT:
return ToASCIIString("JSON: no input");
case Error::JSON_PARSER_INVALID_TOKEN:
return ToASCIIString("JSON: invalid token");
case Error::JSON_PARSER_INVALID_NUMBER:
return ToASCIIString("JSON: invalid number");
case Error::JSON_PARSER_INVALID_STRING:
return ToASCIIString("JSON: invalid string");
case Error::JSON_PARSER_UNEXPECTED_ARRAY_END:
return ToASCIIString("JSON: unexpected array end");
case Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED:
return ToASCIIString("JSON: comma or array end expected");
case Error::JSON_PARSER_STRING_LITERAL_EXPECTED:
return ToASCIIString("JSON: string literal expected");
case Error::JSON_PARSER_COLON_EXPECTED:
return ToASCIIString("JSON: colon expected");
case Error::JSON_PARSER_UNEXPECTED_MAP_END:
return ToASCIIString("JSON: unexpected map end");
case Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED:
return ToASCIIString("JSON: comma or map end expected");
case Error::JSON_PARSER_VALUE_EXPECTED:
return ToASCIIString("JSON: value expected");
case Error::CBOR_INVALID_INT32:
return ToASCIIString("CBOR: invalid int32");
case Error::CBOR_INVALID_DOUBLE:
return ToASCIIString("CBOR: invalid double");
case Error::CBOR_INVALID_ENVELOPE:
return ToASCIIString("CBOR: invalid envelope");
case Error::CBOR_INVALID_STRING8:
return ToASCIIString("CBOR: invalid string8");
case Error::CBOR_INVALID_STRING16:
return ToASCIIString("CBOR: invalid string16");
case Error::CBOR_INVALID_BINARY:
return ToASCIIString("CBOR: invalid binary");
case Error::CBOR_UNSUPPORTED_VALUE:
return ToASCIIString("CBOR: unsupported value");
case Error::CBOR_NO_INPUT:
return ToASCIIString("CBOR: no input");
case Error::CBOR_INVALID_START_BYTE:
return ToASCIIString("CBOR: invalid start byte");
case Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE:
return ToASCIIString("CBOR: unexpected eof expected value");
case Error::CBOR_UNEXPECTED_EOF_IN_ARRAY:
return ToASCIIString("CBOR: unexpected eof in array");
case Error::CBOR_UNEXPECTED_EOF_IN_MAP:
return ToASCIIString("CBOR: unexpected eof in map");
case Error::CBOR_INVALID_MAP_KEY:
return ToASCIIString("CBOR: invalid map key");
case Error::CBOR_STACK_LIMIT_EXCEEDED:
return ToASCIIString("CBOR: stack limit exceeded");
case Error::CBOR_TRAILING_JUNK:
return ToASCIIString("CBOR: trailing junk");
case Error::CBOR_MAP_START_EXPECTED:
return ToASCIIString("CBOR: map start expected");
case Error::CBOR_MAP_STOP_EXPECTED:
return ToASCIIString("CBOR: map stop expected");
case Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED:
return ToASCIIString("CBOR: envelope size limit exceeded");
}
// Some compilers can't figure out that we can't get here.
return "INVALID ERROR CODE";
}
// Builds "<msg> at position <pos>" from |msg| and this status' |pos|.
std::string Status::ToASCIIString(const char* msg) const {
  std::string text(msg);
  text += " at position ";
  text += std::to_string(pos);
  return text;
}
namespace cbor {
namespace {
// Indicates the number of bits the "initial byte" needs to be shifted to the
// right after applying |kMajorTypeMask| to produce the major type in the
// lowermost bits.
static constexpr uint8_t kMajorTypeBitShift = 5u;
// Mask selecting the low-order 5 bits of the "initial byte", which is where
// the additional information is encoded.
static constexpr uint8_t kAdditionalInformationMask = 0x1f;
// Mask selecting the high-order 3 bits of the "initial byte", which indicates
// the major type of the encoded value.
static constexpr uint8_t kMajorTypeMask = 0xe0;
// Indicates the integer is in the following byte.
static constexpr uint8_t kAdditionalInformation1Byte = 24u;
// Indicates the integer is in the next 2 bytes.
static constexpr uint8_t kAdditionalInformation2Bytes = 25u;
// Indicates the integer is in the next 4 bytes.
static constexpr uint8_t kAdditionalInformation4Bytes = 26u;
// Indicates the integer is in the next 8 bytes.
static constexpr uint8_t kAdditionalInformation8Bytes = 27u;
// Builds a CBOR initial byte: the major |type| goes into the high-order
// 3 bits and the low-order 5 bits carry |additional_info| (masked with
// |kAdditionalInformationMask|).
constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) {
  return (additional_info & kAdditionalInformationMask) |
         (static_cast<uint8_t>(type) << kMajorTypeBitShift);
}
// TAG 24 indicates that what follows is a byte string which is
// encoded in CBOR format. We use this as a wrapper for
// maps and arrays, allowing us to skip them, because the
// byte string carries its size (byte length).
// https://tools.ietf.org/html/rfc7049#section-2.4.4.1
static constexpr uint8_t kInitialByteForEnvelope =
EncodeInitialByte(MajorType::TAG, 24);
// The initial byte for a byte string with at most 2^32 bytes
// of payload. This is used for envelope encoding, even if
// the byte string is shorter.
static constexpr uint8_t kInitialByteFor32BitLengthByteString =
EncodeInitialByte(MajorType::BYTE_STRING, 26);
// See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional
// info = 31.
static constexpr uint8_t kInitialByteIndefiniteLengthArray =
EncodeInitialByte(MajorType::ARRAY, 31);
static constexpr uint8_t kInitialByteIndefiniteLengthMap =
EncodeInitialByte(MajorType::MAP, 31);
// See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite
// length maps / arrays.
static constexpr uint8_t kStopByte =
EncodeInitialByte(MajorType::SIMPLE_VALUE, 31);
// See RFC 7049 Section 2.3, Table 2.
static constexpr uint8_t kEncodedTrue =
EncodeInitialByte(MajorType::SIMPLE_VALUE, 21);
static constexpr uint8_t kEncodedFalse =
EncodeInitialByte(MajorType::SIMPLE_VALUE, 20);
static constexpr uint8_t kEncodedNull =
EncodeInitialByte(MajorType::SIMPLE_VALUE, 22);
static constexpr uint8_t kInitialByteForDouble =
EncodeInitialByte(MajorType::SIMPLE_VALUE, 27);
// See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for
// arbitrary binary data encoded as BYTE_STRING.
static constexpr uint8_t kExpectedConversionToBase64Tag =
EncodeInitialByte(MajorType::TAG, 22);
// Appends the sizeof(T) bytes of |v| to |out| via push_back, beginning with
// the most significant byte and ending with the least significant one.
// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
template <typename T, class C>
void WriteBytesMostSignificantByteFirst(T v, C* out) {
  int shift_bits = static_cast<int>(sizeof(T)) * 8;
  while (shift_bits > 0) {
    shift_bits -= 8;
    out->push_back(0xff & (v >> shift_bits));
  }
}
// Assembles a value of type T (e.g. uint64_t, uint32_t, ...) from the first
// sizeof(T) bytes of |in|, treating in[0] as the most significant byte.
// Asserts that |in| holds at least sizeof(T) bytes.
// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
template <typename T>
T ReadBytesMostSignificantByteFirst(span<uint8_t> in) {
  assert(in.size() >= sizeof(T));
  T result = 0;
  for (size_t index = 0; index < sizeof(T); ++index) {
    // The byte at |index| lands (sizeof(T) - 1 - index) bytes above the
    // least significant position.
    const size_t shift_bits = (sizeof(T) - 1 - index) * 8;
    result |= T(in[index]) << shift_bits;
  }
  return result;
}
} // namespace
namespace internals {
// Reads the start of a token with definitive size from |bytes|.
// |type| receives the major type as specified in RFC 7049 Section 2.1.
// |value| receives the payload (e.g. for MajorType::UNSIGNED) or the size
// (e.g. for BYTE_STRING).
// Returns the number of bytes consumed on success and 0 on failure. Note that
// |type| is written as soon as |bytes| is non-empty, even if 0 is returned
// afterwards.
size_t ReadTokenStart(span<uint8_t> bytes, MajorType* type, uint64_t* value) {
  if (bytes.empty())
    return 0;
  const uint8_t initial_byte = bytes[0];
  *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift);
  const uint8_t additional_information =
      initial_byte & kAdditionalInformationMask;
  if (additional_information < 24) {
    // Values 0-23 are encoded directly into the additional info of the
    // initial byte.
    *value = additional_information;
    return 1;
  }
  switch (additional_information) {
    case kAdditionalInformation1Byte:
      // Values 24-255 are encoded with one initial byte, followed by the
      // value.
      if (bytes.size() < 2)
        return 0;
      *value = ReadBytesMostSignificantByteFirst<uint8_t>(bytes.subspan(1));
      return 2;
    case kAdditionalInformation2Bytes:
      // Values 256-65535: 1 initial byte + 2 bytes payload.
      if (bytes.size() < 1 + sizeof(uint16_t))
        return 0;
      *value = ReadBytesMostSignificantByteFirst<uint16_t>(bytes.subspan(1));
      return 3;
    case kAdditionalInformation4Bytes:
      // 32 bit uint: 1 initial byte + 4 bytes payload.
      if (bytes.size() < 1 + sizeof(uint32_t))
        return 0;
      *value = ReadBytesMostSignificantByteFirst<uint32_t>(bytes.subspan(1));
      return 5;
    case kAdditionalInformation8Bytes:
      // 64 bit uint: 1 initial byte + 8 bytes payload.
      if (bytes.size() < 1 + sizeof(uint64_t))
        return 0;
      *value = ReadBytesMostSignificantByteFirst<uint64_t>(bytes.subspan(1));
      return 9;
    default:
      // Any other additional information value is not handled here.
      return 0;
  }
}
// Appends the start of a token with major |type| to |encoded|. |value| is
// either a size or, for unsigned integers, the payload itself. The smallest
// of the five encodings that can hold |value| is chosen.
template <typename C>
void WriteTokenStartTmpl(MajorType type, uint64_t value, C* encoded) {
  if (value < 24) {
    // Values 0-23 are encoded directly into the additional info of the
    // initial byte.
    encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value));
  } else if (value <= std::numeric_limits<uint8_t>::max()) {
    // Values 24-255 are encoded with one initial byte, followed by the value.
    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte));
    encoded->push_back(value);
  } else if (value <= std::numeric_limits<uint16_t>::max()) {
    // Values 256-65535: 1 initial byte + 2 bytes payload.
    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes));
    WriteBytesMostSignificantByteFirst<uint16_t>(value, encoded);
  } else if (value <= std::numeric_limits<uint32_t>::max()) {
    // 32 bit uint: 1 initial byte + 4 bytes payload.
    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes));
    WriteBytesMostSignificantByteFirst<uint32_t>(static_cast<uint32_t>(value),
                                                 encoded);
  } else {
    // 64 bit uint: 1 initial byte + 8 bytes payload.
    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes));
    WriteBytesMostSignificantByteFirst<uint64_t>(value, encoded);
  }
}
// Non-template entry point: appends the token start for |type| / |value| to a
// std::vector<uint8_t> by forwarding to WriteTokenStartTmpl.
void WriteTokenStart(MajorType type,
uint64_t value,
std::vector<uint8_t>* encoded) {
WriteTokenStartTmpl(type, value, encoded);
}
// Same as the overload above, but appends to a std::string.
void WriteTokenStart(MajorType type, uint64_t value, std::string* encoded) {
WriteTokenStartTmpl(type, value, encoded);
}
} // namespace internals
// =============================================================================
// Detecting CBOR content
// =============================================================================
// Returns |kInitialByteForEnvelope|, the initial byte that starts an
// envelope.
uint8_t InitialByteForEnvelope() {
return kInitialByteForEnvelope;
}
// Returns |kInitialByteFor32BitLengthByteString|, the initial byte of a byte
// string whose length is encoded in 4 bytes.
uint8_t InitialByteFor32BitLengthByteString() {
return kInitialByteFor32BitLengthByteString;
}
// Returns true iff |msg| has at least 6 bytes and begins with the envelope
// initial byte followed by the initial byte for a 32 bit length byte string.
bool IsCBORMessage(span<uint8_t> msg) {
  if (msg.size() < 6)
    return false;
  if (msg[0] != InitialByteForEnvelope())
    return false;
  return msg[1] == InitialByteFor32BitLengthByteString();
}
// =============================================================================
// Encoding individual CBOR items
// =============================================================================
// Returns the single byte that encodes the value true (|kEncodedTrue|).
uint8_t EncodeTrue() {
return kEncodedTrue;
}
// Returns the single byte that encodes the value false (|kEncodedFalse|).
uint8_t EncodeFalse() {
return kEncodedFalse;
}
// Returns the single byte that encodes null (|kEncodedNull|).
uint8_t EncodeNull() {
return kEncodedNull;
}
// Returns the initial byte that starts an indefinite length array.
uint8_t EncodeIndefiniteLengthArrayStart() {
return kInitialByteIndefiniteLengthArray;
}
// Returns the initial byte that starts an indefinite length map.
uint8_t EncodeIndefiniteLengthMapStart() {
return kInitialByteIndefiniteLengthMap;
}
// Returns the stop byte (|kStopByte|) that finishes an indefinite length
// map / array.
uint8_t EncodeStop() {
return kStopByte;
}
// Appends the encoding of |value| to |out|: non-negative values are written
// as MajorType::UNSIGNED, negative values as MajorType::NEGATIVE carrying
// -(value + 1).
template <typename C>
void EncodeInt32Tmpl(int32_t value, C* out) {
  if (value < 0) {
    const uint64_t representation = static_cast<uint64_t>(-(value + 1));
    internals::WriteTokenStart(MajorType::NEGATIVE, representation, out);
    return;
  }
  internals::WriteTokenStart(MajorType::UNSIGNED, value, out);
}
// Appends the encoding of |value| to a std::vector<uint8_t>; forwards to
// EncodeInt32Tmpl.
void EncodeInt32(int32_t value, std::vector<uint8_t>* out) {
EncodeInt32Tmpl(value, out);
}
// Same as the overload above, but appends to a std::string.
void EncodeInt32(int32_t value, std::string* out) {
EncodeInt32Tmpl(value, out);
}
// Appends |in| to |out| as a BYTE_STRING token whose length is the byte
// length of |in|, followed by each 16 bit unit as two bytes.
template <typename C>
void EncodeString16Tmpl(span<uint16_t> in, C* out) {
  const uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
  internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out);
  // When emitting UTF16 characters, we always write the least significant byte
  // first; this is because it's the native representation for X86.
  // TODO(johannes): Implement a more efficient thing here later, e.g.
  // casting *iff* the machine has this byte order.
  // The wire format for UTF16 chars will probably remain the same
  // (least significant byte first) since this way we can have
  // golden files, unittests, etc. that port easily and universally.
  // See also:
  // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
  for (size_t ii = 0; ii < in.size(); ++ii) {
    const uint16_t code_unit = in[ii];
    out->push_back(code_unit);
    out->push_back(code_unit >> 8);
  }
}
// Appends |in| as a UTF16 string (CBOR BYTE_STRING) to the byte vector |out|.
void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out) {
  EncodeString16Tmpl<std::vector<uint8_t>>(in, out);
}

// Appends |in| as a UTF16 string (CBOR BYTE_STRING) to the string |out|.
void EncodeString16(span<uint16_t> in, std::string* out) {
  EncodeString16Tmpl<std::string>(in, out);
}
// Appends |in| to |out| as a CBOR STRING: the token start carrying the byte
// length, followed by the bytes of |in| unchanged.
template <typename C>
void EncodeString8Tmpl(span<uint8_t> in, C* out) {
  const uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
  internals::WriteTokenStart(MajorType::STRING, byte_length, out);
  out->insert(out->end(), in.begin(), in.end());
}
// Appends |in| as a CBOR STRING to the byte vector |out|.
void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out) {
  EncodeString8Tmpl<std::vector<uint8_t>>(in, out);
}

// Appends |in| as a CBOR STRING to the string |out|.
void EncodeString8(span<uint8_t> in, std::string* out) {
  EncodeString8Tmpl<std::string>(in, out);
}
// Encodes the Latin1 characters in |latin1| as a CBOR STRING in |out|.
// Pure US-ASCII input is passed through unchanged; otherwise the input is
// first transcoded to UTF8.
template <typename C>
void EncodeFromLatin1Tmpl(span<uint8_t> latin1, C* out) {
  // Locate the first character outside of the US-ASCII range, if any.
  size_t first_non_ascii = 0;
  while (first_non_ascii < latin1.size() && latin1[first_non_ascii] <= 127)
    ++first_non_ascii;
  if (first_non_ascii == latin1.size()) {
    EncodeString8(latin1, out);
    return;
  }
  // Copy the ASCII prefix verbatim, then transcode the remainder.
  std::vector<uint8_t> utf8(latin1.begin(), latin1.begin() + first_non_ascii);
  for (size_t ii = first_non_ascii; ii < latin1.size(); ++ii) {
    const uint8_t ch = latin1[ii];
    if (ch <= 127) {
      utf8.push_back(ch);
      continue;
    }
    // Characters above 127 become a 2 byte UTF8 sequence: a lead byte
    // (0xC0 | top two bits) followed by a continuation byte (10xx xxxx).
    utf8.push_back((ch >> 6) | 0xc0);
    utf8.push_back((ch | 0x80) & 0xbf);
  }
  EncodeString8(SpanFrom(utf8), out);
}
// Encodes Latin1 input as a CBOR STRING (UTF8), appending to the byte vector
// |out|.
void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out) {
  EncodeFromLatin1Tmpl<std::vector<uint8_t>>(latin1, out);
}

// Encodes Latin1 input as a CBOR STRING (UTF8), appending to the string
// |out|.
void EncodeFromLatin1(span<uint8_t> latin1, std::string* out) {
  EncodeFromLatin1Tmpl<std::string>(latin1, out);
}
// Encodes |utf16| into |out|, picking the more compact representation:
// input consisting only of US-ASCII code units is written as a CBOR STRING
// with one byte per character; anything else is written as STRING16.
template <typename C>
void EncodeFromUTF16Tmpl(span<uint16_t> utf16, C* out) {
  bool all_ascii = true;
  for (size_t ii = 0; all_ascii && ii < utf16.size(); ++ii)
    all_ascii = utf16[ii] <= 127;
  if (!all_ascii) {
    // At least one non-ASCII code unit: keep the UTF16 representation.
    EncodeString16(utf16, out);
    return;
  }
  // All US-ASCII: the upper byte of every code unit is zero, so each code
  // unit is narrowed to a single byte while being appended.
  const uint64_t char_count = static_cast<uint64_t>(utf16.size());
  internals::WriteTokenStart(MajorType::STRING, char_count, out);
  out->insert(out->end(), utf16.begin(), utf16.end());
}
// Encodes UTF16 input as STRING8 (if pure ASCII) or STRING16, appending to
// the byte vector |out|.
void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out) {
  EncodeFromUTF16Tmpl<std::vector<uint8_t>>(utf16, out);
}

// Encodes UTF16 input as STRING8 (if pure ASCII) or STRING16, appending to
// the string |out|.
void EncodeFromUTF16(span<uint16_t> utf16, std::string* out) {
  EncodeFromUTF16Tmpl<std::string>(utf16, out);
}
// Appends |in| to |out| as a binary blob: the "expected conversion to
// base64" tag byte, then a BYTE_STRING token start with the byte length,
// then the raw bytes.
template <typename C>
void EncodeBinaryTmpl(span<uint8_t> in, C* out) {
  out->push_back(kExpectedConversionToBase64Tag);
  internals::WriteTokenStart(MajorType::BYTE_STRING,
                             static_cast<uint64_t>(in.size_bytes()), out);
  out->insert(out->end(), in.begin(), in.end());
}
// Appends |in| as a tagged binary blob to the byte vector |out|.
void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out) {
  EncodeBinaryTmpl<std::vector<uint8_t>>(in, out);
}

// Appends |in| as a tagged binary blob to the string |out|.
void EncodeBinary(span<uint8_t> in, std::string* out) {
  EncodeBinaryTmpl<std::string>(in, out);
}
// A double is encoded with a specific initial byte
// (kInitialByteForDouble) plus the 64 bits of payload for its value,
// i.e. 1 + 8 = 9 bytes in total.
constexpr size_t kEncodedDoubleSize = 1 + sizeof(uint64_t);
// An envelope is encoded with a specific initial byte
// (kInitialByteForEnvelope), plus the start byte for a BYTE_STRING with a 32
// bit wide length, plus a 32 bit length for that string. The header is thus
// 1 + 1 + 4 = 6 bytes; the envelope's payload follows it.
constexpr size_t kEncodedEnvelopeHeaderSize = 1 + 1 + sizeof(uint32_t);
// Appends the encoding of |value| to |out|: the initial byte for doubles
// followed by the 64 bits of the value's object representation, most
// significant byte first.
template <typename C>
void EncodeDoubleTmpl(double value, C* out) {
  static_assert(sizeof(double) == sizeof(uint64_t),
                "double must be 64 bits wide");
  // The additional_info=27 indicates 64 bits for the double follow.
  // See RFC 7049 Section 2.3, Table 1.
  out->push_back(kInitialByteForDouble);
  // Copy the bits with memcpy rather than reading the inactive member of a
  // union, which is undefined behavior in C++.
  uint64_t bits;
  memcpy(&bits, &value, sizeof(bits));
  WriteBytesMostSignificantByteFirst<uint64_t>(bits, out);
}
void EncodeDouble(double value, std::vector<uint8_t>* out) {
EncodeDoubleTmpl(value, out);
}
void EncodeDouble(double value, std::string* out) {
EncodeDoubleTmpl(value, out);
}
// =============================================================================
// cbor::EnvelopeEncoder - for wrapping submessages
// =============================================================================
// Opens an envelope in |out|: writes the two envelope marker bytes and
// reserves 4 bytes for the payload length, which is only known once the
// envelope is closed. The offset of the reserved bytes is stored in
// |*byte_size_pos|, which must be 0 on entry.
template <typename C>
void EncodeStartTmpl(C* out, size_t* byte_size_pos) {
  assert(*byte_size_pos == 0);
  out->push_back(kInitialByteForEnvelope);
  out->push_back(kInitialByteFor32BitLengthByteString);
  const size_t length_field_pos = out->size();
  *byte_size_pos = length_field_pos;
  out->resize(length_field_pos + sizeof(uint32_t));
}
// Starts an envelope in the byte vector |out| and remembers where its
// length field lives.
void EnvelopeEncoder::EncodeStart(std::vector<uint8_t>* out) {
  EncodeStartTmpl(out, &byte_size_pos_);
}

// Starts an envelope in the string |out| and remembers where its length
// field lives.
void EnvelopeEncoder::EncodeStart(std::string* out) {
  EncodeStartTmpl(out, &byte_size_pos_);
}
// Closes an envelope in |out| by patching the 4 length bytes located at
// |*byte_size_pos| with the size of the payload written after them.
// Returns false, leaving |out| and |*byte_size_pos| untouched, if the
// payload does not fit into 32 bits. On success |*byte_size_pos| has been
// advanced past the length field.
template <typename C>
bool EncodeStopTmpl(C* out, size_t* byte_size_pos) {
  assert(*byte_size_pos != 0);
  // The payload consists of all bytes written past the length field itself.
  const size_t payload_begin = *byte_size_pos + sizeof(uint32_t);
  const uint64_t payload_size = out->size() - payload_begin;
  // Exactly 4 bytes are available for the length, so it must not exceed
  // the maximum uint32_t value.
  if (payload_size > std::numeric_limits<uint32_t>::max())
    return false;
  // Store the length with the most significant byte first.
  for (size_t remaining = sizeof(uint32_t); remaining > 0; --remaining) {
    const size_t shift_bits = (remaining - 1) * 8;
    (*out)[(*byte_size_pos)++] = 0xff & (payload_size >> shift_bits);
  }
  return true;
}
bool EnvelopeEncoder::EncodeStop(std::vector<uint8_t>* out) {
return EncodeStopTmpl(out, &byte_size_pos_);
}
bool EnvelopeEncoder::EncodeStop(std::string* out) {
return EncodeStopTmpl(out, &byte_size_pos_);
}
// =============================================================================
// cbor::NewCBOREncoder - for encoding from a streaming parser
// =============================================================================
namespace {
template <typename C>
class CBOREncoder : public StreamingParserHandler {
public:
CBOREncoder(C* out, Status* status) : out_(out), status_(status) {
*status_ = Status();
}
void HandleMapBegin() override {
if (!status_->ok())
return;
envelopes_.emplace_back();
envelopes_.back().EncodeStart(out_);
out_->push_back(kInitialByteIndefiniteLengthMap);
}
void HandleMapEnd() override {
if (!status_->ok())
return;
out_->push_back(kStopByte);
assert(!envelopes_.empty());
if (!envelopes_.back().EncodeStop(out_)) {
HandleError(
Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
return;
}
envelopes_.pop_back();
}
void HandleArrayBegin() override {
if (!status_->ok())
return;
envelopes_.emplace_back();
envelopes_.back().EncodeStart(out_);
out_->push_back(kInitialByteIndefiniteLengthArray);
}
void HandleArrayEnd() override {
if (!status_->ok())
return;
out_->push_back(kStopByte);
assert(!envelopes_.empty());
if (!envelopes_.back().EncodeStop(out_)) {
HandleError(
Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
return;
}
envelopes_.pop_back();
}
void HandleString8(span<uint8_t> chars) override {
if (!status_->ok())
return;
EncodeString8(chars, out_);
}
void HandleString16(span<uint16_t> chars) override {
if (!status_->ok())
return;
EncodeFromUTF16(chars, out_);
}
void HandleBinary(span<uint8_t> bytes) override {
if (!status_->ok())
return;
EncodeBinary(bytes, out_);
}
void HandleDouble(double value) override {
if (!status_->ok())
return;
EncodeDouble(value, out_);
}
void HandleInt32(int32_t value) override {
if (!status_->ok())
return;
EncodeInt32(value, out_);
}
void HandleBool(bool value) override {
if (!status_->ok())
return;
// See RFC 7049 Section 2.3, Table 2.
out_->push_back(value ? kEncodedTrue : kEncodedFalse);
}
void HandleNull() override {
if (!status_->ok())
return;
// See RFC 7049 Section 2.3, Table 2.
out_->push_back(kEncodedNull);
}
void HandleError(Status error) override {
if (!status_->ok())
return;
*status_ = error;
out_->clear();
}
private:
C* out_;
std::vector<EnvelopeEncoder> envelopes_;
Status* status_;
};
} // namespace
// Creates a handler that encodes the events it receives as CBOR into the
// byte vector |out|, reporting errors via |status|.
std::unique_ptr<StreamingParserHandler> NewCBOREncoder(
    std::vector<uint8_t>* out,
    Status* status) {
  std::unique_ptr<StreamingParserHandler> encoder(
      new CBOREncoder<std::vector<uint8_t>>(out, status));
  return encoder;
}

// Creates a handler that encodes the events it receives as CBOR into the
// string |out|, reporting errors via |status|.
std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out,
                                                       Status* status) {
  std::unique_ptr<StreamingParserHandler> encoder(
      new CBOREncoder<std::string>(out, status));
  return encoder;
}
// =============================================================================
// cbor::CBORTokenizer - for parsing individual CBOR items
// =============================================================================
// Creates a tokenizer over |bytes| and immediately reads the first token,
// so that TokenTag() is meaningful right after construction.
CBORTokenizer::CBORTokenizer(span<uint8_t> bytes) : bytes_(bytes) {
  const bool enter_envelope = false;
  ReadNextToken(enter_envelope);
}

CBORTokenizer::~CBORTokenizer() {}
// Returns the tag of the token at the current position.
CBORTokenTag CBORTokenizer::TokenTag() const {
  return token_tag_;
}
// Advances to the next token. Once the tokenizer has reached an error or
// the end of the input, this is a no-op.
void CBORTokenizer::Next() {
  const bool can_advance = token_tag_ != CBORTokenTag::ERROR_VALUE &&
                           token_tag_ != CBORTokenTag::DONE;
  if (can_advance)
    ReadNextToken(/*enter_envelope=*/false);
}
// Steps into the envelope at the current position, making the first token
// of its contents the current token. Must only be called while the current
// token is an ENVELOPE.
void CBORTokenizer::EnterEnvelope() {
  assert(token_tag_ == CBORTokenTag::ENVELOPE);
  const bool enter_envelope = true;
  ReadNextToken(enter_envelope);
}
// Returns the current status: the position of the current token and, if the
// current token is an ERROR_VALUE, the error that was detected.
Status CBORTokenizer::Status() const {
  return status_;
}
// The following accessor functions ::GetInt32, ::GetDouble,
// ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents
// assume that a particular token was recognized in ::ReadNextToken.
// That's where all the error checking is done. By design,
// the accessors (assuming the token was recognized) never produce
// an error.
// Returns the value of the current INT32 token. The range checks happen in
// ::ReadNextToken(), so the conversions below cannot go out of range.
int32_t CBORTokenizer::GetInt32() const {
  assert(token_tag_ == CBORTokenTag::INT32);
  if (token_start_type_ == MajorType::UNSIGNED)
    return static_cast<int32_t>(token_start_internal_value_);
  // Major type NEGATIVE stores n for the value -1 - n.
  return static_cast<int32_t>(
      -static_cast<int64_t>(token_start_internal_value_) - 1);
}
// Returns the value of the current DOUBLE token. The 8 payload bytes that
// follow the initial byte are read most significant byte first and
// reinterpreted as a double.
double CBORTokenizer::GetDouble() const {
  assert(token_tag_ == CBORTokenTag::DOUBLE);
  static_assert(sizeof(double) == sizeof(uint64_t),
                "double must be 64 bits wide");
  // Skip the initial byte (+ 1) to get to the payload.
  const uint64_t bits = ReadBytesMostSignificantByteFirst<uint64_t>(
      bytes_.subspan(status_.pos + 1));
  // Copy the bits with memcpy rather than reading the inactive member of a
  // union, which is undefined behavior in C++.
  double value;
  memcpy(&value, &bits, sizeof(value));
  return value;
}
// Returns the payload bytes of the current STRING8 token, without the token
// start that precedes them.
span<uint8_t> CBORTokenizer::GetString8() const {
  assert(token_tag_ == CBORTokenTag::STRING8);
  const size_t payload_size = static_cast<size_t>(token_start_internal_value_);
  // The payload occupies the trailing |payload_size| bytes of the token.
  const size_t payload_offset =
      status_.pos + (token_byte_length_ - payload_size);
  return bytes_.subspan(payload_offset, payload_size);
}
// Returns the payload bytes of the current STRING16 token exactly as they
// appear in the input, without the token start that precedes them.
span<uint8_t> CBORTokenizer::GetString16WireRep() const {
  assert(token_tag_ == CBORTokenTag::STRING16);
  const size_t payload_size = static_cast<size_t>(token_start_internal_value_);
  // The payload occupies the trailing |payload_size| bytes of the token.
  const size_t payload_offset =
      status_.pos + (token_byte_length_ - payload_size);
  return bytes_.subspan(payload_offset, payload_size);
}
// Returns the payload bytes of the current BINARY token, without the tag
// byte and token start that precede them.
span<uint8_t> CBORTokenizer::GetBinary() const {
  assert(token_tag_ == CBORTokenTag::BINARY);
  const size_t payload_size = static_cast<size_t>(token_start_internal_value_);
  // The payload occupies the trailing |payload_size| bytes of the token.
  const size_t payload_offset =
      status_.pos + (token_byte_length_ - payload_size);
  return bytes_.subspan(payload_offset, payload_size);
}
// Returns the bytes wrapped by the current ENVELOPE token, that is,
// everything after the envelope header.
span<uint8_t> CBORTokenizer::GetEnvelopeContents() const {
  assert(token_tag_ == CBORTokenTag::ENVELOPE);
  const size_t contents_size =
      static_cast<size_t>(token_start_internal_value_);
  const size_t contents_offset = status_.pos + kEncodedEnvelopeHeaderSize;
  return bytes_.subspan(contents_offset, contents_size);
}
// All error checking happens in ::ReadNextToken, so that the accessors
// can avoid having to carry an error return value.
//
// With respect to checking the encoded lengths of strings, arrays, etc:
// On the wire, CBOR uses 1,2,4, and 8 byte unsigned integers, so
// we initially read them as uint64_t, usually into token_start_internal_value_.
//
// However, since these containers have a representation on the machine,
// we need to do corresponding size computations on the input byte array,
// output span (e.g. the payload for a string), etc., and size_t is
// machine specific (in practice either 32 bit or 64 bit).
//
// Further, we must avoid overflowing size_t. Therefore, we use this
// kMaxValidLength constant to:
// - Reject values that are larger than the architecture specific
// max size_t (differs between 32 bit and 64 bit arch).
// - Reserve at least one bit so that we can check against overflows
// when adding lengths (array / string length / etc.); we do this by
// ensuring that the inputs to an addition are <= kMaxValidLength,
// and then checking whether the sum went past it.
//
// See also
// https://chromium.googlesource.com/chromium/src/+/master/docs/security/integer-semantics.md
// Upper bound for any length read from the wire: the smaller of a quarter
// of the uint64_t range and the largest value representable as size_t.
// ::ReadNextToken rejects larger lengths before adding header sizes to them
// in uint64_t arithmetic.
static const uint64_t kMaxValidLength =
std::min<uint64_t>(std::numeric_limits<uint64_t>::max() >> 2,
std::numeric_limits<size_t>::max());
// Moves to the next token and classifies it, storing the outcome via
// SetToken / SetError. If |enter_envelope| is true, the position only moves
// past the header of the current envelope, so that the first token inside
// of it becomes current; otherwise the entire current token is skipped.
// All validation (supported types, value ranges, lengths vs. remaining
// input) happens here.
void CBORTokenizer::ReadNextToken(bool enter_envelope) {
if (enter_envelope) {
status_.pos += kEncodedEnvelopeHeaderSize;
} else {
// If no position has been established yet (pos == npos), start at offset
// 0; otherwise skip over the token that was current so far.
status_.pos =
status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_;
}
status_.error = Error::OK;
if (status_.pos >= bytes_.size()) {
token_tag_ = CBORTokenTag::DONE;
return;
}
const size_t remaining_bytes = bytes_.size() - status_.pos;
// First dispatch on initial bytes that identify a token all by themselves.
switch (bytes_[status_.pos]) {
case kStopByte:
SetToken(CBORTokenTag::STOP, 1);
return;
case kInitialByteIndefiniteLengthMap:
SetToken(CBORTokenTag::MAP_START, 1);
return;
case kInitialByteIndefiniteLengthArray:
SetToken(CBORTokenTag::ARRAY_START, 1);
return;
case kEncodedTrue:
SetToken(CBORTokenTag::TRUE_VALUE, 1);
return;
case kEncodedFalse:
SetToken(CBORTokenTag::FALSE_VALUE, 1);
return;
case kEncodedNull:
SetToken(CBORTokenTag::NULL_VALUE, 1);
return;
case kExpectedConversionToBase64Tag: { // BINARY
// The tag byte must be followed by a BYTE_STRING token start.
const size_t bytes_read = internals::ReadTokenStart(
bytes_.subspan(status_.pos + 1), &token_start_type_,
&token_start_internal_value_);
if (!bytes_read || token_start_type_ != MajorType::BYTE_STRING ||
token_start_internal_value_ > kMaxValidLength) {
SetError(Error::CBOR_INVALID_BINARY);
return;
}
const uint64_t token_byte_length = token_start_internal_value_ +
/* tag before token start: */ 1 +
/* token start: */ bytes_read;
// The entire token, payload included, must fit into the remaining input.
if (token_byte_length > remaining_bytes) {
SetError(Error::CBOR_INVALID_BINARY);
return;
}
SetToken(CBORTokenTag::BINARY, static_cast<size_t>(token_byte_length));
return;
}
case kInitialByteForDouble: { // DOUBLE
if (kEncodedDoubleSize > remaining_bytes) {
SetError(Error::CBOR_INVALID_DOUBLE);
return;
}
SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize);
return;
}
case kInitialByteForEnvelope: { // ENVELOPE
// Make sure the entire header is available before looking at its bytes.
if (kEncodedEnvelopeHeaderSize > remaining_bytes) {
SetError(Error::CBOR_INVALID_ENVELOPE);
return;
}
// The envelope must be a byte string with 32 bit length.
if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) {
SetError(Error::CBOR_INVALID_ENVELOPE);
return;
}
// Read the length of the byte string.
token_start_internal_value_ = ReadBytesMostSignificantByteFirst<uint32_t>(
bytes_.subspan(status_.pos + 2));
if (token_start_internal_value_ > kMaxValidLength) {
SetError(Error::CBOR_INVALID_ENVELOPE);
return;
}
uint64_t token_byte_length =
token_start_internal_value_ + kEncodedEnvelopeHeaderSize;
if (token_byte_length > remaining_bytes) {
SetError(Error::CBOR_INVALID_ENVELOPE);
return;
}
SetToken(CBORTokenTag::ENVELOPE, static_cast<size_t>(token_byte_length));
return;
}
default: {
// Everything else is classified by the major type of its token start.
// Each supported case checks |bytes_read| itself; a value of 0 is
// treated as a failed read.
const size_t bytes_read = internals::ReadTokenStart(
bytes_.subspan(status_.pos), &token_start_type_,
&token_start_internal_value_);
switch (token_start_type_) {
case MajorType::UNSIGNED: // INT32.
// INT32 is a signed int32 (int32 makes sense for the
// inspector_protocol, it's not a CBOR limitation), so we check
// against the signed max, so that the allowable values are
// 0, 1, 2, ... 2^31 - 1.
if (!bytes_read || std::numeric_limits<int32_t>::max() <
token_start_internal_value_) {
SetError(Error::CBOR_INVALID_INT32);
return;
}
SetToken(CBORTokenTag::INT32, bytes_read);
return;
case MajorType::NEGATIVE: { // INT32.
// INT32 is a signed int32 (int32 makes sense for the
// inspector_protocol, it's not a CBOR limitation); in CBOR, the
// negative values for INT32 are represented as NEGATIVE, that is, -1
// INT32 is represented as 1 << 5 | 0 (major type 1, additional info
// value 0).
// The represented allowed values range is -1 to -2^31.
// They are mapped into the encoded range of 0 to 2^31-1.
// We check the payload in token_start_internal_value_ against
// that range (2^31-1 is also known as
// std::numeric_limits<int32_t>::max()).
if (!bytes_read || token_start_internal_value_ >
std::numeric_limits<int32_t>::max()) {
SetError(Error::CBOR_INVALID_INT32);
return;
}
SetToken(CBORTokenTag::INT32, bytes_read);
return;
}
case MajorType::STRING: { // STRING8.
if (!bytes_read || token_start_internal_value_ > kMaxValidLength) {
SetError(Error::CBOR_INVALID_STRING8);
return;
}
// Token start plus payload must fit into the remaining input.
uint64_t token_byte_length = token_start_internal_value_ + bytes_read;
if (token_byte_length > remaining_bytes) {
SetError(Error::CBOR_INVALID_STRING8);
return;
}
SetToken(CBORTokenTag::STRING8,
static_cast<size_t>(token_byte_length));
return;
}
case MajorType::BYTE_STRING: { // STRING16.
// Length must be divisible by 2 since UTF16 is 2 bytes per
// character, hence the &1 check.
if (!bytes_read || token_start_internal_value_ > kMaxValidLength ||
token_start_internal_value_ & 1) {
SetError(Error::CBOR_INVALID_STRING16);
return;
}
// Token start plus payload must fit into the remaining input.
uint64_t token_byte_length = token_start_internal_value_ + bytes_read;
if (token_byte_length > remaining_bytes) {
SetError(Error::CBOR_INVALID_STRING16);
return;
}
SetToken(CBORTokenTag::STRING16,
static_cast<size_t>(token_byte_length));
return;
}
// Definite length arrays / maps, tags other than the binary tag handled
// above, and other simple values are not supported.
case MajorType::ARRAY:
case MajorType::MAP:
case MajorType::TAG:
case MajorType::SIMPLE_VALUE:
SetError(Error::CBOR_UNSUPPORTED_VALUE);
return;
}
}
}
}
// Records a successfully recognized token: its tag and the total number of
// bytes it occupies in the input.
void CBORTokenizer::SetToken(CBORTokenTag token_tag,
                             size_t token_byte_length) {
  token_byte_length_ = token_byte_length;
  token_tag_ = token_tag;
}
// Puts the tokenizer into the error state: the current token becomes
// ERROR_VALUE and |error| is stored in the status. The position is left
// unchanged.
void CBORTokenizer::SetError(Error error) {
  status_.error = error;
  token_tag_ = CBORTokenTag::ERROR_VALUE;
}
// =============================================================================
// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
// =============================================================================
namespace {
// When parsing CBOR, we limit recursion depth for objects and arrays
// to this constant. ParseValue reports CBOR_STACK_LIMIT_EXCEEDED once its
// stack_depth argument is greater than this value.
static constexpr int kStackLimit = 300;
// Below are three parsing routines for CBOR, which cover enough
// to roundtrip JSON messages. They are declared up front because they are
// mutually recursive. Each returns false if an error was reported to |out|.
bool ParseMap(int32_t stack_depth,
CBORTokenizer* tokenizer,
StreamingParserHandler* out);
bool ParseArray(int32_t stack_depth,
CBORTokenizer* tokenizer,
StreamingParserHandler* out);
bool ParseValue(int32_t stack_depth,
CBORTokenizer* tokenizer,
StreamingParserHandler* out);
// Decodes the current STRING16 token into UTF16 code units, hands them to
// |out| and advances |tokenizer| past the token. On the wire, each code
// unit is stored least significant byte first.
void ParseUTF16String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
  span<uint8_t> rep = tokenizer->GetString16WireRep();
  std::vector<uint16_t> value;
  // Two wire bytes make up one code unit; reserving up front avoids
  // repeated reallocation while the vector grows.
  value.reserve(rep.size() / 2);
  // The tokenizer rejects STRING16 tokens with an odd byte length; the
  // loop bound additionally ensures rep[ii + 1] never reads past the end.
  for (size_t ii = 0; ii + 1 < rep.size(); ii += 2)
    value.push_back((rep[ii + 1] << 8) | rep[ii]);
  out->HandleString16(span<uint16_t>(value.data(), value.size()));
  tokenizer->Next();
}
// Hands the bytes of the current STRING8 token to |out| and advances
// |tokenizer| past the token. Always returns true.
bool ParseUTF8String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
  assert(tokenizer->TokenTag() == CBORTokenTag::STRING8);
  const span<uint8_t> chars = tokenizer->GetString8();
  out->HandleString8(chars);
  tokenizer->Next();
  return true;
}
// Parses a single value at the current token, emits the corresponding
// events to |out| and leaves |tokenizer| just past the value. Returns false
// after reporting an error to |out| if the value cannot be parsed or if
// |stack_depth| is greater than kStackLimit.
bool ParseValue(int32_t stack_depth,
                CBORTokenizer* tokenizer,
                StreamingParserHandler* out) {
  if (stack_depth > kStackLimit) {
    out->HandleError(
        Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos});
    return false;
  }
  // Skip past the envelope to get to what's inside.
  if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE)
    tokenizer->EnterEnvelope();
  switch (tokenizer->TokenTag()) {
    // Failures.
    case CBORTokenTag::ERROR_VALUE:
      out->HandleError(tokenizer->Status());
      return false;
    case CBORTokenTag::DONE:
      out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE,
                              tokenizer->Status().pos});
      return false;

    // Values whose parsing routine advances the tokenizer by itself.
    case CBORTokenTag::STRING8:
      return ParseUTF8String(tokenizer, out);
    case CBORTokenTag::STRING16:
      ParseUTF16String(tokenizer, out);
      return true;
    case CBORTokenTag::MAP_START:
      return ParseMap(stack_depth + 1, tokenizer, out);
    case CBORTokenTag::ARRAY_START:
      return ParseArray(stack_depth + 1, tokenizer, out);

    // Single token values; the tokenizer is advanced after the switch.
    case CBORTokenTag::TRUE_VALUE:
      out->HandleBool(true);
      break;
    case CBORTokenTag::FALSE_VALUE:
      out->HandleBool(false);
      break;
    case CBORTokenTag::NULL_VALUE:
      out->HandleNull();
      break;
    case CBORTokenTag::INT32:
      out->HandleInt32(tokenizer->GetInt32());
      break;
    case CBORTokenTag::DOUBLE:
      out->HandleDouble(tokenizer->GetDouble());
      break;
    case CBORTokenTag::BINARY:
      out->HandleBinary(tokenizer->GetBinary());
      break;

    default:
      out->HandleError(
          Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos});
      return false;
  }
  tokenizer->Next();
  return true;
}
// |bytes| must start with the indefinite length array byte, so basically,
// ParseArray may only be called after an indefinite length array has been
// detected.
bool ParseArray(int32_t stack_depth,
CBORTokenizer* tokenizer,
StreamingParserHandler* out) {
assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START);
tokenizer->Next();
out->HandleArrayBegin();
while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
out->HandleError(
Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos});
return false;
}
if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
out->HandleError(tokenizer->Status());
return false;
}
// Parse value.
if (!ParseValue(stack_depth, tokenizer, out))
return false;
}
out->HandleArrayEnd();
tokenizer->Next();
return true;
}
// |bytes| must start with the indefinite length array byte, so basically,
// ParseArray may only be called after an indefinite length array has been
// detected.
bool ParseMap(int32_t stack_depth,
CBORTokenizer* tokenizer,
StreamingParserHandler* out) {
assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START);
out->HandleMapBegin();
tokenizer->Next();
while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
out->HandleError(
Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos});
return false;
}
if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
out->HandleError(tokenizer->Status());
return false;
}
// Parse key.
if (tokenizer->TokenTag() == CBORTokenTag::STRING8) {
if (!ParseUTF8String(tokenizer, out))
return false;
} else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) {
ParseUTF16String(tokenizer, out);
} else {
out->HandleError(
Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos});
return false;
}
// Parse value.
if (!ParseValue(stack_depth, tokenizer, out))
return false;
}
out->HandleMapEnd();
tokenizer->Next();
return true;
}
} // namespace
// Parses the CBOR message in |bytes| and sends the corresponding events to
// |out|. The message must consist of exactly one envelope that wraps an
// indefinite length map; anything else is reported via out->HandleError().
void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out) {
  if (bytes.empty()) {
    out->HandleError(Status{Error::CBOR_NO_INPUT, 0});
    return;
  }
  if (bytes[0] != kInitialByteForEnvelope) {
    out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0});
    return;
  }
  CBORTokenizer tokenizer(bytes);
  if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) {
    out->HandleError(tokenizer.Status());
    return;
  }
  // The envelope start byte was verified above, and the tokenizer did not
  // report an error, so it must have recognized an envelope.
  assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE);
  tokenizer.EnterEnvelope();
  if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) {
    out->HandleError(
        Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos});
    return;
  }
  if (!ParseMap(/*stack_depth=*/1, &tokenizer, out))
    return;
  // After the top-level map, the input must be exhausted.
  switch (tokenizer.TokenTag()) {
    case CBORTokenTag::DONE:
      return;
    case CBORTokenTag::ERROR_VALUE:
      out->HandleError(tokenizer.Status());
      return;
    default:
      out->HandleError(
          Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos});
      return;
  }
}
// =============================================================================
// cbor::AppendString8EntryToMap - for limited in-place editing of messages
// =============================================================================
// Appends the entry |string8_key| -> |string8_value| to the CBOR message
// in |cbor|, which must consist of exactly one envelope wrapping an
// indefinite length map. The stop byte is removed, the entry and a new
// stop byte are appended, and the envelope's 32 bit length is rewritten.
// Returns an error status if the message does not have the expected shape
// or if the resulting envelope is too large.
template <typename C>
Status AppendString8EntryToCBORMapTmpl(span<uint8_t> string8_key,
                                       span<uint8_t> string8_value,
                                       C* cbor) {
  // Careful below: Don't compare (*cbor)[idx] with a uint8_t, since
  // it could be a char (signed!). Instead, use bytes.
  span<uint8_t> bytes(reinterpret_cast<const uint8_t*>(cbor->data()),
                      cbor->size());
  CBORTokenizer tokenizer(bytes);
  if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE)
    return tokenizer.Status();
  if (tokenizer.TokenTag() != CBORTokenTag::ENVELOPE)
    return Status(Error::CBOR_INVALID_ENVELOPE, 0);

  const span<uint8_t> contents = tokenizer.GetEnvelopeContents();
  size_t envelope_size = contents.size();
  size_t old_size = cbor->size();
  // The envelope must span the entire message.
  if (old_size != envelope_size + kEncodedEnvelopeHeaderSize)
    return Status(Error::CBOR_INVALID_ENVELOPE, 0);
  if (envelope_size == 0 || contents[0] != EncodeIndefiniteLengthMapStart())
    return Status(Error::CBOR_MAP_START_EXPECTED, kEncodedEnvelopeHeaderSize);
  if (bytes[bytes.size() - 1] != EncodeStop())
    return Status(Error::CBOR_MAP_STOP_EXPECTED, cbor->size() - 1);

  // Replace the trailing stop byte with the new entry plus a stop byte.
  // |bytes| and |contents| must not be used from here on, since |cbor| is
  // being modified.
  cbor->pop_back();
  EncodeString8(string8_key, cbor);
  EncodeString8(string8_value, cbor);
  cbor->push_back(EncodeStop());

  size_t new_envelope_size = envelope_size + (cbor->size() - old_size);
  if (new_envelope_size > std::numeric_limits<uint32_t>::max())
    return Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, 0);

  // Rewrite the 32 bit length that precedes the envelope contents, most
  // significant byte first.
  size_t size_pos = cbor->size() - new_envelope_size - sizeof(uint32_t);
  uint8_t* length_bytes = reinterpret_cast<uint8_t*>(&cbor->at(size_pos));
  for (int shift = 24; shift >= 0; shift -= 8)
    *(length_bytes++) = (new_envelope_size >> shift) & 0xff;
  return Status();
}
// Appends the given key / value entry to the CBOR map message held in the
// byte vector |cbor|.
Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
                                   span<uint8_t> string8_value,
                                   std::vector<uint8_t>* cbor) {
  return AppendString8EntryToCBORMapTmpl<std::vector<uint8_t>>(
      string8_key, string8_value, cbor);
}

// Appends the given key / value entry to the CBOR map message held in the
// string |cbor|.
Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
                                   span<uint8_t> string8_value,
                                   std::string* cbor) {
  return AppendString8EntryToCBORMapTmpl<std::string>(string8_key,
                                                      string8_value, cbor);
}
} // namespace cbor
namespace json {
// =============================================================================
// json::NewJSONEncoder - for encoding streaming parser events as JSON
// =============================================================================
namespace {
// Appends |value| to |out| as exactly 4 lowercase hex digits, starting with
// the most significant one.
template <typename C>
void PrintHex(uint16_t value, C* out) {
  for (int shift = 12; shift >= 0; shift -= 4) {
    const int nibble = (value >> shift) & 0xf;
    if (nibble <= 9)
      out->push_back('0' + nibble);
    else
      out->push_back('a' + (nibble - 10));
  }
}
// The JSON writer below keeps a stack of State instances, one per nesting
// level. A State tracks just enough to emit the right delimiter (',' or
// ':') in front of each element.

// The kind of container a State describes.
enum class Container {
  // Used for the top-level, initial state.
  NONE,
  // Inside a JSON object.
  MAP,
  // Inside a JSON array.
  ARRAY
};

class State {
 public:
  explicit State(Container container) : container_(container) {}

  // Call before emitting an element of this container: appends the
  // delimiter that must precede it (nothing for the first element) and
  // counts the element.
  void StartElement(std::vector<uint8_t>* out) { StartElementTmpl(out); }
  void StartElement(std::string* out) { StartElementTmpl(out); }

  Container container() const { return container_; }

 private:
  template <typename C>
  void StartElementTmpl(C* out) {
    // At the top level, only a single element may be started.
    assert(container_ != Container::NONE || size_ == 0);
    if (size_ != 0) {
      // Map elements alternate between keys and values, so an odd count
      // means a key was just written and its value comes next (':').
      // Everything else, including all array elements, is separated by ','.
      const bool value_follows_key =
          (size_ & 1) && container_ != Container::ARRAY;
      out->push_back(value_follows_key ? ':' : ',');
    }
    ++size_;
  }

  Container container_ = Container::NONE;
  // Number of elements started so far; in a map, keys and values each count.
  int size_ = 0;
};
// The standard base64 alphabet: the character at index i encodes the 6 bit
// value i.
constexpr char kBase64Table[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz0123456789+/";
template <typename C>
void Base64Encode(const span<uint8_t>& in, C* out) {
// The following three cases are based on the tables in the example
// section in https://en.wikipedia.org/wiki/Base64. We process three
// input bytes at a time, emitting 4 output bytes at a time.
size_t ii = 0;
// While possible, process three input bytes.
for (; ii + 3 <= in.size(); ii += 3) {
uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8) | in[ii + 2];
out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
out->push_back(kBase64Table[twentyfour_bits & 0x3f]);
}
if (ii + 2 <= in.size()) { // Process two input bytes.
uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8);
out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
out->push_back('='); // Emit padding.
return;
}
if (ii + 1 <= in.size()) { // Process a single input byte.
uint32_t twentyfour_bits = (in[ii] << 16);
out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
out->push_back('='); // Emit padding.
out->push_back('='); // Emit padding.
}
}
// Implements a handler for JSON parser events to emit a JSON string.
template <typename C>
class JSONEncoder : public StreamingParserHandler {
public:
JSONEncoder(const Platform* platform, C* out, Status* status)
: platform_(platform), out_(out), status_(status) {
*status_ = Status();
state_.emplace(Container::NONE);
}
void HandleMapBegin() override {
if (!status_->ok())
return;
assert(!state_.empty());
state_.top().StartElement(out_);
state_.emplace(Container::MAP);
Emit('{');
}
void HandleMapEnd() override {
if (!status_->ok())
return;
assert(state_.size() >= 2 && state_.top().container() == Container::MAP);
state_.pop();
Emit('}');
}
void HandleArrayBegin() override {
if (!status_->ok())
return;
state_.top().StartElement(out_);
state_.emplace(Container::ARRAY);
Emit('[');
}
void HandleArrayEnd() override {
if (!status_->ok())
return;
assert(state_.size() >= 2 && state_.top().container() == Container::ARRAY);
state_.pop();
Emit(']');
}
void HandleString16(span<uint16_t> chars) override {
if (!status_->ok())
return;
state_.top().StartElement(out_);
Emit('"');
for (const uint16_t ch : chars) {
if (ch == '"') {
Emit("\\\"");
} else if (ch == '\\') {
Emit("\\\\");
} else if (ch == '\b') {
Emit("\\b");
} else if (ch == '\f') {
Emit("\\f");
} else if (ch == '\n') {
Emit("\\n");
} else if (ch == '\r') {
Emit("\\r");
} else if (ch == '\t') {
Emit("\\t");
} else if (ch >= 32 && ch <= 126) {
Emit(ch);
} else {
Emit("\\u");
PrintHex(ch, out_);
}
}
Emit('"');
}
void HandleString8(span<uint8_t> chars) override {
if (!status_->ok())
return;
state_.top().StartElement(out_);
Emit('"');
for (size_t ii = 0; ii < chars.size(); ++ii) {
uint8_t c = chars[ii];
if (c == '"') {
Emit("\\\"");
} else if (c == '\\') {
Emit("\\\\");
} else if (c == '\b') {
Emit("\\b");
} else if (c == '\f') {
Emit("\\f");
} else if (c == '\n') {
Emit("\\n");
} else if (c == '\r') {
Emit("\\r");
} else if (c == '\t') {
Emit("\\t");
} else if (c >= 32 && c <= 126) {
Emit(c);
} else if (c < 32) {
Emit("\\u");
PrintHex(static_cast<uint16_t>(c), out_);
} else {
// Inspect the leading byte to figure out how long the utf8
// byte sequence is; while doing this initialize |codepoint|
// with the first few bits.
// See table in: https://en.wikipedia.org/wiki/UTF-8
// byte one is 110x xxxx -> 2 byte utf8 sequence
// byte one is 1110 xxxx -> 3 byte utf8 sequence
// byte one is 1111 0xxx -> 4 byte utf8 sequence
uint32_t codepoint;
int num_bytes_left;
if ((c & 0xe0) == 0xc0) { // 2 byte utf8 sequence
num_bytes_left = 1;
codepoint = c & 0x1f;
} else if ((c & 0xf0) == 0xe0) { // 3 byte utf8 sequence
num_bytes_left = 2;
codepoint = c & 0x0f;
} else if ((c & 0xf8) == 0xf0) { // 4 byte utf8 sequence
codepoint = c & 0x07;
num_bytes_left = 3;
} else {
continue; // invalid leading byte
}
// If we have enough bytes in our input, decode the remaining ones
// belonging to this Unicode character into |codepoint|.
if (ii + num_bytes_left > chars.size())
continue;
while (num_bytes_left > 0) {
c = chars[++ii];
--num_bytes_left;
// Check the next byte is a continuation byte, that is 10xx xxxx.
if ((c & 0xc0) != 0x80)
continue;
codepoint = (codepoint << 6) | (c & 0x3f);
}
// Disallow overlong encodings for ascii characters, as these
// would include " and other characters significant to JSON
// string termination / control.
if (codepoint <= 0x7f)
continue;
// Invalid in UTF8, and can't be represented in UTF16 anyway.
if (codepoint > 0x10ffff)
continue;
// So, now we transcode to UTF16,
// using the math described at https://en.wikipedia.org/wiki/UTF-16,
// for either one or two 16 bit characters.
if (codepoint < 0xffff) {
Emit("\\u");
PrintHex(static_cast<uint16_t>(codepoint), out_);
continue;
}
codepoint -= 0x10000;
// high surrogate
Emit("\\u");
PrintHex(static_cast<uint16_t>((codepoint >> 10) + 0xd800), out_);
// low surrogate
Emit("\\u");
PrintHex(static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00), out_);
}
}
Emit('"');
}
// Writes |bytes| as a JSON string: an opening quote, whatever Base64Encode
// produces for |bytes|, and a closing quote. Does nothing once |status_|
// holds an error.
void HandleBinary(span<uint8_t> bytes) override {
  if (status_->ok()) {
    state_.top().StartElement(out_);
    Emit('"');
    Base64Encode(bytes, out_);
    Emit('"');
  }
}
// Writes |value| as a JSON number. Non-finite values (NaN, +/-Infinity) have
// no JSON representation and are written as null, like the JSON object in
// web browsers does. Does nothing once |status_| holds an error.
void HandleDouble(double value) override {
  if (!status_->ok())
    return;
  state_.top().StartElement(out_);
  if (std::isfinite(value)) {
    const std::unique_ptr<char[]> formatted = platform_->DToStr(value);
    const char* digits = formatted.get();
    // Platform::DToStr may omit the 0 before the decimal dot (".5", "-.5");
    // this is the case for base::NumberToString in Chromium. JSON requires
    // the leading digit, so it is supplied here, keeping the sign in front.
    if (digits[0] == '-' && digits[1] == '.') {
      Emit('-');
      ++digits;
    }
    if (digits[0] == '.')
      Emit('0');
    Emit(digits);
  } else {
    Emit("null");
  }
}
// Writes |value| in decimal notation, as produced by std::to_string.
// Does nothing once |status_| holds an error.
void HandleInt32(int32_t value) override {
  if (status_->ok()) {
    state_.top().StartElement(out_);
    const std::string decimal = std::to_string(value);
    Emit(decimal);
  }
}
// Writes the JSON literal true or false for |value|.
// Does nothing once |status_| holds an error.
void HandleBool(bool value) override {
  if (status_->ok()) {
    state_.top().StartElement(out_);
    if (value) {
      Emit("true");
    } else {
      Emit("false");
    }
  }
}
// Writes the JSON literal null.
// Does nothing once |status_| holds an error.
void HandleNull() override {
  if (status_->ok()) {
    state_.top().StartElement(out_);
    Emit("null");
  }
}
// Records |error| in |*status_| and discards everything written to |*out_|
// so far. |error| must describe a failure, never Error::OK.
void HandleError(Status error) override {
  assert(error.error != Error::OK);
  *status_ = error;
  out_->clear();
}
private:
// Appends the single character |c| to the output container.
void Emit(char c) {
  out_->push_back(c);
}
// Appends the \0 terminated string |str| (without the terminator).
void Emit(const char* str) {
  const size_t num_chars = strlen(str);
  out_->insert(out_->end(), str, str + num_chars);
}
// Appends all characters of |str|.
void Emit(const std::string& str) {
  const char* const first = str.data();
  out_->insert(out_->end(), first, first + str.size());
}
const Platform* platform_;
C* out_;
Status* status_;
std::stack<State> state_;
};
} // namespace
std::unique_ptr<StreamingParserHandler> NewJSONEncoder(
const Platform* platform,
std::vector<uint8_t>* out,
Status* status) {
return std::unique_ptr<StreamingParserHandler>(
new JSONEncoder<std::vector<uint8_t>>(platform, out, status));
}
std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform,
std::string* out,
Status* status) {
return std::unique_ptr<StreamingParserHandler>(
new JSONEncoder<std::string>(platform, out, status));
}
// =============================================================================
// json::ParseJSON - for receiving streaming parser events for JSON.
// =============================================================================
namespace {
// Maximum nesting depth accepted by JsonParser::ParseValue. Input nested more
// deeply is rejected with Error::JSON_PARSER_STACK_LIMIT_EXCEEDED.
const int kStackLimit = 300;
// Kinds of lexical tokens that JsonParser::ParseToken distinguishes.
enum Token {
ObjectBegin,  // '{'
ObjectEnd,  // '}'
ArrayBegin,  // '['
ArrayEnd,  // ']'
StringLiteral,  // A double-quoted string, including both quotes.
Number,  // A numeric literal, see ParseNumberToken.
BoolTrue,  // The literal true.
BoolFalse,  // The literal false.
NullToken,  // The literal null.
ListSeparator,  // ','
ObjectPairSeparator,  // ':'
InvalidToken,  // The input at this position does not form any token.
NoInput  // Only whitespace / comments were left before the end of input.
};
// Spellings of the JSON literals; ParseToken passes these to ParseConstToken
// when the input starts with 'n', 't' or 'f' respectively.
const char* const kNullString = "null";
const char* const kTrueString = "true";
const char* const kFalseString = "false";
template <typename Char>
class JsonParser {
public:
// Stores |platform| (used for string to double conversion) and |handler|
// (which receives the parse events and errors). Both are kept as raw
// pointers; this class does not delete them.
JsonParser(const Platform* platform, StreamingParserHandler* handler)
: platform_(platform), handler_(handler) {}
void Parse(const Char* start, size_t length) {
start_pos_ = start;
const Char* end = start + length;
const Char* tokenEnd = nullptr;
ParseValue(start, end, &tokenEnd, 0);
if (error_)
return;
if (tokenEnd != end) {
HandleError(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, tokenEnd);
}
}
private:
// Converts the UTF16 number token |chars| of |length| code units to a
// double via Platform::StrToD. Returns false if a code unit is outside of
// 7 bit ASCII, otherwise whatever StrToD returns.
bool CharsToDouble(const uint16_t* chars, size_t length, double* result) {
  std::string ascii;
  ascii.reserve(length + 1);
  const uint16_t* const stop = chars + length;
  for (const uint16_t* it = chars; it != stop; ++it) {
    if (*it > 0x7F)
      return false;
    ascii.push_back(static_cast<char>(*it));
  }
  return platform_->StrToD(ascii.c_str(), result);
}
// Converts the number token |chars| of |length| bytes to a double via
// Platform::StrToD, after copying it into a \0 terminated buffer.
bool CharsToDouble(const uint8_t* chars, size_t length, double* result) {
  const char* const text = reinterpret_cast<const char*>(chars);
  const std::string terminated(text, length);
  return platform_->StrToD(terminated.c_str(), result);
}
// Checks whether the input in [start, end) begins with the \0 terminated
// literal |token| (one of kNullString, kTrueString, kFalseString).
// On a match, sets |*token_end| to the position following the literal and
// returns true; otherwise returns false and leaves |*token_end| untouched.
//
// Each character is compared before either cursor advances. The previous
// loop advanced both cursors as part of the comparison itself, so a mismatch
// on the very last character left |token| pointing at its terminator and the
// function reported success, e.g. "nulx" was accepted as null.
static bool ParseConstToken(const Char* start,
                            const Char* end,
                            const Char** token_end,
                            const char* token) {
  for (; *token != '\0'; ++token, ++start) {
    // Input exhausted before the literal is complete, or wrong character.
    if (start == end || *start != *token)
      return false;
  }
  *token_end = start;
  return true;
}
static bool ReadInt(const Char* start,
const Char* end,
const Char** token_end,
bool allow_leading_zeros) {
if (start == end)
return false;
bool has_leading_zero = '0' == *start;
int length = 0;
while (start < end && '0' <= *start && *start <= '9') {
++start;
++length;
}
if (!length)
return false;
if (!allow_leading_zeros && length > 1 && has_leading_zero)
return false;
*token_end = start;
return true;
}
static bool ParseNumberToken(const Char* start,
const Char* end,
const Char** token_end) {
// We just grab the number here. We validate the size in DecodeNumber.
// According to RFC4627, a valid number is: [minus] int [frac] [exp]
if (start == end)
return false;
Char c = *start;
if ('-' == c)
++start;
if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/false))
return false;
if (start == end) {
*token_end = start;
return true;
}
// Optional fraction part
c = *start;
if ('.' == c) {
++start;
if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
return false;
if (start == end) {
*token_end = start;
return true;
}
c = *start;
}
// Optional exponent part
if ('e' == c || 'E' == c) {
++start;
if (start == end)
return false;
c = *start;
if ('-' == c || '+' == c) {
++start;
if (start == end)
return false;
}
if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
return false;
}
*token_end = start;
return true;
}
static bool ReadHexDigits(const Char* start,
const Char* end,
const Char** token_end,
int digits) {
if (end - start < digits)
return false;
for (int i = 0; i < digits; ++i) {
Char c = *start++;
if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
('A' <= c && c <= 'F')))
return false;
}
*token_end = start;
return true;
}
static bool ParseStringToken(const Char* start,
const Char* end,
const Char** token_end) {
while (start < end) {
Char c = *start++;
if ('\\' == c) {
if (start == end)
return false;
c = *start++;
// Make sure the escaped char is valid.
switch (c) {
case 'x':
if (!ReadHexDigits(start, end, &start, 2))
return false;
break;
case 'u':
if (!ReadHexDigits(start, end, &start, 4))
return false;
break;
case '\\':
case '/':
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
case 'v':
case '"':
break;
default:
return false;
}
} else if ('"' == c) {
*token_end = start;
return true;
}
}
return false;
}
static bool SkipComment(const Char* start,
const Char* end,
const Char** comment_end) {
if (start == end)
return false;
if (*start != '/' || start + 1 >= end)
return false;
++start;
if (*start == '/') {
// Single line comment, read to newline.
for (++start; start < end; ++start) {
if (*start == '\n' || *start == '\r') {
*comment_end = start + 1;
return true;
}
}
*comment_end = end;
// Comment reaches end-of-input, which is fine.
return true;
}
if (*start == '*') {
Char previous = '\0';
// Block comment, read until end marker.
for (++start; start < end; previous = *start++) {
if (previous == '*' && *start == '/') {
*comment_end = start + 1;
return true;
}
}
// Block comment must close before end-of-input.
return false;
}
return false;
}
// Returns true iff |c| is one of the characters skipped as whitespace:
// space, \n, \v (vertical tab), \f (form feed), \r, \t.
static bool IsSpaceOrNewLine(Char c) {
  switch (c) {
    case ' ':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
    case '\t':
      return true;
    default:
      return false;
  }
}
static void SkipWhitespaceAndComments(const Char* start,
const Char* end,
const Char** whitespace_end) {
while (start < end) {
if (IsSpaceOrNewLine(*start)) {
++start;
} else if (*start == '/') {
const Char* comment_end = nullptr;
if (!SkipComment(start, end, &comment_end))
break;
start = comment_end;
} else {
break;
}
}
*whitespace_end = start;
}
// Reads the next token from [start, end), skipping whitespace and comments
// in front of it. |*tokenStart| always receives the position at which the
// token begins (or where scanning stopped). |*token_end| receives the
// position following the token, but only when a token was recognized.
// Returns NoInput if nothing is left and InvalidToken if the input does not
// form a token.
static Token ParseToken(const Char* start,
                        const Char* end,
                        const Char** tokenStart,
                        const Char** token_end) {
  SkipWhitespaceAndComments(start, end, tokenStart);
  const Char* const first = *tokenStart;
  if (first == end)
    return NoInput;
  const Char lead = *first;

  // Tokens that consist of exactly one character.
  Token punctuation = InvalidToken;
  switch (lead) {
    case '[':
      punctuation = ArrayBegin;
      break;
    case ']':
      punctuation = ArrayEnd;
      break;
    case ',':
      punctuation = ListSeparator;
      break;
    case '{':
      punctuation = ObjectBegin;
      break;
    case '}':
      punctuation = ObjectEnd;
      break;
    case ':':
      punctuation = ObjectPairSeparator;
      break;
    default:
      break;
  }
  if (punctuation != InvalidToken) {
    *token_end = first + 1;
    return punctuation;
  }

  // Tokens whose extent is determined by a dedicated scanner, selected by
  // the leading character.
  bool matched = false;
  Token kind = InvalidToken;
  if (lead == 'n') {
    matched = ParseConstToken(first, end, token_end, kNullString);
    kind = NullToken;
  } else if (lead == 't') {
    matched = ParseConstToken(first, end, token_end, kTrueString);
    kind = BoolTrue;
  } else if (lead == 'f') {
    matched = ParseConstToken(first, end, token_end, kFalseString);
    kind = BoolFalse;
  } else if (lead == '-' || (lead >= '0' && lead <= '9')) {
    matched = ParseNumberToken(first, end, token_end);
    kind = Number;
  } else if (lead == '"') {
    // The string scanner expects to start behind the opening quote.
    matched = ParseStringToken(first + 1, end, token_end);
    kind = StringLiteral;
  }
  return matched ? kind : InvalidToken;
}
// Returns the numeric value (0 - 15) of the hexadecimal digit |c|, which may
// be upper or lower case. Callers must pass a valid digit; anything else
// trips the assert and yields 0 when asserts are compiled out.
static int HexToInt(Char c) {
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');
  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  assert(false);  // Unreachable.
  return 0;
}
// Decodes the contents of a JSON string literal - the characters in
// [start, end), that is, without the surrounding quotes - and appends the
// result to |output| as UTF16 code units. Returns false if the input is
// malformed; |output| may then hold a partial result.
//
// When |Char| is a byte, the input is treated as UTF8 and multi-byte
// sequences are transcoded to one or two UTF16 code units. Backslash
// escapes are resolved; \x is rejected.
static bool DecodeString(const Char* start,
                         const Char* end,
                         std::vector<uint16_t>* output) {
  if (start == end)
    return true;
  if (start > end)
    return false;
  output->reserve(end - start);
  while (start < end) {
    uint16_t c = *start++;
    // If the |Char| we're dealing with is really a byte, then
    // we have utf8 here, and we need to check for multibyte characters
    // and transcode them to utf16 (either one or two utf16 chars).
    if (sizeof(Char) == sizeof(uint8_t) && c > 0x7f) {
      // Inspect the leading byte to figure out how long the utf8
      // byte sequence is; while doing this initialize |codepoint|
      // with the first few bits.
      // See table in: https://en.wikipedia.org/wiki/UTF-8
      // byte one is 110x xxxx -> 2 byte utf8 sequence
      // byte one is 1110 xxxx -> 3 byte utf8 sequence
      // byte one is 1111 0xxx -> 4 byte utf8 sequence
      uint32_t codepoint;
      int num_bytes_left;
      if ((c & 0xe0) == 0xc0) {  // 2 byte utf8 sequence
        num_bytes_left = 1;
        codepoint = c & 0x1f;
      } else if ((c & 0xf0) == 0xe0) {  // 3 byte utf8 sequence
        num_bytes_left = 2;
        codepoint = c & 0x0f;
      } else if ((c & 0xf8) == 0xf0) {  // 4 byte utf8 sequence
        codepoint = c & 0x07;
        num_bytes_left = 3;
      } else {
        return false;  // invalid leading byte
      }
      // If we have enough bytes in our input, decode the remaining ones
      // belonging to this Unicode character into |codepoint|. The check
      // compares distances so that no pointer beyond |end| is formed.
      if (end - start < num_bytes_left)
        return false;
      while (num_bytes_left > 0) {
        c = *start++;
        --num_bytes_left;
        // Check the next byte is a continuation byte, that is 10xx xxxx.
        if ((c & 0xc0) != 0x80)
          return false;
        codepoint = (codepoint << 6) | (c & 0x3f);
      }
      // Disallow overlong encodings for ascii characters, as these
      // would include " and other characters significant to JSON
      // string termination / control.
      if (codepoint <= 0x7f)
        return false;
      // Invalid in UTF8, and can't be represented in UTF16 anyway.
      if (codepoint > 0x10ffff)
        return false;
      // So, now we transcode to UTF16,
      // using the math described at https://en.wikipedia.org/wiki/UTF-16,
      // for either one or two 16 bit characters.
      // Everything up to and including U+FFFF fits into a single code unit.
      // The bound has to be inclusive: sending U+FFFF down the surrogate
      // path would make the subtraction below wrap around.
      if (codepoint <= 0xffff) {
        output->push_back(static_cast<uint16_t>(codepoint));
        continue;
      }
      codepoint -= 0x10000;
      output->push_back(
          static_cast<uint16_t>((codepoint >> 10) + 0xd800));  // high surrogate
      output->push_back(
          static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00));  // low surrogate
      continue;
    }
    if ('\\' != c) {
      output->push_back(c);
      continue;
    }
    // A backslash must be followed by an escape character.
    if (start == end)
      return false;
    c = *start++;
    if (c == 'x') {
      // \x is not supported.
      return false;
    }
    switch (c) {
      case '"':
      case '/':
      case '\\':
        break;
      case 'b':
        c = '\b';
        break;
      case 'f':
        c = '\f';
        break;
      case 'n':
        c = '\n';
        break;
      case 'r':
        c = '\r';
        break;
      case 't':
        c = '\t';
        break;
      case 'v':
        c = '\v';
        break;
      case 'u':
        // Four hex digits follow. ParseStringToken verifies this for every
        // string token; the length is re-checked here so that this function
        // never reads beyond |end| on its own.
        if (end - start < 4)
          return false;
        c = (HexToInt(*start) << 12) + (HexToInt(*(start + 1)) << 8) +
            (HexToInt(*(start + 2)) << 4) + HexToInt(*(start + 3));
        start += 4;
        break;
      default:
        return false;
    }
    output->push_back(c);
  }
  return true;
}
// Parses one JSON value beginning at |start| and reports it to |handler_|,
// calling itself recursively for array elements and object values.
// On success, |*value_token_end| is set to the position following the value
// and any whitespace / comments behind it. On failure, HandleError is invoked
// and the function returns without writing |*value_token_end|.
// |depth| is the current nesting level; exceeding kStackLimit is an error.
void ParseValue(const Char* start,
const Char* end,
const Char** value_token_end,
int depth) {
if (depth > kStackLimit) {
HandleError(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, start);
return;
}
const Char* token_start = nullptr;
const Char* token_end = nullptr;
Token token = ParseToken(start, end, &token_start, &token_end);
switch (token) {
case NoInput:
HandleError(Error::JSON_PARSER_NO_INPUT, token_start);
return;
case InvalidToken:
HandleError(Error::JSON_PARSER_INVALID_TOKEN, token_start);
return;
case NullToken:
handler_->HandleNull();
break;
case BoolTrue:
handler_->HandleBool(true);
break;
case BoolFalse:
handler_->HandleBool(false);
break;
case Number: {
double value;
if (!CharsToDouble(token_start, token_end - token_start, &value)) {
HandleError(Error::JSON_PARSER_INVALID_NUMBER, token_start);
return;
}
// Report the number as an int32 if it is integral and within the int32
// range, and as a double otherwise. The range is checked first so that
// the cast is only evaluated for values that fit.
if (value >= std::numeric_limits<int32_t>::min() &&
value <= std::numeric_limits<int32_t>::max() &&
static_cast<int32_t>(value) == value)
handler_->HandleInt32(static_cast<int32_t>(value));
else
handler_->HandleDouble(value);
break;
}
case StringLiteral: {
std::vector<uint16_t> value;
// The token includes both quotes; decode only what is between them.
bool ok = DecodeString(token_start + 1, token_end - 1, &value);
if (!ok) {
HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
return;
}
handler_->HandleString16(span<uint16_t>(value.data(), value.size()));
break;
}
case ArrayBegin: {
handler_->HandleArrayBegin();
start = token_end;
// Peek at the next token: either the end of the array or the first token
// of an element. In the latter case the element is parsed again from
// |start| by the recursive call below.
token = ParseToken(start, end, &token_start, &token_end);
while (token != ArrayEnd) {
ParseValue(start, end, &token_end, depth + 1);
if (error_)
return;
// After a list value, we expect a comma or the end of the list.
start = token_end;
token = ParseToken(start, end, &token_start, &token_end);
if (token == ListSeparator) {
start = token_end;
token = ParseToken(start, end, &token_start, &token_end);
// A comma directly followed by the end of the array is rejected.
if (token == ArrayEnd) {
HandleError(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, token_start);
return;
}
} else if (token != ArrayEnd) {
// Unexpected value after list value. Bail out.
HandleError(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED,
token_start);
return;
}
}
handler_->HandleArrayEnd();
break;
}
case ObjectBegin: {
handler_->HandleMapBegin();
start = token_end;
token = ParseToken(start, end, &token_start, &token_end);
while (token != ObjectEnd) {
// Each entry is: string literal key, colon, value.
if (token != StringLiteral) {
HandleError(Error::JSON_PARSER_STRING_LITERAL_EXPECTED,
token_start);
return;
}
std::vector<uint16_t> key;
if (!DecodeString(token_start + 1, token_end - 1, &key)) {
HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
return;
}
// The key is reported through the same event as a string value.
handler_->HandleString16(span<uint16_t>(key.data(), key.size()));
start = token_end;
token = ParseToken(start, end, &token_start, &token_end);
if (token != ObjectPairSeparator) {
HandleError(Error::JSON_PARSER_COLON_EXPECTED, token_start);
return;
}
start = token_end;
ParseValue(start, end, &token_end, depth + 1);
if (error_)
return;
start = token_end;
// After a key/value pair, we expect a comma or the end of the
// object.
token = ParseToken(start, end, &token_start, &token_end);
if (token == ListSeparator) {
start = token_end;
token = ParseToken(start, end, &token_start, &token_end);
// A comma directly followed by the end of the object is rejected.
if (token == ObjectEnd) {
HandleError(Error::JSON_PARSER_UNEXPECTED_MAP_END, token_start);
return;
}
} else if (token != ObjectEnd) {
// Unexpected value after last object value. Bail out.
HandleError(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED,
token_start);
return;
}
}
handler_->HandleMapEnd();
break;
}
default:
// We got a token that's not a value.
HandleError(Error::JSON_PARSER_VALUE_EXPECTED, token_start);
return;
}
// Leave the caller positioned at the next significant character.
SkipWhitespaceAndComments(token_end, end, value_token_end);
}
// Reports |error| to |handler_|, together with the offset of |pos| from the
// beginning of the input, and marks this parser as failed. Only the first
// error is forwarded; later calls are ignored.
void HandleError(Error error, const Char* pos) {
  assert(error != Error::OK);
  if (error_)
    return;
  const size_t offset = static_cast<size_t>(pos - start_pos_);
  handler_->HandleError(Status{error, offset});
  error_ = true;
}
const Char* start_pos_ = nullptr;
bool error_ = false;
const Platform* platform_;
StreamingParserHandler* handler_;
};
} // namespace
// Parses the UTF8 encoded JSON text |chars| and sends the resulting events,
// or an error, to |handler|.
void ParseJSON(const Platform& platform,
               span<uint8_t> chars,
               StreamingParserHandler* handler) {
  JsonParser<uint8_t>(&platform, handler).Parse(chars.data(), chars.size());
}
// Parses the UTF16 encoded JSON text |chars| and sends the resulting events,
// or an error, to |handler|.
void ParseJSON(const Platform& platform,
               span<uint16_t> chars,
               StreamingParserHandler* handler) {
  JsonParser<uint16_t>(&platform, handler).Parse(chars.data(), chars.size());
}
// =============================================================================
// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding
// =============================================================================
// Shared implementation of the ConvertCBORToJSON overloads: feeds |cbor|
// through cbor::ParseCBOR into a JSON encoder that writes to |json|, and
// returns the status that the encoder recorded.
template <typename C>
Status ConvertCBORToJSONTmpl(const Platform& platform,
                             span<uint8_t> cbor,
                             C* json) {
  Status result;
  const std::unique_ptr<StreamingParserHandler> to_json =
      NewJSONEncoder(&platform, json, &result);
  cbor::ParseCBOR(cbor, to_json.get());
  return result;
}
// Transcodes the CBOR message |cbor| to JSON, written to the byte vector
// |json|.
Status ConvertCBORToJSON(const Platform& platform,
                         span<uint8_t> cbor,
                         std::vector<uint8_t>* json) {
  return ConvertCBORToJSONTmpl<std::vector<uint8_t>>(platform, cbor, json);
}
// Transcodes the CBOR message |cbor| to JSON, written to the string |json|.
Status ConvertCBORToJSON(const Platform& platform,
                         span<uint8_t> cbor,
                         std::string* json) {
  return ConvertCBORToJSONTmpl<std::string>(platform, cbor, json);
}
// Shared implementation of the ConvertJSONToCBOR overloads: parses |json|
// with ParseJSON, driving a CBOR encoder that writes to |cbor|, and returns
// the status that the encoder recorded.
template <typename T, typename C>
Status ConvertJSONToCBORTmpl(const Platform& platform, span<T> json, C* cbor) {
  Status result;
  const std::unique_ptr<StreamingParserHandler> to_cbor =
      cbor::NewCBOREncoder(cbor, &result);
  ParseJSON(platform, json, to_cbor.get());
  return result;
}
// Transcodes UTF8 encoded JSON to CBOR, written to the string |cbor|.
Status ConvertJSONToCBOR(const Platform& platform,
                         span<uint8_t> json,
                         std::string* cbor) {
  return ConvertJSONToCBORTmpl<uint8_t, std::string>(platform, json, cbor);
}
// Transcodes UTF16 encoded JSON to CBOR, written to the string |cbor|.
Status ConvertJSONToCBOR(const Platform& platform,
                         span<uint16_t> json,
                         std::string* cbor) {
  return ConvertJSONToCBORTmpl<uint16_t, std::string>(platform, json, cbor);
}
// Transcodes UTF8 encoded JSON to CBOR, written to the byte vector |cbor|.
Status ConvertJSONToCBOR(const Platform& platform,
                         span<uint8_t> json,
                         std::vector<uint8_t>* cbor) {
  return ConvertJSONToCBORTmpl<uint8_t, std::vector<uint8_t>>(platform, json,
                                                              cbor);
}
// Transcodes UTF16 encoded JSON to CBOR, written to the byte vector |cbor|.
Status ConvertJSONToCBOR(const Platform& platform,
                         span<uint16_t> json,
                         std::vector<uint8_t>* cbor) {
  return ConvertJSONToCBORTmpl<uint16_t, std::vector<uint8_t>>(platform, json,
                                                               cbor);
}
} // namespace json
{% for namespace in config.protocol.namespace %}
} // namespace {{namespace}}
{% endfor %}
{% endif %}
{# This template is generated by gen_cbor_templates.py. #}
// Generated by lib/encoding_h.template.
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
{% if config.encoding_lib.header == "" %}
#ifndef {{"_".join(config.protocol.namespace)}}_encoding_h
#define {{"_".join(config.protocol.namespace)}}_encoding_h
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <vector>
{% for namespace in config.protocol.namespace %}
namespace {{namespace}} {
{% endfor %}
// ===== encoding/encoding.h =====
// This library is designed to be portable. The only allowed dependency
// are the C/C++ standard libraries, up to C++11. We support both 32 bit
// and 64 bit architectures.
//
// Types used below:
// uint8_t: a byte, e.g. for raw bytes or UTF8 characters
// uint16_t: two bytes, e.g. for UTF16 characters
// For input parameters:
// span<uint8_t>: pointer to bytes and length
// span<uint16_t>: pointer to UTF16 chars and length
// For output parameters:
// std::vector<uint8_t> - Owned segment of bytes / utf8 characters and length.
// std::string - Same, for compatibility, even though char is signed.
// =============================================================================
// span - sequence of bytes
// =============================================================================
// This template is similar to std::span, which will be included in C++20.
// A non-owning, read-only view of |size| consecutive elements of type T.
// Copying a span copies the pointer and the length, never the elements.
// None of the accessors perform bounds checks.
template <typename T>
class span {
 public:
  using index_type = size_t;

  // An empty span: no data, size 0.
  span() : span(nullptr, 0) {}
  // A span over the |size| elements beginning at |data|.
  span(const T* data, index_type size) : data_(data), size_(size) {}

  const T* data() const { return data_; }

  // Iteration support, e.g. for range-based for loops.
  const T* begin() const { return data(); }
  const T* end() const { return begin() + size(); }

  const T& operator[](index_type idx) const { return *(data_ + idx); }

  // The |count| elements beginning at |offset|.
  span subspan(index_type offset, index_type count) const {
    return span(data_ + offset, count);
  }
  // Everything from |offset| up to the end.
  span subspan(index_type offset) const {
    return subspan(offset, size_ - offset);
  }

  bool empty() const { return size() == 0; }
  // Number of elements.
  index_type size() const { return size_; }
  // Number of bytes occupied by the elements.
  index_type size_bytes() const { return sizeof(T) * size_; }

 private:
  const T* data_;
  index_type size_;
};
// Returns a span covering all elements of |v|. The span refers to the
// storage of |v| and does not copy it.
template <typename T>
span<T> SpanFrom(const std::vector<T>& v) {
  const T* const first = v.data();
  return span<T>(first, v.size());
}
// Returns a byte span over the character array |str|, leaving out its last
// element (the terminating \0 in case of a string literal).
template <size_t N>
span<uint8_t> SpanFrom(const char (&str)[N]) {
  const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(str);
  return span<uint8_t>(bytes, N - 1);
}
// Returns a byte span over the \0 terminated string |str|, without the
// terminator. A null |str| yields an empty span.
inline span<uint8_t> SpanFrom(const char* str) {
  if (!str)
    return span<uint8_t>();
  // <cstring> guarantees the declaration in namespace std only, so the call
  // is qualified.
  return span<uint8_t>(reinterpret_cast<const uint8_t*>(str),
                       std::strlen(str));
}
// Returns a byte span over the characters of |v|. The span refers to the
// storage of |v| and does not copy it.
inline span<uint8_t> SpanFrom(const std::string& v) {
  const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(v.data());
  return span<uint8_t>(bytes, v.size());
}
// Less than / equality comparison functions for sorting / searching for byte
// spans. These are similar to absl::string_view's < and == operators.
// Returns true iff |x| orders before |y|: the bytes are compared over the
// length of the shorter span, and if those are equal, the shorter span
// orders first.
inline bool SpanLessThan(span<uint8_t> x, span<uint8_t> y) noexcept {
  const auto min_size = std::min(x.size(), y.size());
  // The comparison is skipped for a common length of 0, where data() may be
  // null. The call is qualified like the one in SpanEquals; <cstring>
  // guarantees the declaration in namespace std only.
  const int r = min_size == 0 ? 0 : std::memcmp(x.data(), y.data(), min_size);
  return (r < 0) || (r == 0 && x.size() < y.size());
}
// Returns true iff |x| and |y| have the same length and the same bytes.
inline bool SpanEquals(span<uint8_t> x, span<uint8_t> y) noexcept {
  if (x.size() != y.size())
    return false;
  // Same memory, or nothing to compare: equal without looking at the bytes.
  if (x.data() == y.data() || x.empty())
    return true;
  return std::memcmp(x.data(), y.data(), x.size()) == 0;
}
// =============================================================================
// Status and Error codes
// =============================================================================
// Error codes carried by Status. Every enumerator has an explicit value.
enum class Error {
// No error.
OK = 0,
// JSON parsing errors - json_parser.{h,cc}.
JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01,
JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02,
JSON_PARSER_NO_INPUT = 0x03,
JSON_PARSER_INVALID_TOKEN = 0x04,
JSON_PARSER_INVALID_NUMBER = 0x05,
JSON_PARSER_INVALID_STRING = 0x06,
JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07,
JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08,
JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09,
JSON_PARSER_COLON_EXPECTED = 0x0a,
JSON_PARSER_UNEXPECTED_MAP_END = 0x0b,
JSON_PARSER_COMMA_OR_MAP_END_EXPECTED = 0x0c,
JSON_PARSER_VALUE_EXPECTED = 0x0d,
// Errors carrying the CBOR_ prefix.
CBOR_INVALID_INT32 = 0x0e,
CBOR_INVALID_DOUBLE = 0x0f,
CBOR_INVALID_ENVELOPE = 0x10,
CBOR_INVALID_STRING8 = 0x11,
CBOR_INVALID_STRING16 = 0x12,
CBOR_INVALID_BINARY = 0x13,
CBOR_UNSUPPORTED_VALUE = 0x14,
CBOR_NO_INPUT = 0x15,
CBOR_INVALID_START_BYTE = 0x16,
CBOR_UNEXPECTED_EOF_EXPECTED_VALUE = 0x17,
CBOR_UNEXPECTED_EOF_IN_ARRAY = 0x18,
CBOR_UNEXPECTED_EOF_IN_MAP = 0x19,
CBOR_INVALID_MAP_KEY = 0x1a,
CBOR_STACK_LIMIT_EXCEEDED = 0x1b,
CBOR_TRAILING_JUNK = 0x1c,
CBOR_MAP_START_EXPECTED = 0x1d,
CBOR_MAP_STOP_EXPECTED = 0x1e,
CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED = 0x1f,
};
// A status value with position that can be copied. The default status
// is OK. Usually, error status values should come with a valid position.
struct Status {
// The position value of a default constructed Status: the largest size_t.
static constexpr size_t npos() { return std::numeric_limits<size_t>::max(); }
// True iff no error is recorded.
bool ok() const { return error == Error::OK; }
// The error code; Error::OK unless set otherwise.
Error error = Error::OK;
// Position that the error refers to; npos() unless set otherwise.
size_t pos = npos();
// Creates a status with the given |error| code and |pos|.
Status(Error error, size_t pos) : error(error), pos(pos) {}
// Creates the default status: Error::OK at npos().
Status() = default;
// Returns a 7 bit US-ASCII string, either "OK" or an error message
// that includes the position.
std::string ToASCIIString() const;
private:
// Helper taking a message |msg|; defined outside of this header section.
std::string ToASCIIString(const char* msg) const;
};
// Handler interface for parser events emitted by a streaming parser.
// See cbor::NewCBOREncoder, cbor::ParseCBOR, json::NewJSONEncoder,
// json::ParseJSON.
class StreamingParserHandler {
public:
virtual ~StreamingParserHandler() = default;
// Begin / end of a map and of an array.
virtual void HandleMapBegin() = 0;
virtual void HandleMapEnd() = 0;
virtual void HandleArrayBegin() = 0;
virtual void HandleArrayEnd() = 0;
// A string, given either as UTF8 bytes or as UTF16 code units.
virtual void HandleString8(span<uint8_t> chars) = 0;
virtual void HandleString16(span<uint16_t> chars) = 0;
// Arbitrary binary data.
virtual void HandleBinary(span<uint8_t> bytes) = 0;
// Scalar values.
virtual void HandleDouble(double value) = 0;
virtual void HandleInt32(int32_t value) = 0;
virtual void HandleBool(bool value) = 0;
virtual void HandleNull() = 0;
// The parser may send one error even after other events have already
// been received. Client code is responsible to then discard the
// already processed events.
// |error| must be an error, as in, |error.ok()| can't be true.
virtual void HandleError(Status error) = 0;
};
namespace cbor {
// The binary encoding for the inspector protocol follows the CBOR specification
// (RFC 7049). Additional constraints:
// - Only indefinite length maps and arrays are supported.
// - Maps and arrays are wrapped with an envelope, that is, a
// CBOR tag with value 24 followed by a byte string specifying
// the byte length of the enclosed map / array. The byte string
// must use a 32 bit wide length.
// - At the top level, a message must be an indefinite length map
// wrapped by an envelope.
// - Maximal size for messages is 2^32 (4 GB).
// - For scalars, we support only the int32_t range, encoded as
// UNSIGNED/NEGATIVE (major types 0 / 1).
// - UTF16 strings, including with unbalanced surrogate pairs, are encoded
// as CBOR BYTE_STRING (major type 2). For such strings, the number of
// bytes encoded must be even.
// - UTF8 strings (major type 3) are supported.
// - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never
// as UTF16 strings.
// - Arbitrary byte arrays, in the inspector protocol called 'binary',
// are encoded as BYTE_STRING (major type 2), prefixed with a byte
// indicating base64 when rendered as JSON.
// =============================================================================
// Detecting CBOR content
// =============================================================================
// The first byte for an envelope, which we use for wrapping dictionaries
// and arrays; and the byte that indicates a byte string with 32 bit length.
// These two bytes start an envelope, and thereby also any CBOR message
// produced or consumed by this protocol. See also |EnvelopeEncoder| below.
uint8_t InitialByteForEnvelope();
uint8_t InitialByteFor32BitLengthByteString();
// Checks whether |msg| is a cbor message.
bool IsCBORMessage(span<uint8_t> msg);
// =============================================================================
// Encoding individual CBOR items
// =============================================================================
// Some constants for CBOR tokens that only take a single byte on the wire.
uint8_t EncodeTrue();
uint8_t EncodeFalse();
uint8_t EncodeNull();
uint8_t EncodeIndefiniteLengthArrayStart();
uint8_t EncodeIndefiniteLengthMapStart();
uint8_t EncodeStop();
// Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE|
// (major type 1) iff < 0.
void EncodeInt32(int32_t value, std::vector<uint8_t>* out);
void EncodeInt32(int32_t value, std::string* out);
// Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16
// character in |in| is emitted with most significant byte first,
// appending to |out|.
void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out);
void EncodeString16(span<uint16_t> in, std::string* out);
// Encodes a UTF8 string |in| as STRING (major type 3).
void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out);
void EncodeString8(span<uint8_t> in, std::string* out);
// Encodes the given |latin1| string as STRING8.
// If any non-ASCII character is present, it will be represented
// as a 2 byte UTF8 sequence.
void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out);
void EncodeFromLatin1(span<uint8_t> latin1, std::string* out);
// Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII.
// Otherwise, encodes as STRING16.
void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out);
void EncodeFromUTF16(span<uint16_t> utf16, std::string* out);
// Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with
// definite length, prefixed with tag 22 indicating expected conversion to
// base64 (see RFC 7049, Table 3 and Section 2.4.4.2).
void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out);
void EncodeBinary(span<uint8_t> in, std::string* out);
// Encodes / decodes a double as Major type 7 (SIMPLE_VALUE),
// with additional info = 27, followed by 8 bytes in big endian.
void EncodeDouble(double value, std::vector<uint8_t>* out);
void EncodeDouble(double value, std::string* out);
// =============================================================================
// cbor::EnvelopeEncoder - for wrapping submessages
// =============================================================================
// An envelope indicates the byte length of a wrapped item.
// We use this for maps and arrays, which allows the decoder
// to skip such (nested) values wholesale.
// It's implemented as a CBOR tag (major type 6) with additional
// info = 24, followed by a byte string with a 32 bit length value;
// so the maximal structure that we can wrap is 2^32 bytes long.
// See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1
class EnvelopeEncoder {
public:
// Emits the envelope start bytes and records the position for the
// byte size in |byte_size_pos_|. Also emits empty bytes for the
// byte size so that encoding can continue.
void EncodeStart(std::vector<uint8_t>* out);
void EncodeStart(std::string* out);
// This records the current size in |out| at position byte_size_pos_.
// Returns true iff successful.
bool EncodeStop(std::vector<uint8_t>* out);
bool EncodeStop(std::string* out);
private:
// Position in the output at which the byte size is to be written;
// recorded by EncodeStart and used by EncodeStop.
size_t byte_size_pos_ = 0;
};
// =============================================================================
// cbor::NewCBOREncoder - for encoding from a streaming parser
// =============================================================================
// This can be used to convert to CBOR, by passing the return value to a parser
// that drives it. The handler will encode into |out|, and iff an error occurs
// it will set |status| to an error and clear |out|. Otherwise, |status.ok()|
// will be |true|.
std::unique_ptr<StreamingParserHandler> NewCBOREncoder(
std::vector<uint8_t>* out,
Status* status);
std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out,
Status* status);
// =============================================================================
// cbor::CBORTokenizer - for parsing individual CBOR items
// =============================================================================
// Tags for the tokens within a CBOR message that CBORTokenizer understands.
// Note that this is not the same terminology as the CBOR spec (RFC 7049),
// but rather, our adaptation. For instance, we lump unsigned and signed
// major type into INT32 here (and disallow values outside the int32_t range).
enum class CBORTokenTag {
// Encountered an error in the structure of the message. Consult
// status() for details.
ERROR_VALUE,
// Booleans and NULL.
TRUE_VALUE,
FALSE_VALUE,
NULL_VALUE,
// An int32_t (signed 32 bit integer).
INT32,
// A double (64 bit floating point).
DOUBLE,
// A UTF8 string.
STRING8,
// A UTF16 string.
STRING16,
// A binary string.
BINARY,
// Starts an indefinite length map; after the map start we expect
// alternating keys and values, followed by STOP.
MAP_START,
// Starts an indefinite length array; after the array start we
// expect values, followed by STOP.
ARRAY_START,
// Ends a map or an array.
STOP,
// An envelope indicator, wrapping a map or array.
// Internally this carries the byte length of the wrapped
// map or array. While CBORTokenizer::Next() will read / skip the entire
// envelope, CBORTokenizer::EnterEnvelope() reads the tokens
// inside of it.
ENVELOPE,
// We've reached the end; there is nothing else to read.
DONE,
};
// The eight major types defined in RFC 7049 Section 2.1. The numeric values
// are the ones assigned by the spec, so they must not be changed.
enum class MajorType {
  UNSIGNED = 0,      // Unsigned integer.
  NEGATIVE = 1,      // Negative integer.
  BYTE_STRING = 2,   // Byte string.
  STRING = 3,        // Text string.
  ARRAY = 4,         // Array of data items.
  MAP = 5,           // Map of pairs of data items.
  TAG = 6,           // Semantic tagging of other major types.
  SIMPLE_VALUE = 7,  // Floating point numbers and simple data types.
};
// CBORTokenizer segments a CBOR message, presenting the tokens therein as
// numbers, strings, etc. This is not a complete CBOR parser, but makes it much
// easier to implement one (e.g. ParseCBOR, below). It can also be used to parse
// messages partially.
//
// Usage sketch: inspect TokenTag(), retrieve the value with the Get* accessor
// that matches the tag, then advance with Next() (or EnterEnvelope() for an
// ENVELOPE token) until TokenTag() reports DONE or ERROR_VALUE.
class CBORTokenizer {
public:
// Creates a tokenizer for the CBOR message in |bytes|.
// NOTE(review): only a span is stored (see |bytes_|), so presumably the
// underlying buffer must outlive the tokenizer - confirm.
explicit CBORTokenizer(span<uint8_t> bytes);
~CBORTokenizer();
// Identifies the current token that we're looking at,
// or ERROR_VALUE (in which case ::Status() has details)
// or DONE (if we're past the last token).
CBORTokenTag TokenTag() const;
// Advances to the next token.
void Next();
// Can only be called if TokenTag() == CBORTokenTag::ENVELOPE.
// While Next() would skip past the entire envelope / what it's
// wrapping, EnterEnvelope positions the cursor inside of the envelope,
// letting the client explore the nested structure.
void EnterEnvelope();
// If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes
// the error more precisely; otherwise it'll be set to Error::OK.
// In either case, Status().pos is the current position.
// The |struct| keyword is required here because, inside this class, the
// name Status refers to this member function rather than to the type.
struct Status Status() const;
// The following methods retrieve the token values. They can only
// be called if TokenTag() matches.
// To be called only if ::TokenTag() == CBORTokenTag::INT32.
int32_t GetInt32() const;
// To be called only if ::TokenTag() == CBORTokenTag::DOUBLE.
double GetDouble() const;
// To be called only if ::TokenTag() == CBORTokenTag::STRING8.
span<uint8_t> GetString8() const;
// Wire representation for STRING16 is low byte first (little endian).
// To be called only if ::TokenTag() == CBORTokenTag::STRING16.
span<uint8_t> GetString16WireRep() const;
// To be called only if ::TokenTag() == CBORTokenTag::BINARY.
span<uint8_t> GetBinary() const;
// To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE.
span<uint8_t> GetEnvelopeContents() const;
private:
// NOTE(review): presumably the shared implementation behind Next()
// (enter_envelope == false) and EnterEnvelope() (enter_envelope == true) -
// confirm in the .cc file.
void ReadNextToken(bool enter_envelope);
// Helpers which record the outcome of reading a token; presumably they
// update |token_tag_|, |token_byte_length_| and |status_| - confirm.
void SetToken(CBORTokenTag token, size_t token_byte_length);
void SetError(Error error);
// The input handed to the constructor.
span<uint8_t> bytes_;
// The tag reported by TokenTag().
CBORTokenTag token_tag_;
// The value reported by Status(). As above, |struct| disambiguates the type
// from the member function of the same name.
struct Status status_;
// NOTE(review): presumably the number of bytes occupied by the current
// token - confirm.
size_t token_byte_length_;
// NOTE(review): presumably the major type and value decoded from the start
// of the current token (compare internals::ReadTokenStart) - confirm.
MajorType token_start_type_;
uint64_t token_start_internal_value_;
};
// =============================================================================
// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
// =============================================================================
// Parses a CBOR encoded message from |bytes|, sending streaming parser events
// to |out|. If an error occurs, calls |out->HandleError|, and parsing stops.
// Events delivered before the error are not retracted: the client is
// responsible for discarding the already received information in that case.
void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out);
// =============================================================================
// cbor::AppendString8EntryToCBORMap - for limited in-place editing of messages
// =============================================================================
// Modifies the |cbor| message in place by appending a new key/value entry,
// made of |string8_key| and |string8_value|, at the end of the map.
// Patches up the envelope size; the returned Status is ok() iff successful.
// If not successful, |cbor| may be corrupted after this call, so callers
// should not use it any further in that case.
// The two overloads differ only in the container type holding the message.
// NOTE(review): since the envelope size is patched, |cbor| presumably must be
// an envelope wrapping a map - confirm against the implementation.
Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
span<uint8_t> string8_value,
std::vector<uint8_t>* cbor);
Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
span<uint8_t> string8_value,
std::string* cbor);
// Low level helpers for the start of a CBOR item. These are implementation
// details and not part of the supported API.
namespace internals { // Exposed only for writing tests.
// NOTE(review): presumably decodes the start of the item at the beginning of
// |bytes|, storing its major type in |*type| and the accompanying value in
// |*value|, and returns the number of bytes consumed - confirm in the .cc file.
size_t ReadTokenStart(span<uint8_t> bytes,
cbor::MajorType* type,
uint64_t* value);
// NOTE(review): presumably the inverse of ReadTokenStart: appends the encoded
// start of an item with the given |type| and |value| to |encoded| - confirm.
// The two overloads differ only in the container type of |encoded|.
void WriteTokenStart(cbor::MajorType type,
uint64_t value,
std::vector<uint8_t>* encoded);
void WriteTokenStart(cbor::MajorType type,
uint64_t value,
std::string* encoded);
} // namespace internals
} // namespace cbor
namespace json {
// Interface for the conversions between text and double which the JSON
// routines in this namespace rely on. Embedders must supply an instance;
// a concrete implementation should simply forward to whatever conversion
// facility is appropriate for its environment.
class Platform {
 public:
  virtual ~Platform() = default;

  // Converts the text in |str| to a double and stores it in |result|.
  // The return value is false iff parsing failed or there were leftover
  // characters.
  virtual bool StrToD(const char* str, double* result) const = 0;

  // Renders |value| as text, in a format suitable for JSON.
  virtual std::unique_ptr<char[]> DToStr(double value) const = 0;
};
// =============================================================================
// json::NewJSONEncoder - for encoding streaming parser events as JSON
// =============================================================================
// Returns a handler object which will write ASCII characters to |out|,
// encoding the streaming parser events it receives as JSON.
// |status->ok()| will be false iff the handler routine HandleError() is called.
// In that case, we'll stop emitting output.
// Except for calling the HandleError routine at any time, the client
// code must call the Handle* methods in an order in which they'd occur
// in valid JSON; otherwise we may crash (the code uses assert).
// The two overloads differ only in the container type used for |out|.
// NOTE(review): |platform| is presumably used to format doubles (see
// Platform::DToStr) and, like |out| and |status|, presumably must outlive the
// returned handler - confirm against the implementation.
std::unique_ptr<StreamingParserHandler> NewJSONEncoder(
const Platform* platform,
std::vector<uint8_t>* out,
Status* status);
std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform,
std::string* out,
Status* status);
// =============================================================================
// json::ParseJSON - for receiving streaming parser events for JSON
// =============================================================================
// Parses the JSON text in |chars|, sending streaming parser events to
// |handler|. Overloads are provided for input made of 8 bit and of 16 bit
// code units.
// NOTE(review): presumably the inputs are UTF8 / UTF16 respectively, and
// errors are reported via |handler->HandleError| as for cbor::ParseCBOR -
// confirm against the implementation.
void ParseJSON(const Platform& platform,
span<uint8_t> chars,
StreamingParserHandler* handler);
void ParseJSON(const Platform& platform,
span<uint16_t> chars,
StreamingParserHandler* handler);
// =============================================================================
// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding
// =============================================================================
// Transcodes the CBOR message in |cbor| to JSON, which is written to |json|.
// The two overloads differ only in the container type used for the output.
// NOTE(review): presumably the returned Status is ok() iff the conversion
// succeeded - confirm against the implementation.
Status ConvertCBORToJSON(const Platform& platform,
span<uint8_t> cbor,
std::string* json);
Status ConvertCBORToJSON(const Platform& platform,
span<uint8_t> cbor,
std::vector<uint8_t>* json);
// Transcodes the JSON text in |json| to CBOR, which is written to |cbor|.
// Overloads exist for every combination of 8 bit / 16 bit input code units
// and std::vector<uint8_t> / std::string output.
// NOTE(review): presumably the returned Status is ok() iff the conversion
// succeeded - confirm against the implementation.
Status ConvertJSONToCBOR(const Platform& platform,
span<uint8_t> json,
std::vector<uint8_t>* cbor);
Status ConvertJSONToCBOR(const Platform& platform,
span<uint16_t> json,
std::vector<uint8_t>* cbor);
Status ConvertJSONToCBOR(const Platform& platform,
span<uint8_t> json,
std::string* cbor);
Status ConvertJSONToCBOR(const Platform& platform,
span<uint16_t> json,
std::string* cbor);
} // namespace json
{% for namespace in config.protocol.namespace %}
} // namespace {{namespace}}
{% endfor %}
#endif // !defined({{"_".join(config.protocol.namespace)}}_encoding_h)
{% endif %}
......@@ -21,6 +21,9 @@ FILES_TO_SYNC = [
'encoding/encoding.h',
'encoding/encoding.cc',
'encoding/encoding_test.cc',
'bindings/bindings.h',
'bindings/bindings.cc',
'bindings/bindings_test.cc',
'inspector_protocol.gni',
'inspector_protocol.gypi',
'lib/*',
......@@ -143,6 +146,12 @@ def main(argv):
contents = contents.replace(
'namespace inspector_protocol_encoding',
'namespace v8_inspector_protocol_encoding')
contents = contents.replace(
'INSPECTOR_PROTOCOL_BINDINGS_BINDINGS_H_',
'V8_INSPECTOR_PROTOCOL_BINDINGS_BINDINGS_H_')
contents = contents.replace(
'namespace inspector_protocol_bindings',
'namespace v8_inspector_protocol_bindings')
open(os.path.join(dest_dir, f), 'w').write(contents)
shutil.copymode(os.path.join(src_dir, f), os.path.join(dest_dir, f))
for f in to_delete:
......
......@@ -26,6 +26,7 @@ group("v8_run_gcmole") {
"../../third_party/icu/source/",
"../../third_party/wasm-api/wasm.h",
"../../third_party/wasm-api/wasm.hh",
"../../third_party/inspector_protocol/",
"$target_gen_dir/../../",
"$target_gen_dir/../../torque-generated/",
]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment