Commit 61417410, authored by Johannes Henkel and committed by Commit Bot

[DevTools] Roll of inspector protocol ...

... to b13e24ccee66d7e0590ce1266db9c906e3648561

to support UTF16 strings with binary protocol (v8).

Original review here:
https://chromium-review.googlesource.com/c/deps/inspector_protocol/+/1493312

Change-Id: I401c5d52d233805dc681bb23b40df99f2ea14992
Reviewed-on: https://chromium-review.googlesource.com/c/1493291
Commit-Queue: Johannes Henkel <johannes@chromium.org>
Reviewed-by: Pavel Feldman <pfeldman@chromium.org>
Cr-Commit-Position: refs/heads/master@{#59956}
parent 75d972a6
......@@ -27,16 +27,6 @@ struct ProtocolMessage {
std::vector<uint8_t> binary;
};
// Keeps the std::string produced by calling utf8() on a String once, at
// construction time, and exposes that copy's bytes and byte count.
class StringUTF8Adapter {
 public:
  explicit StringUTF8Adapter(const String& string) : string_(string.utf8()) {}

  // Pointer to the first char of the owned copy.
  const char* Data() const { return string_.c_str(); }

  // Number of chars held in the owned copy.
  size_t length() const { return string_.size(); }

 private:
  // Owned result of String::utf8().
  std::string string_;
};
class StringUtil {
public:
static String substring(const String& s, size_t pos, size_t len) {
......@@ -81,12 +71,17 @@ class StringUtil {
// Builds a String from the |length| bytes starting at |data| by delegating
// to String16::fromUTF8. The bytes are only reinterpreted as chars for that
// call; no copy is made here.
static String fromUTF8(const uint8_t* data, size_t length) {
return String16::fromUTF8(reinterpret_cast<const char*>(data), length);
}
static void writeUTF8(const String& string, std::vector<uint8_t>* out) {
// TODO(pfeldman): get rid of the copy here.
std::string utf8 = string.utf8();
const uint8_t* data = reinterpret_cast<const uint8_t*>(utf8.data());
out->insert(out->end(), data, data + utf8.length());
// Builds a String from |length| 16-bit code units starting at |data|, using
// the String16(data, length) constructor.
static String fromUTF16(const uint16_t* data, size_t length) {
return String16(data, length);
}
// Representation accessors used when serializing a String (see EncodeString,
// which tries Latin1, then UTF16, then UTF8 and skips any accessor that
// returns nullptr). This embedder only exposes 16-bit characters, so the
// Latin1 and UTF8 accessors always report "not available".
static const uint8_t* CharactersLatin1(const String& s) { return nullptr; }
static const uint8_t* CharactersUTF8(const String& s) { return nullptr; }
// Returns the string's 16-bit code units as provided by characters16().
static const uint16_t* CharactersUTF16(const String& s) {
return s.characters16();
}
// Number of elements behind the pointer returned above; here s.length().
static size_t CharacterCount(const String& s) { return s.length(); }
};
// A read-only sequence of uninterpreted bytes with reference-counted storage.
......
......@@ -2,7 +2,7 @@ Name: inspector protocol
Short Name: inspector_protocol
URL: https://chromium.googlesource.com/deps/inspector_protocol/
Version: 0
Revision: c40253f87c475880d1bdad4a90cf21c38dadf4ac
Revision: b13e24ccee66d7e0590ce1266db9c906e3648561
License: BSD
License File: LICENSE
Security Critical: no
......
......@@ -194,6 +194,38 @@ void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out) {
out->insert(out->end(), in.begin(), in.end());
}
// Encodes |latin1| as a STRING8 token appended to |out|.
// Input that is entirely 7 bit US-ASCII is passed through unchanged; as soon
// as one byte above 127 is present, the whole string is transcoded into a
// temporary UTF8 buffer in which each such byte becomes a 2 byte sequence.
void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out) {
  // Locate the first byte that is not 7 bit US-ASCII (if there is one).
  std::ptrdiff_t first_non_ascii = 0;
  while (first_non_ascii < latin1.size() && latin1[first_non_ascii] <= 127)
    ++first_non_ascii;

  if (first_non_ascii == latin1.size()) {
    // Nothing to transcode: ASCII bytes are valid UTF8 as they are.
    EncodeString8(latin1, out);
    return;
  }

  // Carry over the ASCII prefix verbatim, then transcode the remainder.
  std::vector<uint8_t> utf8(latin1.begin(), latin1.begin() + first_non_ascii);
  for (std::ptrdiff_t pos = first_non_ascii; pos < latin1.size(); ++pos) {
    const uint8_t ch = latin1[pos];
    if (ch <= 127) {
      utf8.push_back(ch);
      continue;
    }
    // Lead byte: the 0xC0 marker (2 byte sequence) plus the top two bits.
    utf8.push_back((ch >> 6) | 0xc0);
    // Continuation byte: the 0x80 marker plus the low six bits.
    utf8.push_back((ch | 0x80) & 0xbf);
  }
  EncodeString8(span<uint8_t>(utf8.data(), utf8.size()), out);
}
// Encodes |utf16| into |out|: as STRING16 when any code unit lies outside of
// 7 bit US-ASCII, otherwise as a STRING (major type 3) token holding one
// byte per code unit.
void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out) {
  // Scan until the first code unit above 127, if any.
  bool ascii_only = true;
  for (uint16_t code_unit : utf16) {
    if (code_unit > 127) {
      ascii_only = false;
      break;
    }
  }

  if (!ascii_only) {
    EncodeString16(utf16, out);
    return;
  }

  // Every code unit fits in 7 bits, so its high byte is zero: write the token
  // header with the character count and let the insertion narrow each 16 bit
  // unit down to its low byte.
  WriteTokenStart(MajorType::STRING, static_cast<uint64_t>(utf16.size()), out);
  out->insert(out->end(), utf16.begin(), utf16.end());
}
void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out) {
out->push_back(kExpectedConversionToBase64Tag);
uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
......@@ -279,11 +311,15 @@ class JSONToCBOREncoder : public JSONParserHandler {
envelopes_.pop_back();
}
void HandleString16(std::vector<uint16_t> chars) override {
// Forwards the 8-bit string |chars| unchanged to EncodeString8, which appends
// the encoded token to |out_|.
void HandleString8(span<uint8_t> chars) override {
EncodeString8(chars, out_);
}
void HandleString16(span<uint16_t> chars) override {
for (uint16_t ch : chars) {
if (ch >= 0x7f) {
// If there's at least one non-7bit character, we encode as UTF16.
EncodeString16(span<uint16_t>(chars.data(), chars.size()), out_);
EncodeString16(chars, out_);
return;
}
}
......@@ -343,25 +379,13 @@ void ParseUTF16String(CBORTokenizer* tokenizer, JSONParserHandler* out) {
span<uint8_t> rep = tokenizer->GetString16WireRep();
for (std::ptrdiff_t ii = 0; ii < rep.size(); ii += 2)
value.push_back((rep[ii + 1] << 8) | rep[ii]);
out->HandleString16(std::move(value));
out->HandleString16(span<uint16_t>(value.data(), value.size()));
tokenizer->Next();
}
// For now this method only covers US-ASCII. Later, we may allow UTF8.
bool ParseASCIIString(CBORTokenizer* tokenizer, JSONParserHandler* out) {
bool ParseUTF8String(CBORTokenizer* tokenizer, JSONParserHandler* out) {
assert(tokenizer->TokenTag() == CBORTokenTag::STRING8);
std::vector<uint16_t> value16;
for (uint8_t ch : tokenizer->GetString8()) {
// We only accept us-ascii (7 bit) strings here. Other strings must
// be encoded with 16 bit (the BYTE_STRING case).
if (ch >= 0x7f) {
out->HandleError(
Status{Error::CBOR_STRING8_MUST_BE_7BIT, tokenizer->Status().pos});
return false;
}
value16.push_back(ch);
}
out->HandleString16(std::move(value16));
out->HandleString8(tokenizer->GetString8());
tokenizer->Next();
return true;
}
......@@ -405,7 +429,7 @@ bool ParseValue(int32_t stack_depth, CBORTokenizer* tokenizer,
tokenizer->Next();
return true;
case CBORTokenTag::STRING8:
return ParseASCIIString(tokenizer, out);
return ParseUTF8String(tokenizer, out);
case CBORTokenTag::STRING16:
ParseUTF16String(tokenizer, out);
return true;
......@@ -472,7 +496,7 @@ bool ParseMap(int32_t stack_depth, CBORTokenizer* tokenizer,
}
// Parse key.
if (tokenizer->TokenTag() == CBORTokenTag::STRING8) {
if (!ParseASCIIString(tokenizer, out)) return false;
if (!ParseUTF8String(tokenizer, out)) return false;
} else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) {
ParseUTF16String(tokenizer, out);
} else {
......
......@@ -117,9 +117,8 @@ class JSONParserHandler {
virtual void HandleObjectEnd() = 0;
virtual void HandleArrayBegin() = 0;
virtual void HandleArrayEnd() = 0;
// TODO(johannes): Support utf8 (requires utf16->utf8 conversion
// internally, including handling mismatched surrogate pairs).
virtual void HandleString16(std::vector<uint16_t> chars) = 0;
virtual void HandleString8(span<uint8_t> chars) = 0;
virtual void HandleString16(span<uint16_t> chars) = 0;
virtual void HandleBinary(std::vector<uint8_t> bytes) = 0;
virtual void HandleDouble(double value) = 0;
virtual void HandleInt32(int32_t value) = 0;
......@@ -239,8 +238,9 @@ static constexpr uint8_t kStopByte =
// - UTF16 strings, including with unbalanced surrogate pairs, are encoded
// as CBOR BYTE_STRING (major type 2). For such strings, the number of
// bytes encoded must be even.
// - UTF8 strings (major type 3) may only have ASCII characters
// (7 bit US-ASCII).
// - UTF8 strings (major type 3) are supported.
// - 7 bit US-ASCII strings must always be encoded as UTF8 strings, not
// as UTF16 strings.
// - Arbitrary byte arrays, in the inspector protocol called 'binary',
// are encoded as BYTE_STRING (major type 2), prefixed with a byte
// indicating base64 when rendered as JSON.
......@@ -257,6 +257,15 @@ void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out);
// Encodes a UTF8 string |in| as STRING (major type 3).
void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out);
// Encodes the given |latin1| string as STRING8.
// If any non-ASCII character is present, it will be represented
// as a 2 byte UTF8 sequence.
void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out);
// Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII.
// Otherwise, encodes as STRING16.
void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out);
// Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with
// definite length, prefixed with tag 22 indicating expected conversion to
// base64 (see RFC 7049, Table 3 and Section 2.4.4.2).
......
......@@ -105,12 +105,17 @@ template<>
struct ValueConversions<Binary> {
static Binary fromValue(protocol::Value* value, ErrorSupport* errors)
{
String result;
bool success = value ? value->asString(&result) : false;
if (!success) {
errors->addError("string value expected");
if (!value ||
(value->type() != Value::TypeBinary && value->type() != Value::TypeString)) {
errors->addError("Either string base64 or binary value expected");
return Binary();
}
Binary binary;
if (value->asBinary(&binary))
return binary;
String result;
value->asString(&result);
bool success;
Binary out = Binary::fromBase64(result, &success);
if (!success)
errors->addError("base64 decoding error");
......@@ -119,7 +124,7 @@ struct ValueConversions<Binary> {
static std::unique_ptr<protocol::Value> toValue(const Binary& value)
{
return StringValue::create(value.toBase64());
return BinaryValue::create(value);
}
};
......
......@@ -130,15 +130,22 @@ std::unique_ptr<Value> parseValue(
}
case CBORTokenTag::STRING8: {
span<uint8_t> str = tokenizer->GetString8();
std::unique_ptr<Value> value = StringValue::create(StringUtil::fromUTF8(str.data(), str.size()));
std::unique_ptr<Value> value =
StringValue::create(StringUtil::fromUTF8(str.data(), str.size()));
tokenizer->Next();
return value;
}
case CBORTokenTag::STRING16: {
span<uint8_t> wire = tokenizer->GetString16WireRep();
DCHECK_EQ(wire.size() & 1, 0);
std::unique_ptr<Value> value = StringValue::create(StringUtil::fromUTF16(
reinterpret_cast<const uint16_t*>(wire.data()), wire.size() / 2));
tokenizer->Next();
return value;
}
case CBORTokenTag::STRING16:
// NOT SUPPORTED YET.
return nullptr;
case CBORTokenTag::BINARY: {
span<uint8_t> payload = tokenizer->GetBinary();
tokenizer->Next();
return BinaryValue::create(Binary::fromSpan(payload.data(), payload.size()));
}
case CBORTokenTag::MAP_START:
......@@ -168,8 +175,8 @@ std::unique_ptr<DictionaryValue> parseMap(
String key;
if (tokenizer->TokenTag() == CBORTokenTag::STRING8) {
span<uint8_t> key_span = tokenizer->GetString8();
tokenizer->Next();
key = StringUtil::fromUTF8(key_span.data(), key_span.size());
tokenizer->Next();
} else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) {
return nullptr; // STRING16 not supported yet.
} else {
......@@ -362,10 +369,37 @@ void StringValue::writeJSON(StringBuilder* output) const
StringUtil::builderAppendQuotedString(*output, m_stringValue);
}
namespace {
// Serializes |s| into |out|, choosing the encoder by the in-memory
// representation that StringUtil exposes for it. An accessor that returns a
// null pointer means "not stored this way"; they are probed in this order:
// - Latin1: EncodeFromLatin1, which transcodes to UTF8 if needed because
//   there is no LATIN1 on the wire.
// - UTF16: EncodeFromUTF16, which detects ASCII and does the (trivial)
//   transcode to STRING8, and emits STRING16 otherwise. This keeps all
//   ASCII strings encoded as UTF8 in the wire format.
// - UTF8: EncodeString8, with the bytes handed over as they are.
// - None of them: an empty STRING8.
void EncodeString(const String& s, std::vector<uint8_t>* out) {
  if (const uint8_t* latin1 = StringUtil::CharactersLatin1(s)) {
    EncodeFromLatin1(span<uint8_t>(latin1, StringUtil::CharacterCount(s)), out);
    return;
  }
  if (const uint16_t* utf16 = StringUtil::CharactersUTF16(s)) {
    EncodeFromUTF16(span<uint16_t>(utf16, StringUtil::CharacterCount(s)), out);
    return;
  }
  if (const uint8_t* utf8 = StringUtil::CharactersUTF8(s)) {
    EncodeString8(span<uint8_t>(utf8, StringUtil::CharacterCount(s)), out);
    return;
  }
  EncodeString8(span<uint8_t>(nullptr, 0), out);  // Empty string.
}
}  // namespace
void StringValue::writeBinary(std::vector<uint8_t>* bytes) const {
StringUTF8Adapter utf8(m_stringValue);
EncodeString8(span<uint8_t>(reinterpret_cast<const uint8_t*>(utf8.Data()),
utf8.length()), bytes);
EncodeString(m_stringValue, bytes);
}
std::unique_ptr<Value> StringValue::clone() const
......@@ -556,9 +590,7 @@ void DictionaryValue::writeBinary(std::vector<uint8_t>* bytes) const {
const String& key = m_order[i];
Dictionary::const_iterator value = m_data.find(key);
DCHECK(value != m_data.cend() && value->second);
StringUTF8Adapter utf8(key);
EncodeString8(span<uint8_t>(reinterpret_cast<const uint8_t*>(utf8.Data()),
utf8.length()), bytes);
EncodeString(key, bytes);
value->second->writeBinary(bytes);
}
bytes->push_back(EncodeStop());
......
......@@ -136,7 +136,7 @@ std::unique_ptr<Value> StringUtil::parseMessage(
reinterpret_cast<const uint8_t*>(message.data()),
message.length());
}
std::unique_ptr<base::Value> value = base::JSONReader::Read(message);
std::unique_ptr<base::Value> value = base::JSONReader::ReadDeprecated(message);
return toProtocolValue(value.get(), 1000);
}
......@@ -185,6 +185,13 @@ void StringBuilder::reserveCapacity(size_t capacity) {
string_.reserve(capacity);
}
// static
// Converts the |length| UTF16 code units starting at |data| with
// base::UTF16ToUTF8 and returns the resulting std::string as a String.
String StringUtil::fromUTF16(const uint16_t* data, size_t length) {
  const base::char16* utf16_chars =
      reinterpret_cast<const base::char16*>(data);
  std::string converted;
  base::UTF16ToUTF8(utf16_chars, length, &converted);
  return converted;
}
// Default constructor: backs the Binary with a newly allocated,
// default-constructed base::RefCountedBytes.
Binary::Binary() : bytes_(new base::RefCountedBytes) {}
// Copy constructor: copies |binary|'s |bytes_| handle. Presumably this shares
// the underlying ref-counted storage rather than duplicating the bytes;
// confirm against the declared type of |bytes_|.
Binary::Binary(const Binary& binary) : bytes_(binary.bytes_) {}
// Wraps the ref-counted buffer |bytes| supplied by the caller.
Binary::Binary(scoped_refptr<base::RefCountedMemory> bytes) : bytes_(bytes) {}
......
......@@ -32,16 +32,6 @@ class Value;
using String = std::string;
using ProtocolMessage = std::string;
// Exposes the chars of a std::string without copying it. Only a reference is
// stored, so the string passed to the constructor must outlive the adapter.
class {{config.lib.export_macro}} StringUTF8Adapter {
 public:
  StringUTF8Adapter(const std::string& string) : string_(string) {}

  // Pointer to the first char of the referenced string.
  const char* Data() const { return string_.c_str(); }

  // Number of chars in the referenced string.
  size_t length() const { return string_.size(); }

 private:
  // Not owned.
  const std::string& string_;
};
class {{config.lib.export_macro}} StringBuilder {
public:
StringBuilder();
......@@ -64,8 +54,12 @@ class {{config.lib.export_macro}} StringUtil {
static String fromInteger(int number) { return base::NumberToString(number); }
static String fromDouble(double number) {
String s = base::NumberToString(number);
if (!s.empty() && s[0] == '.')
s = "0" + s;
if (!s.empty()) { // .123 -> 0.123; -.123 -> -0.123 for valid JSON.
if (s[0] == '.')
s.insert(/*index=*/ 0, /*count=*/ 1, /*ch=*/ '0');
else if (s[0] == '-' && s.size() >= 2 && s[1] == '.')
s.insert(/*index=*/ 1, /*count=*/ 1, /*ch=*/ '0');
}
return s;
}
static double toDouble(const char* s, size_t len, bool* ok) {
......@@ -105,6 +99,15 @@ class {{config.lib.export_macro}} StringUtil {
// Copies the |length| bytes starting at |data| into a new std::string. The
// bytes are taken as they are; no validation or transcoding happens here.
static String fromUTF8(const uint8_t* data, size_t length) {
return std::string(reinterpret_cast<const char*>(data), length);
}
static String fromUTF16(const uint16_t* data, size_t length);
// Representation accessors for this std::string-backed String. Only the UTF8
// accessor returns a pointer; the Latin1 and UTF16 accessors always return
// nullptr to signal that the string is not held in that form.
static const uint8_t* CharactersLatin1(const String& s) { return nullptr; }
// Returns s.data() viewed as unsigned bytes; no copy is made.
static const uint8_t* CharactersUTF8(const String& s) {
return reinterpret_cast<const uint8_t*>(s.data());
}
static const uint16_t* CharactersUTF16(const String& s) { return nullptr; }
// Returns s.size(), i.e. the number of bytes behind CharactersUTF8(), which
// is not necessarily the number of Unicode characters.
static size_t CharacterCount(const String& s) { return s.size(); }
};
// A read-only sequence of uninterpreted bytes with reference-counted storage.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment