Commit bf511b42 authored by jbroman, committed by Commit bot

ValueSerializer: Support efficiently reading and writing one-byte strings.

memcpy is faster than UTF-8 encoding/decoding. This yields 10-20% wins on
serializing and deserializing long ASCII strings, according to
blink_perf.bindings -- and these are already in a fast path where the entire
string is known to be ASCII (but this has to be checked). The win may be
larger for strings in Latin-1 but not ASCII (though I suspect this is an
uncommon case).

A change is also made to make ValueSerializerTest.EncodeTwoByteStringUsesPadding
survive wire format version number changes.

This is the first of a series of wire format changes from the previous Blink
format. The deserializer can still read the old format, but Chromium M56 will
not be able to read messages written with this change (in M58).

BUG=chromium:686159

Review-Url: https://codereview.chromium.org/2658793004
Cr-Commit-Position: refs/heads/master@{#42753}
parent 851ef467
......@@ -23,7 +23,10 @@
namespace v8 {
namespace internal {
static const uint32_t kLatestVersion = 9;
// Version 9: (imported from Blink)
// Version 10: one-byte (Latin-1) strings
static const uint32_t kLatestVersion = 10;
static const int kPretenureThreshold = 100 * KB;
template <typename T>
......@@ -61,6 +64,7 @@ enum class SerializationTag : uint8_t {
kDouble = 'N',
// byteLength:uint32_t, then raw data
kUtf8String = 'S',
kOneByteString = '"',
kTwoByteString = 'c',
// Reference to a serialized object. objectID:uint32_t
kObjectReference = '^',
......@@ -372,24 +376,9 @@ void ValueSerializer::WriteString(Handle<String> string) {
String::FlatContent flat = string->GetFlatContent();
DCHECK(flat.IsFlat());
if (flat.IsOneByte()) {
// The existing format uses UTF-8, rather than Latin-1. As a result we must
// do work to encode strings that have characters outside ASCII.
// TODO(jbroman): In a future format version, consider adding a tag for
// Latin-1 strings, so that this can be skipped.
WriteTag(SerializationTag::kUtf8String);
Vector<const uint8_t> chars = flat.ToOneByteVector();
if (String::IsAscii(chars.begin(), chars.length())) {
WriteOneByteString(chars);
} else {
v8::Local<v8::String> api_string = Utils::ToLocal(string);
uint32_t utf8_length = api_string->Utf8Length();
WriteVarint(utf8_length);
uint8_t* dest;
if (ReserveRawBytes(utf8_length).To(&dest)) {
api_string->WriteUtf8(reinterpret_cast<char*>(dest), utf8_length,
nullptr, v8::String::NO_NULL_TERMINATION);
}
}
WriteTag(SerializationTag::kOneByteString);
WriteOneByteString(chars);
} else if (flat.IsTwoByte()) {
Vector<const uc16> chars = flat.ToUC16Vector();
uint32_t byte_length = chars.length() * sizeof(uc16);
......@@ -1116,6 +1105,8 @@ MaybeHandle<Object> ValueDeserializer::ReadObjectInternal() {
}
case SerializationTag::kUtf8String:
return ReadUtf8String();
case SerializationTag::kOneByteString:
return ReadOneByteString();
case SerializationTag::kTwoByteString:
return ReadTwoByteString();
case SerializationTag::kObjectReference: {
......@@ -1175,6 +1166,18 @@ MaybeHandle<String> ValueDeserializer::ReadUtf8String() {
Vector<const char>::cast(utf8_bytes), pretenure_);
}
// Reads the payload of a one-byte (Latin-1) string: a varint-encoded byte
// length followed by that many raw bytes. The tag is assumed to have already
// been read. Yields an empty MaybeHandle if the length cannot be read, if it
// exceeds the int32_t range, or if the raw bytes cannot be read.
MaybeHandle<String> ValueDeserializer::ReadOneByteString() {
  // Largest length accepted; anything above the int32_t maximum is rejected.
  const uint32_t kMaxByteLength =
      static_cast<uint32_t>(std::numeric_limits<int32_t>::max());

  uint32_t length_in_bytes;
  if (!ReadVarint<uint32_t>().To(&length_in_bytes)) {
    return MaybeHandle<String>();
  }
  if (length_in_bytes > kMaxByteLength) {
    return MaybeHandle<String>();
  }

  Vector<const uint8_t> payload;
  if (!ReadRawBytes(length_in_bytes).To(&payload)) {
    return MaybeHandle<String>();
  }

  // Hand the bytes to the factory unchanged; no UTF-8 decoding happens here.
  return isolate_->factory()->NewStringFromOneByte(payload, pretenure_);
}
MaybeHandle<String> ValueDeserializer::ReadTwoByteString() {
uint32_t byte_length;
Vector<const uint8_t> bytes;
......
......@@ -232,6 +232,7 @@ class ValueDeserializer {
// Reading V8 objects of specific kinds.
// The tag is assumed to have already been read.
MaybeHandle<String> ReadUtf8String() WARN_UNUSED_RESULT;
MaybeHandle<String> ReadOneByteString() WARN_UNUSED_RESULT;
MaybeHandle<String> ReadTwoByteString() WARN_UNUSED_RESULT;
MaybeHandle<JSObject> ReadJSObject() WARN_UNUSED_RESULT;
MaybeHandle<JSArray> ReadSparseJSArray() WARN_UNUSED_RESULT;
......
......@@ -467,6 +467,24 @@ TEST_F(ValueSerializerTest, DecodeString) {
EXPECT_EQ(kEmojiString, Utf8Value(value));
});
// And from Latin-1 (for the ones that fit).
DecodeTest({0xff, 0x0a, 0x22, 0x00}, [](Local<Value> value) {
ASSERT_TRUE(value->IsString());
EXPECT_EQ(0, String::Cast(*value)->Length());
});
DecodeTest({0xff, 0x0a, 0x22, 0x05, 'H', 'e', 'l', 'l', 'o'},
[](Local<Value> value) {
ASSERT_TRUE(value->IsString());
EXPECT_EQ(5, String::Cast(*value)->Length());
EXPECT_EQ(kHelloString, Utf8Value(value));
});
DecodeTest({0xff, 0x0a, 0x22, 0x06, 'Q', 'u', 0xe9, 'b', 'e', 'c'},
[](Local<Value> value) {
ASSERT_TRUE(value->IsString());
EXPECT_EQ(6, String::Cast(*value)->Length());
EXPECT_EQ(kQuebecString, Utf8Value(value));
});
// And from two-byte strings (endianness dependent).
#if defined(V8_TARGET_LITTLE_ENDIAN)
DecodeTest({0xff, 0x09, 0x63, 0x00},
......@@ -501,6 +519,8 @@ TEST_F(ValueSerializerTest, DecodeString) {
TEST_F(ValueSerializerTest, DecodeInvalidString) {
// UTF-8 string with too few bytes available.
InvalidDecodeTest({0xff, 0x09, 0x53, 0x10, 'v', '8'});
// One-byte string with too few bytes available.
InvalidDecodeTest({0xff, 0x0a, 0x22, 0x10, 'v', '8'});
#if defined(V8_TARGET_LITTLE_ENDIAN)
// Two-byte string with too few bytes available.
InvalidDecodeTest({0xff, 0x09, 0x63, 0x10, 'v', '\0', '8', '\0'});
......@@ -525,12 +545,16 @@ TEST_F(ValueSerializerTest, EncodeTwoByteStringUsesPadding) {
return StringFromUtf8(string.c_str());
},
[](const std::vector<uint8_t>& data) {
// This is a sufficient but not necessary condition to be aligned.
// Note that the third byte (0x00) is padding.
const uint8_t expected_prefix[] = {0xff, 0x09, 0x00, 0x63, 0x94, 0x03};
ASSERT_GT(data.size(), sizeof(expected_prefix) / sizeof(uint8_t));
// This is a sufficient but not necessary condition. This test assumes
// that the wire format version is one byte long, but is flexible to
// what that value may be.
const uint8_t expected_prefix[] = {0x00, 0x63, 0x94, 0x03};
ASSERT_GT(data.size(), sizeof(expected_prefix) + 2);
EXPECT_EQ(0xff, data[0]);
EXPECT_GE(data[1], 0x09);
EXPECT_LE(data[1], 0x7f);
EXPECT_TRUE(std::equal(std::begin(expected_prefix),
std::end(expected_prefix), data.begin()));
std::end(expected_prefix), data.begin() + 2));
});
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment