Commit 84841783, authored by Toon Verwaest and committed by Commit Bot

[json] Speed up two-byte json substring internalization

Inputs to JSON can be two-byte because payload strings can contain two-byte
characters, even when none of the property keys actually needs two-byte
characters. Rather than eagerly converting each key to one-byte, we can perform
a string-table lookup with a two-byte string key, only converting the result to
one-byte if it's a new key.

This speeds up json parsing of two-byte json from the Youtube benchmark by 20%.

Change-Id: If6d4a37d331724f48b008aef8ec3e28d366cd038
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1619866
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Cr-Commit-Position: refs/heads/master@{#61680}
parent eca505af
......@@ -736,8 +736,16 @@ Handle<String> Factory::InternalizeOneByteString(
}
Handle<String> Factory::InternalizeTwoByteString(
const Vector<const uc16>& string) {
TwoByteStringKey key(string, HashSeed(isolate()));
Handle<SeqTwoByteString> string, int from, int length,
bool convert_to_one_byte) {
SeqTwoByteSubStringKey key(isolate(), string, from, length,
convert_to_one_byte);
return InternalizeStringWithKey(&key);
}
Handle<String> Factory::InternalizeTwoByteString(
const Vector<const uc16>& string, bool convert_to_one_byte) {
TwoByteStringKey key(string, HashSeed(isolate()), convert_to_one_byte);
return InternalizeStringWithKey(&key);
}
......@@ -924,22 +932,29 @@ Handle<SeqOneByteString> Factory::AllocateRawOneByteInternalizedString(
Handle<String> Factory::AllocateTwoByteInternalizedString(
const Vector<const uc16>& str, uint32_t hash_field) {
CHECK_GE(String::kMaxLength, str.length());
DCHECK_NE(0, str.length()); // Use Heap::empty_string() instead.
Handle<SeqTwoByteString> result =
AllocateRawTwoByteInternalizedString(str.length(), hash_field);
DisallowHeapAllocation no_gc;
// Fill in the characters.
MemCopy(result->GetChars(no_gc), str.begin(), str.length() * kUC16Size);
return result;
}
Handle<SeqTwoByteString> Factory::AllocateRawTwoByteInternalizedString(
int length, uint32_t hash_field) {
CHECK_GE(String::kMaxLength, length);
DCHECK_NE(0, length); // Use Heap::empty_string() instead.
Map map = *internalized_string_map();
int size = SeqTwoByteString::SizeFor(str.length());
int size = SeqTwoByteString::SizeFor(length);
HeapObject result =
AllocateRawWithImmortalMap(size, AllocationType::kOld, map);
Handle<SeqTwoByteString> answer(SeqTwoByteString::cast(result), isolate());
answer->set_length(str.length());
answer->set_length(length);
answer->set_hash_field(hash_field);
DCHECK_EQ(size, answer->Size());
DisallowHeapAllocation no_gc;
// Fill in the characters.
MemCopy(answer->GetChars(no_gc), str.begin(), str.length() * kUC16Size);
DCHECK_EQ(size, result.Size());
return answer;
}
......@@ -991,17 +1006,6 @@ Handle<String> Factory::NewOneByteInternalizedString(
return result;
}
Handle<String> Factory::NewOneByteInternalizedSubString(
Handle<SeqOneByteString> string, int offset, int length,
uint32_t hash_field) {
Handle<SeqOneByteString> result =
AllocateRawOneByteInternalizedString(length, hash_field);
DisallowHeapAllocation no_allocation;
MemCopy(result->GetChars(no_allocation),
string->GetChars(no_allocation) + offset, length);
return result;
}
Handle<String> Factory::NewTwoByteInternalizedString(
const Vector<const uc16>& str, uint32_t hash_field) {
return AllocateTwoByteInternalizedString(str, hash_field);
......
......@@ -245,8 +245,12 @@ class V8_EXPORT_PRIVATE Factory {
Handle<String> InternalizeOneByteString(const Vector<const uint8_t>& str);
Handle<String> InternalizeOneByteString(Handle<SeqOneByteString>, int from,
int length);
Handle<String> InternalizeTwoByteString(Handle<SeqTwoByteString>, int from,
int length,
bool convert_to_one_byte = false);
Handle<String> InternalizeTwoByteString(const Vector<const uc16>& str);
Handle<String> InternalizeTwoByteString(const Vector<const uc16>& str,
bool convert_to_one_byte = false);
template <class StringTableKey>
Handle<String> InternalizeStringWithKey(StringTableKey* key);
......@@ -318,13 +322,15 @@ class V8_EXPORT_PRIVATE Factory {
Handle<String> NewOneByteInternalizedString(const Vector<const uint8_t>& str,
uint32_t hash_field);
Handle<String> NewOneByteInternalizedSubString(
Handle<SeqOneByteString> string, int offset, int length,
uint32_t hash_field);
Handle<SeqOneByteString> AllocateRawOneByteInternalizedString(
int length, uint32_t hash_field);
Handle<String> NewTwoByteInternalizedString(const Vector<const uc16>& str,
uint32_t hash_field);
Handle<SeqTwoByteString> AllocateRawTwoByteInternalizedString(
int length, uint32_t hash_field);
Handle<String> NewInternalizedStringImpl(Handle<String> string, int chars,
uint32_t hash_field);
......@@ -1068,9 +1074,6 @@ class V8_EXPORT_PRIVATE Factory {
Handle<String> AllocateInternalizedStringImpl(T t, int chars,
uint32_t hash_field);
Handle<SeqOneByteString> AllocateRawOneByteInternalizedString(
int length, uint32_t hash_field);
Handle<String> AllocateTwoByteInternalizedString(
const Vector<const uc16>& str, uint32_t hash_field);
......
......@@ -964,15 +964,16 @@ namespace {
template <typename Char>
bool Matches(const Vector<const Char>& chars, Handle<String> string) {
if (string.is_null()) return false;
DCHECK(!string.is_null());
// Only supports internalized strings in their canonical representation (one
// byte encoded as two-byte will return false here).
if ((sizeof(Char) == 1) != string->IsOneByteRepresentation()) return false;
if (chars.length() != string->length()) return false;
DisallowHeapAllocation no_gc;
const Char* string_data = string->GetChars<Char>(no_gc);
if (string->IsOneByteRepresentation()) {
const uint8_t* string_data = string->GetChars<uint8_t>(no_gc);
return CompareChars(chars.begin(), string_data, chars.length()) == 0;
}
const uint16_t* string_data = string->GetChars<uint16_t>(no_gc);
return CompareChars(chars.begin(), string_data, chars.length()) == 0;
}
......@@ -991,7 +992,7 @@ Handle<String> JsonParser<Char>::DecodeString(
DecodeString(dest, string.start(), string.length());
} else {
DCHECK_IMPLIES(string.internalize(), string.needs_conversion());
i::CopyChars(dest, chars_ + string.start(), string.length());
CopyChars(dest, chars_ + string.start(), string.length());
}
Vector<const SinkChar> data(dest, string.length());
......@@ -1013,13 +1014,6 @@ Handle<String> JsonParser<Char>::MakeString(const JsonString& string,
if (string.length() == 0) return factory()->empty_string();
if (sizeof(Char) == 1) {
if (V8_UNLIKELY(string.needs_conversion())) {
DCHECK(string.has_escape());
Handle<SeqTwoByteString> intermediate =
factory()->NewRawTwoByteString(string.length()).ToHandleChecked();
return DecodeString<uint16_t>(string, intermediate, hint);
}
if (string.internalize() && !string.has_escape()) {
if (!hint.is_null()) {
Vector<const Char> data(chars_ + string.start(), string.length());
......@@ -1035,12 +1029,13 @@ Handle<String> JsonParser<Char>::MakeString(const JsonString& string,
Vector<const uint8_t>::cast(chars));
}
Handle<SeqOneByteString> intermediate =
factory()->NewRawOneByteString(string.length()).ToHandleChecked();
return DecodeString<uint8_t>(string, intermediate, hint);
}
if (V8_UNLIKELY(string.needs_conversion())) {
DCHECK(string.has_escape());
Handle<SeqTwoByteString> intermediate =
factory()->NewRawTwoByteString(string.length()).ToHandleChecked();
return DecodeString<uint16_t>(string, intermediate, hint);
}
if (string.needs_conversion()) {
Handle<SeqOneByteString> intermediate =
factory()->NewRawOneByteString(string.length()).ToHandleChecked();
return DecodeString<uint8_t>(string, intermediate, hint);
......@@ -1052,13 +1047,19 @@ Handle<String> JsonParser<Char>::MakeString(const JsonString& string,
if (Matches(data, hint)) return hint;
}
if (chars_may_relocate_) {
Handle<String> substring = factory()->NewProperSubString(
source_, string.start(), string.start() + string.length());
return factory()->InternalizeString(substring);
return factory()->InternalizeTwoByteString(
Handle<SeqTwoByteString>::cast(source_), string.start(),
string.length(), string.needs_conversion());
}
Vector<const Char> chars(chars_ + string.start(), string.length());
return factory()->InternalizeTwoByteString(
Vector<const uint16_t>::cast(chars));
Vector<const uint16_t>::cast(chars), string.needs_conversion());
}
if (string.needs_conversion()) {
Handle<SeqOneByteString> intermediate =
factory()->NewRawOneByteString(string.length()).ToHandleChecked();
return DecodeString<uint8_t>(string, intermediate, hint);
}
Handle<SeqTwoByteString> intermediate =
......
......@@ -6790,6 +6790,8 @@ template Handle<String> StringTable::LookupKey(Isolate* isolate,
TwoByteStringKey* key);
template Handle<String> StringTable::LookupKey(Isolate* isolate,
SeqOneByteSubStringKey* key);
template Handle<String> StringTable::LookupKey(Isolate* isolate,
SeqTwoByteSubStringKey* key);
Handle<String> StringTable::AddKeyNoResize(Isolate* isolate,
StringTableKey* key) {
......
......@@ -197,13 +197,17 @@ Char FlatStringReader::Get(int index) {
template <typename Char>
class SequentialStringKey final : public StringTableKey {
public:
SequentialStringKey(const Vector<const Char>& chars, uint64_t seed)
SequentialStringKey(const Vector<const Char>& chars, uint64_t seed,
bool convert = false)
: SequentialStringKey(StringHasher::HashSequentialString<Char>(
chars.begin(), chars.length(), seed),
chars) {}
chars, convert) {}
SequentialStringKey(int hash, const Vector<const Char>& chars)
: StringTableKey(hash, chars.length()), chars_(chars) {}
SequentialStringKey(int hash, const Vector<const Char>& chars,
bool convert = false)
: StringTableKey(hash, chars.length()),
chars_(chars),
convert_(convert) {}
bool IsMatch(String s) override {
DisallowHeapAllocation no_gc;
......@@ -226,12 +230,14 @@ class SequentialStringKey final : public StringTableKey {
private:
Vector<const Char> chars_;
bool convert_;
};
using OneByteStringKey = SequentialStringKey<uint8_t>;
using TwoByteStringKey = SequentialStringKey<uint16_t>;
class SeqOneByteSubStringKey final : public StringTableKey {
template <typename Char>
class SeqSubStringKey final : public StringTableKey {
public:
// VS 2017 on official builds gives this spurious warning:
// warning C4789: buffer 'key' of size 16 bytes will be overrun; 4 bytes will
......@@ -241,9 +247,13 @@ class SeqOneByteSubStringKey final : public StringTableKey {
#pragma warning(push)
#pragma warning(disable : 4789)
#endif
SeqOneByteSubStringKey(Isolate* isolate, Handle<SeqOneByteString> string,
int from, int len)
: StringTableKey(0, len), string_(string), from_(from) {
SeqSubStringKey(Isolate* isolate,
Handle<typename CharTraits<Char>::String> string, int from,
int len, bool convert = false)
: StringTableKey(0, len),
string_(string),
from_(from),
convert_(convert) {
// We have to set the hash later.
DisallowHeapAllocation no_gc;
uint32_t hash = StringHasher::HashSequentialString(
......@@ -252,7 +262,8 @@ class SeqOneByteSubStringKey final : public StringTableKey {
DCHECK_LE(0, length());
DCHECK_LE(from_ + length(), string_->length());
DCHECK(string_->IsSeqOneByteString());
DCHECK_EQ(string_->IsSeqOneByteString(), sizeof(Char) == 1);
DCHECK_EQ(string_->IsSeqTwoByteString(), sizeof(Char) == 2);
}
#if defined(V8_CC_MSVC)
#pragma warning(pop)
......@@ -270,15 +281,33 @@ class SeqOneByteSubStringKey final : public StringTableKey {
}
Handle<String> AsHandle(Isolate* isolate) override {
return isolate->factory()->NewOneByteInternalizedSubString(
string_, from_, length(), hash_field());
if (sizeof(Char) == 1 || (sizeof(Char) == 2 && convert_)) {
Handle<SeqOneByteString> result =
isolate->factory()->AllocateRawOneByteInternalizedString(
length(), hash_field());
DisallowHeapAllocation no_gc;
CopyChars(result->GetChars(no_gc), string_->GetChars(no_gc) + from_,
length());
return result;
}
Handle<SeqTwoByteString> result =
isolate->factory()->AllocateRawTwoByteInternalizedString(length(),
hash_field());
DisallowHeapAllocation no_gc;
CopyChars(result->GetChars(no_gc), string_->GetChars(no_gc) + from_,
length());
return result;
}
private:
Handle<SeqOneByteString> string_;
Handle<typename CharTraits<Char>::String> string_;
int from_;
bool convert_;
};
using SeqOneByteSubStringKey = SeqSubStringKey<uint8_t>;
using SeqTwoByteSubStringKey = SeqSubStringKey<uint16_t>;
bool String::Equals(String other) {
if (other == *this) return true;
if (this->IsInternalizedString() && other->IsInternalizedString()) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment