Commit 9ce8ed75 authored by Toon Verwaest, committed by Commit Bot

[runtime] Cleanup StringTableKey

- Performs hash / length check before calling IsMatch
- Casts to string before calling IsMatch
- Removes special two-char internalization key
  (will look into removing StringTableNoAllocateKey next, and possibly folding
  StringTableInsertionKey into InternalizedStringKey).

Change-Id: Ida76761eb2c3dc350c829ac2bfe12d52aef5f96d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1598753
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#61280}
parent 48fa759e
......@@ -1080,8 +1080,8 @@ MaybeHandle<SeqTwoByteString> Factory::NewRawTwoByteString(
return string;
}
Handle<String> Factory::LookupSingleCharacterStringFromCode(uint32_t code) {
if (code <= String::kMaxOneByteCharCodeU) {
Handle<String> Factory::LookupSingleCharacterStringFromCode(uint16_t code) {
if (code <= unibrow::Latin1::kMaxChar) {
{
DisallowHeapAllocation no_allocation;
Object value = single_character_string_cache()->get(code);
......@@ -1089,61 +1089,27 @@ Handle<String> Factory::LookupSingleCharacterStringFromCode(uint32_t code) {
return handle(String::cast(value), isolate());
}
}
uint8_t buffer[1];
buffer[0] = static_cast<uint8_t>(code);
uint8_t buffer[] = {static_cast<uint8_t>(code)};
Handle<String> result =
InternalizeOneByteString(Vector<const uint8_t>(buffer, 1));
single_character_string_cache()->set(code, *result);
return result;
}
DCHECK_LE(code, String::kMaxUtf16CodeUnitU);
Handle<SeqTwoByteString> result = NewRawTwoByteString(1).ToHandleChecked();
result->SeqTwoByteStringSet(0, static_cast<uint16_t>(code));
return result;
}
// Returns true for a character in a range. Both limits are inclusive.
static inline bool Between(uint32_t character, uint32_t from, uint32_t to) {
// This makes use of the unsigned wraparound.
return character - from <= to - from;
uint16_t buffer[] = {code};
return InternalizeTwoByteString(Vector<const uint16_t>(buffer, 1));
}
static inline Handle<String> MakeOrFindTwoCharacterString(Isolate* isolate,
uint16_t c1,
uint16_t c2) {
// Numeric strings have a different hash algorithm not known by
// LookupTwoCharsStringIfExists, so we skip this step for such strings.
if (!Between(c1, '0', '9') || !Between(c2, '0', '9')) {
Handle<String> result;
if (StringTable::LookupTwoCharsStringIfExists(isolate, c1, c2)
.ToHandle(&result)) {
return result;
}
}
// Now we know the length is 2, we might as well make use of that fact
// when building the new string.
if (static_cast<unsigned>(c1 | c2) <= String::kMaxOneByteCharCodeU) {
// We can do this.
DCHECK(base::bits::IsPowerOfTwo(String::kMaxOneByteCharCodeU +
1)); // because of this.
Handle<SeqOneByteString> str =
isolate->factory()->NewRawOneByteString(2).ToHandleChecked();
DisallowHeapAllocation no_allocation;
uint8_t* dest = str->GetChars(no_allocation);
dest[0] = static_cast<uint8_t>(c1);
dest[1] = static_cast<uint8_t>(c2);
return str;
} else {
Handle<SeqTwoByteString> str =
isolate->factory()->NewRawTwoByteString(2).ToHandleChecked();
DisallowHeapAllocation no_allocation;
uc16* dest = str->GetChars(no_allocation);
dest[0] = c1;
dest[1] = c2;
return str;
if ((c1 | c2) <= unibrow::Latin1::kMaxChar) {
uint8_t buffer[] = {static_cast<uint8_t>(c1), static_cast<uint8_t>(c2)};
return isolate->factory()->InternalizeOneByteString(
Vector<const uint8_t>(buffer, 2));
}
uint16_t buffer[] = {c1, c2};
return isolate->factory()->InternalizeTwoByteString(
Vector<const uint16_t>(buffer, 2));
}
template <typename SinkChar, typename StringType>
......
......@@ -347,7 +347,7 @@ class V8_EXPORT_PRIVATE Factory {
// Creates a single character string where the character has given code.
// A cache is used for Latin1 codes.
Handle<String> LookupSingleCharacterStringFromCode(uint32_t code);
Handle<String> LookupSingleCharacterStringFromCode(uint16_t code);
// Create a new cons string object which consists of a pair of strings.
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewConsString(Handle<String> left,
......
......@@ -6482,7 +6482,7 @@ class RegExpKey : public HashTableKey {
class InternalizedStringKey : public StringTableKey {
public:
explicit InternalizedStringKey(Handle<String> string)
: StringTableKey(0), string_(string) {
: StringTableKey(0, string->length()), string_(string) {
DCHECK(!string->IsInternalizedString());
DCHECK(string->IsFlat());
// Make sure hash_field is computed.
......@@ -6490,9 +6490,7 @@ class InternalizedStringKey : public StringTableKey {
set_hash_field(string->hash_field());
}
bool IsMatch(Object string) override {
return string_->SlowEquals(String::cast(string));
}
bool IsMatch(String string) override { return string_->SlowEquals(string); }
Handle<String> AsHandle(Isolate* isolate) override {
// Internalize the string if possible.
......@@ -6758,77 +6756,6 @@ uint32_t HashTable<Derived, Shape>::FindInsertionEntry(uint32_t hash) {
return entry;
}
// This class is used for looking up two character strings in the string table.
// If we don't have a hit we don't want to waste much time so we unroll the
// string hash calculation loop here for speed. Doesn't work if the two
// characters form a decimal integer, since such strings have a different hash
// algorithm.
//
// The key is lookup-only: AsHandle() is UNREACHABLE, so it must never be used
// on a path that would insert the key into the table.
class TwoCharHashTableKey : public StringTableKey {
public:
// Builds a key for the two-character string (c1, c2). The hash field handed
// to the StringTableKey base is computed up front from both characters and
// |seed|; the characters themselves are kept for IsMatch().
TwoCharHashTableKey(uint16_t c1, uint16_t c2, uint64_t seed)
: StringTableKey(ComputeHashField(c1, c2, seed)), c1_(c1), c2_(c2) {}
// Returns true iff |o| (cast to String) has length 2 and its characters at
// index 0 and 1 equal c1_ and c2_ respectively. The length is checked first
// so Get(1) is only reached for strings of exactly two characters.
bool IsMatch(Object o) override {
String other = String::cast(o);
if (other->length() != 2) return false;
if (other->Get(0) != c1_) return false;
return other->Get(1) == c2_;
}
Handle<String> AsHandle(Isolate* isolate) override {
// The TwoCharHashTableKey is only used for looking in the string
// table, not for adding to it.
UNREACHABLE();
}
private:
// Computes the hash field for the two-character string (c1, c2) by running
// the per-character mixing step once for each character, followed by the
// finalization step. Only the low 32 bits of |seed| are used.
// In DEBUG builds the result is cross-checked against
// StringHasher::HashSequentialString on the same two characters.
uint32_t ComputeHashField(uint16_t c1, uint16_t c2, uint64_t seed) {
// Char 1.
uint32_t hash = static_cast<uint32_t>(seed);
hash += c1;
hash += hash << 10;
hash ^= hash >> 6;
// Char 2.
hash += c2;
hash += hash << 10;
hash ^= hash >> 6;
// GetHash.
hash += hash << 3;
hash ^= hash >> 11;
hash += hash << 15;
// If the bits selected by kHashBitMask are all zero, substitute
// StringHasher::kZeroHash.
// NOTE(review): presumably a zero hash is reserved to mean "not computed";
// confirm against StringHasher.
if ((hash & String::kHashBitMask) == 0) hash = StringHasher::kZeroHash;
// Move the hash above the low kHashShift bits and set the
// "is not an array index" bit in the resulting hash field.
hash = (hash << String::kHashShift) | String::kIsNotArrayIndexMask;
#ifdef DEBUG
// If this assert fails then we failed to reproduce the two-character
// version of the string hashing algorithm above. One reason could be
// that we were passed two digits as characters, since the hash
// algorithm is different in that case.
uint16_t chars[2] = {c1, c2};
uint32_t check_hash = StringHasher::HashSequentialString(chars, 2, seed);
DCHECK_EQ(hash, check_hash);
#endif
return hash;
}
// The two characters of the string being looked up.
uint16_t c1_;
uint16_t c2_;
};
// Looks up the two-character string (c1, c2) in the isolate's string table
// without adding anything to it.
// Returns a handle to the table entry if one is found, or an empty
// MaybeHandle<String> when FindEntry reports kNotFound.
// The key is a TwoCharHashTableKey, whose hash computation does not apply to
// two-digit decimal strings (see the comment on that class), so callers are
// expected not to pass two digits.
MaybeHandle<String> StringTable::LookupTwoCharsStringIfExists(
Isolate* isolate,
uint16_t c1,
uint16_t c2) {
TwoCharHashTableKey key(c1, c2, HashSeed(isolate));
Handle<StringTable> string_table = isolate->factory()->string_table();
int entry = string_table->FindEntry(isolate, &key);
if (entry == kNotFound) return MaybeHandle<String>();
Handle<String> result(String::cast(string_table->KeyAt(entry)), isolate);
// Sanity checks: the entry found must be internalized and must hash to the
// same value as the lookup key.
DCHECK(StringShape(*result).IsInternalized());
DCHECK_EQ(result->Hash(), key.Hash());
return result;
}
void StringTable::EnsureCapacityForDeserialization(Isolate* isolate,
int expected) {
Handle<StringTable> table = isolate->factory()->string_table();
......@@ -6963,7 +6890,7 @@ Handle<String> StringTable::AddKeyNoResize(Isolate* isolate,
DCHECK_EQ(table->FindEntry(isolate, key), kNotFound);
// Add the new string and return it along with the string table.
int entry = table->FindInsertionEntry(key->Hash());
int entry = table->FindInsertionEntry(key->hash());
table->set(EntryToIndex(entry), *string);
table->ElementAdded();
......@@ -6987,7 +6914,7 @@ namespace {
class StringTableNoAllocateKey : public StringTableKey {
public:
StringTableNoAllocateKey(String string, uint64_t seed)
: StringTableKey(0), string_(string) {
: StringTableKey(0, string.length()), string_(string) {
StringShape shape(string);
one_byte_ = shape.encoding_tag() == kOneByteStringTag;
DCHECK(!shape.IsInternalized());
......@@ -7036,14 +6963,9 @@ class StringTableNoAllocateKey : public StringTableKey {
}
}
bool IsMatch(Object otherstring) override {
String other = String::cast(otherstring);
bool IsMatch(String other) override {
DCHECK(other->IsInternalizedString());
DCHECK(other->IsFlat());
if (Hash() != other->Hash()) return false;
int len = string_->length();
if (len != other->length()) return false;
DisallowHeapAllocation no_gc;
if (!special_flattening_) {
if (string_->Get(0) != other->Get(0)) return false;
......@@ -7055,14 +6977,16 @@ class StringTableNoAllocateKey : public StringTableKey {
String::FlatContent flat1 = string_->GetFlatContent(no_gc);
String::FlatContent flat2 = other->GetFlatContent(no_gc);
return CompareRawStringContents(flat1.ToOneByteVector().begin(),
flat2.ToOneByteVector().begin(), len);
flat2.ToOneByteVector().begin(),
length());
}
if (shape1.encoding_tag() == kTwoByteStringTag &&
shape2.encoding_tag() == kTwoByteStringTag) {
String::FlatContent flat1 = string_->GetFlatContent(no_gc);
String::FlatContent flat2 = other->GetFlatContent(no_gc);
return CompareRawStringContents(flat1.ToUC16Vector().begin(),
flat2.ToUC16Vector().begin(), len);
flat2.ToUC16Vector().begin(),
length());
}
}
StringComparator comparator;
......@@ -7072,11 +6996,12 @@ class StringTableNoAllocateKey : public StringTableKey {
String::FlatContent flat_content = other->GetFlatContent(no_gc);
if (one_byte_) {
if (flat_content.IsOneByte()) {
return CompareRawStringContents(
one_byte_content_, flat_content.ToOneByteVector().begin(), len);
return CompareRawStringContents(one_byte_content_,
flat_content.ToOneByteVector().begin(),
length());
} else {
DCHECK(flat_content.IsTwoByte());
for (int i = 0; i < len; i++) {
for (int i = 0; i < length(); i++) {
if (flat_content.Get(i) != one_byte_content_[i]) return false;
}
return true;
......@@ -7084,10 +7009,10 @@ class StringTableNoAllocateKey : public StringTableKey {
} else {
if (flat_content.IsTwoByte()) {
return CompareRawStringContents(
two_byte_content_, flat_content.ToUC16Vector().begin(), len);
two_byte_content_, flat_content.ToUC16Vector().begin(), length());
} else {
DCHECK(flat_content.IsOneByte());
for (int i = 0; i < len; i++) {
for (int i = 0; i < length(); i++) {
if (flat_content.Get(i) != two_byte_content_[i]) return false;
}
return true;
......@@ -7144,7 +7069,7 @@ Address StringTable::LookupStringIfExists_NoAllocate(Isolate* isolate,
}
DCHECK(!string->IsInternalizedString());
int entry = table->FindEntry(ReadOnlyRoots(isolate), &key, key.Hash());
int entry = table->FindEntry(ReadOnlyRoots(isolate), &key, key.hash());
if (entry != kNotFound) {
String internalized = String::cast(table->KeyAt(entry));
if (FLAG_thin_strings) {
......
......@@ -203,12 +203,10 @@ class SequentialStringKey final : public StringTableKey {
chars) {}
SequentialStringKey(int hash, const Vector<const Char>& chars)
: StringTableKey(hash), chars_(chars) {}
: StringTableKey(hash, chars.length()), chars_(chars) {}
bool IsMatch(Object other) override {
bool IsMatch(String s) override {
DisallowHeapAllocation no_gc;
String s = String::cast(other);
if (s.length() != chars_.length()) return false;
if (s->IsOneByteRepresentation()) {
const uint8_t* chars = s.GetChars<uint8_t>(no_gc);
return CompareChars(chars, chars_.begin(), chars_.length()) == 0;
......@@ -220,10 +218,10 @@ class SequentialStringKey final : public StringTableKey {
Handle<String> AsHandle(Isolate* isolate) override {
if (sizeof(Char) == 1) {
return isolate->factory()->NewOneByteInternalizedString(
Vector<const uint8_t>::cast(chars_), HashField());
Vector<const uint8_t>::cast(chars_), hash_field());
}
return isolate->factory()->NewTwoByteInternalizedString(
Vector<const uint16_t>::cast(chars_), HashField());
Vector<const uint16_t>::cast(chars_), hash_field());
}
private:
......@@ -244,43 +242,41 @@ class SeqOneByteSubStringKey final : public StringTableKey {
#pragma warning(disable : 4789)
#endif
SeqOneByteSubStringKey(Isolate* isolate, Handle<SeqOneByteString> string,
int from, int length)
: StringTableKey(0), string_(string), from_(from), length_(length) {
int from, int len)
: StringTableKey(0, len), string_(string), from_(from) {
// We have to set the hash later.
DisallowHeapAllocation no_gc;
uint32_t hash = StringHasher::HashSequentialString(
string->GetChars(no_gc) + from, length, HashSeed(isolate));
string->GetChars(no_gc) + from, len, HashSeed(isolate));
set_hash_field(hash);
DCHECK_LE(0, length_);
DCHECK_LE(from_ + length_, string_->length());
DCHECK_LE(0, length());
DCHECK_LE(from_ + length(), string_->length());
DCHECK(string_->IsSeqOneByteString());
}
#if defined(V8_CC_MSVC)
#pragma warning(pop)
#endif
bool IsMatch(Object object) override {
bool IsMatch(String string) override {
DisallowHeapAllocation no_gc;
String string = String::cast(object);
if (string.length() != length_) return false;
if (string.IsOneByteRepresentation()) {
const uint8_t* data = string.GetChars<uint8_t>(no_gc);
return CompareChars(string_->GetChars(no_gc) + from_, data, length_) == 0;
return CompareChars(string_->GetChars(no_gc) + from_, data, length()) ==
0;
}
const uint16_t* data = string.GetChars<uint16_t>(no_gc);
return CompareChars(string_->GetChars(no_gc) + from_, data, length_) == 0;
return CompareChars(string_->GetChars(no_gc) + from_, data, length()) == 0;
}
Handle<String> AsHandle(Isolate* isolate) override {
return isolate->factory()->NewOneByteInternalizedSubString(
string_, from_, length_, HashField());
string_, from_, length(), hash_field());
}
private:
Handle<SeqOneByteString> string_;
int from_;
int length_;
};
bool String::Equals(String other) {
......
......@@ -40,12 +40,27 @@ uint32_t StringSetShape::HashForObject(ReadOnlyRoots roots, Object object) {
return String::cast(object)->Hash();
}
StringTableKey::StringTableKey(uint32_t hash_field)
: HashTableKey(hash_field >> Name::kHashShift), hash_field_(hash_field) {}
// Decides whether the table entry |value| matches the lookup |key|.
// |value| is cast to String once here, so StringTableKey::IsMatch
// implementations receive a String and need not cast themselves.
// The hash field and then the length are compared before the key's virtual
// IsMatch is called, so IsMatch only runs for entries that already agree with
// the key on both.
bool StringTableShape::IsMatch(Key key, Object value) {
String string = String::cast(value);
if (string.hash_field() != key->hash_field()) return false;
if (string.length() != key->length()) return false;
return key->IsMatch(string);
}
StringTableKey::StringTableKey(uint32_t hash_field, int length)
: hash_field_(hash_field), length_(length) {}
void StringTableKey::set_hash_field(uint32_t hash_field) {
hash_field_ = hash_field;
set_hash(hash_field >> Name::kHashShift);
}
// Returns the hash derived from the stored hash field by shifting it right by
// Name::kHashShift.
// NOTE(review): presumably the low kHashShift bits of the field hold flag bits
// rather than hash bits; confirm against Name.
uint32_t StringTableKey::hash() const {
return hash_field_ >> Name::kHashShift;
}
// Returns the hash the table uses for |key|; simply forwards to key->hash().
// |isolate| is not used.
// static
uint32_t StringTableShape::Hash(Isolate* isolate, Key key) {
return key->hash();
}
Handle<Object> StringTableShape::AsHandle(Isolate* isolate,
......
......@@ -14,30 +14,34 @@
namespace v8 {
namespace internal {
class StringTableKey : public HashTableKey {
class StringTableKey {
public:
explicit inline StringTableKey(uint32_t hash_field);
virtual ~StringTableKey() {}
inline StringTableKey(uint32_t hash_field, int length);
virtual Handle<String> AsHandle(Isolate* isolate) = 0;
uint32_t HashField() const {
uint32_t hash_field() const {
DCHECK_NE(0, hash_field_);
return hash_field_;
}
virtual bool IsMatch(String string) = 0;
inline uint32_t hash() const;
int length() const { return length_; }
protected:
inline void set_hash_field(uint32_t hash_field);
private:
uint32_t hash_field_ = 0;
int length_;
};
class StringTableShape : public BaseShape<StringTableKey*> {
public:
static inline bool IsMatch(Key key, Object value) {
return key->IsMatch(value);
}
static inline bool IsMatch(Key key, Object value);
static inline uint32_t Hash(Isolate* isolate, Key key) { return key->Hash(); }
static inline uint32_t Hash(Isolate* isolate, Key key);
static inline uint32_t HashForObject(ReadOnlyRoots roots, Object object);
......@@ -71,10 +75,6 @@ class StringTable : public HashTable<StringTable, StringTableShape> {
static Handle<StringTable> CautiousShrink(Isolate* isolate,
Handle<StringTable> table);
// Looks up a string that is equal to the given string and returns
// string handle if it is found, or an empty handle otherwise.
V8_WARN_UNUSED_RESULT static MaybeHandle<String> LookupTwoCharsStringIfExists(
Isolate* isolate, uint16_t c1, uint16_t c2);
// {raw_string} must be a tagged String pointer.
// Returns a tagged pointer: either an internalized string, or a Smi
// sentinel.
......
......@@ -166,16 +166,14 @@ void Deserializer::LogScriptEvents(Script script) {
}
StringTableInsertionKey::StringTableInsertionKey(String string)
: StringTableKey(ComputeHashField(string)), string_(string) {
: StringTableKey(ComputeHashField(string), string.length()),
string_(string) {
DCHECK(string->IsInternalizedString());
}
bool StringTableInsertionKey::IsMatch(Object string) {
// We know that all entries in a hash table had their hash keys created.
// Use that knowledge to have fast failure.
if (Hash() != String::cast(string)->Hash()) return false;
// We want to compare the content of two internalized strings here.
return string_->SlowEquals(String::cast(string));
bool StringTableInsertionKey::IsMatch(String string) {
// We want to compare the content of two strings here.
return string_->SlowEquals(string);
}
Handle<String> StringTableInsertionKey::AsHandle(Isolate* isolate) {
......
......@@ -190,7 +190,7 @@ class StringTableInsertionKey : public StringTableKey {
public:
explicit StringTableInsertionKey(String string);
bool IsMatch(Object string) override;
bool IsMatch(String string) override;
V8_WARN_UNUSED_RESULT Handle<String> AsHandle(Isolate* isolate) override;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment