Commit 9569b20d authored by yangguo@chromium.org

Replace the use of CharacterStreams in Heap::AllocateInternalSymbol and String::ComputeHash

R=yangguo@chromium.org
BUG=

Review URL: https://chromiumcodereview.appspot.com/11593007
Patch from Dan Carney <dcarney@google.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13242 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 362218a0
...@@ -98,12 +98,34 @@ MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str, ...@@ -98,12 +98,34 @@ MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
} }
template<>
bool inline Heap::IsOneByte(Vector<const char> str, int chars) {
// TODO(dcarney): incorporate Latin-1 check when Latin-1 is supported?
// ASCII only check.
return chars == str.length();
}
template<>
bool inline Heap::IsOneByte(String* str, int chars) {
return str->IsOneByteRepresentation();
}
MaybeObject* Heap::AllocateSymbol(Vector<const char> str, MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
int chars, int chars,
uint32_t hash_field) { uint32_t hash_field) {
unibrow::Utf8InputBuffer<> buffer(str.start(), if (IsOneByte(str, chars)) return AllocateAsciiSymbol(str, hash_field);
static_cast<unsigned>(str.length())); return AllocateInternalSymbol<false>(str, chars, hash_field);
return AllocateInternalSymbol(&buffer, chars, hash_field); }
template<typename T>
MaybeObject* Heap::AllocateInternalSymbol(T t, int chars, uint32_t hash_field) {
if (IsOneByte(t, chars)) {
return AllocateInternalSymbol<true>(t, chars, hash_field);
}
return AllocateInternalSymbol<false>(t, chars, hash_field);
} }
......
...@@ -3302,8 +3302,8 @@ static inline bool Between(uint32_t character, uint32_t from, uint32_t to) { ...@@ -3302,8 +3302,8 @@ static inline bool Between(uint32_t character, uint32_t from, uint32_t to) {
MUST_USE_RESULT static inline MaybeObject* MakeOrFindTwoCharacterString( MUST_USE_RESULT static inline MaybeObject* MakeOrFindTwoCharacterString(
Heap* heap, Heap* heap,
uint32_t c1, uint16_t c1,
uint32_t c2) { uint16_t c2) {
String* symbol; String* symbol;
// Numeric strings have a different hash algorithm not known by // Numeric strings have a different hash algorithm not known by
// LookupTwoCharsSymbolIfExists, so we skip this step for such strings. // LookupTwoCharsSymbolIfExists, so we skip this step for such strings.
...@@ -3352,8 +3352,8 @@ MaybeObject* Heap::AllocateConsString(String* first, String* second) { ...@@ -3352,8 +3352,8 @@ MaybeObject* Heap::AllocateConsString(String* first, String* second) {
// dictionary. Check whether we already have the string in the symbol // dictionary. Check whether we already have the string in the symbol
// table to prevent creation of many unneccesary strings. // table to prevent creation of many unneccesary strings.
if (length == 2) { if (length == 2) {
unsigned c1 = first->Get(0); uint16_t c1 = first->Get(0);
unsigned c2 = second->Get(0); uint16_t c2 = second->Get(0);
return MakeOrFindTwoCharacterString(this, c1, c2); return MakeOrFindTwoCharacterString(this, c1, c2);
} }
...@@ -3467,8 +3467,8 @@ MaybeObject* Heap::AllocateSubString(String* buffer, ...@@ -3467,8 +3467,8 @@ MaybeObject* Heap::AllocateSubString(String* buffer,
// Optimization for 2-byte strings often used as keys in a decompression // Optimization for 2-byte strings often used as keys in a decompression
// dictionary. Check whether we already have the string in the symbol // dictionary. Check whether we already have the string in the symbol
// table to prevent creation of many unneccesary strings. // table to prevent creation of many unneccesary strings.
unsigned c1 = buffer->Get(start); uint16_t c1 = buffer->Get(start);
unsigned c2 = buffer->Get(start + 1); uint16_t c2 = buffer->Get(start + 1);
return MakeOrFindTwoCharacterString(this, c1, c2); return MakeOrFindTwoCharacterString(this, c1, c2);
} }
...@@ -4624,27 +4624,88 @@ Map* Heap::SymbolMapForString(String* string) { ...@@ -4624,27 +4624,88 @@ Map* Heap::SymbolMapForString(String* string) {
} }
MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer, template<typename T>
int chars, class AllocateInternalSymbolHelper {
uint32_t hash_field) { public:
ASSERT(chars >= 0); static void WriteOneByteData(T t, char* chars, int len);
// Ensure the chars matches the number of characters in the buffer. static void WriteTwoByteData(T t, uint16_t* chars, int len);
ASSERT(static_cast<unsigned>(chars) == buffer->Utf16Length()); private:
// Determine whether the string is ASCII. DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper);
bool is_ascii = true; };
while (buffer->has_more()) {
if (buffer->GetNext() > unibrow::Utf8::kMaxOneByteChar) {
is_ascii = false; template<>
break; class AllocateInternalSymbolHelper< Vector<const char> > {
public:
static inline void WriteOneByteData(Vector<const char> vector,
char* chars,
int len) {
// Only works for ascii.
ASSERT(vector.length() == len);
memcpy(chars, vector.start(), len);
}
static inline void WriteTwoByteData(Vector<const char> vector,
uint16_t* chars,
int len) {
const uint8_t* stream = reinterpret_cast<const uint8_t*>(vector.start());
unsigned stream_length = vector.length();
while (stream_length != 0) {
unsigned consumed = 0;
uint32_t c = unibrow::Utf8::ValueOf(stream, stream_length, &consumed);
ASSERT(c != unibrow::Utf8::kBadChar);
ASSERT(consumed <= stream_length);
stream_length -= consumed;
stream += consumed;
if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
len -= 2;
if (len < 0) break;
*chars++ = unibrow::Utf16::LeadSurrogate(c);
*chars++ = unibrow::Utf16::TrailSurrogate(c);
} else {
len -= 1;
if (len < 0) break;
*chars++ = c;
}
} }
ASSERT(stream_length == 0);
ASSERT(len == 0);
} }
buffer->Rewind();
private:
DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper);
};
template<>
class AllocateInternalSymbolHelper<String*> {
public:
static inline void WriteOneByteData(String* s, char* chars, int len) {
ASSERT(s->length() == len);
String::WriteToFlat(s, chars, 0, len);
}
static inline void WriteTwoByteData(String* s, uint16_t* chars, int len) {
ASSERT(s->length() == len);
String::WriteToFlat(s, chars, 0, len);
}
private:
DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper<String*>);
};
template<bool is_one_byte, typename T>
MaybeObject* Heap::AllocateInternalSymbol(T t,
int chars,
uint32_t hash_field) {
typedef AllocateInternalSymbolHelper<T> H;
ASSERT(chars >= 0);
// Compute map and object size. // Compute map and object size.
int size; int size;
Map* map; Map* map;
if (is_ascii) { if (is_one_byte) {
if (chars > SeqOneByteString::kMaxLength) { if (chars > SeqOneByteString::kMaxLength) {
return Failure::OutOfMemoryException(); return Failure::OutOfMemoryException();
} }
...@@ -4674,21 +4735,26 @@ MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer, ...@@ -4674,21 +4735,26 @@ MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer,
ASSERT_EQ(size, answer->Size()); ASSERT_EQ(size, answer->Size());
// Fill in the characters. if (is_one_byte) {
int i = 0; H::WriteOneByteData(t, SeqOneByteString::cast(answer)->GetChars(), chars);
while (i < chars) { } else {
uint32_t character = buffer->GetNext(); H::WriteTwoByteData(t, SeqTwoByteString::cast(answer)->GetChars(), chars);
if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
answer->Set(i++, unibrow::Utf16::LeadSurrogate(character));
answer->Set(i++, unibrow::Utf16::TrailSurrogate(character));
} else {
answer->Set(i++, character);
}
} }
return answer; return answer;
} }
// Need explicit instantiations.
template
MaybeObject* Heap::AllocateInternalSymbol<true>(String*, int, uint32_t);
template
MaybeObject* Heap::AllocateInternalSymbol<false>(String*, int, uint32_t);
template
MaybeObject* Heap::AllocateInternalSymbol<false>(Vector<const char>,
int,
uint32_t);
MaybeObject* Heap::AllocateRawOneByteString(int length, MaybeObject* Heap::AllocateRawOneByteString(int length,
PretenureFlag pretenure) { PretenureFlag pretenure) {
if (length < 0 || length > SeqOneByteString::kMaxLength) { if (length < 0 || length > SeqOneByteString::kMaxLength) {
......
...@@ -764,12 +764,16 @@ class Heap { ...@@ -764,12 +764,16 @@ class Heap {
Vector<const uc16> str, Vector<const uc16> str,
uint32_t hash_field); uint32_t hash_field);
MUST_USE_RESULT MaybeObject* AllocateInternalSymbol( template<typename T>
unibrow::CharacterStream* buffer, int chars, uint32_t hash_field); static inline bool IsOneByte(T t, int chars);
MUST_USE_RESULT MaybeObject* AllocateExternalSymbol( template<typename T>
Vector<const char> str, MUST_USE_RESULT inline MaybeObject* AllocateInternalSymbol(
int chars); T t, int chars, uint32_t hash_field);
template<bool is_one_byte, typename T>
MUST_USE_RESULT MaybeObject* AllocateInternalSymbol(
T t, int chars, uint32_t hash_field);
// Allocates and partially initializes a String. There are two String // Allocates and partially initializes a String. There are two String
// encodings: ASCII and two byte. These functions allocate a string of the // encodings: ASCII and two byte. These functions allocate a string of the
......
...@@ -631,7 +631,17 @@ Handle<String> JsonParser<seq_ascii>::ScanJsonString() { ...@@ -631,7 +631,17 @@ Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
position_); position_);
} }
if (c0 < 0x20) return Handle<String>::null(); if (c0 < 0x20) return Handle<String>::null();
running_hash = StringHasher::AddCharacterCore(running_hash, c0); if (static_cast<uint32_t>(c0) >
unibrow::Utf16::kMaxNonSurrogateCharCode) {
running_hash =
StringHasher::AddCharacterCore(running_hash,
unibrow::Utf16::LeadSurrogate(c0));
running_hash =
StringHasher::AddCharacterCore(running_hash,
unibrow::Utf16::TrailSurrogate(c0));
} else {
running_hash = StringHasher::AddCharacterCore(running_hash, c0);
}
position++; position++;
if (position >= source_length_) return Handle<String>::null(); if (position >= source_length_) return Handle<String>::null();
c0 = seq_source_->SeqOneByteStringGet(position); c0 = seq_source_->SeqOneByteStringGet(position);
......
...@@ -2584,8 +2584,7 @@ void String::Visit( ...@@ -2584,8 +2584,7 @@ void String::Visit(
case kConsStringTag | kOneByteStringTag: case kConsStringTag | kOneByteStringTag:
case kConsStringTag | kTwoByteStringTag: case kConsStringTag | kTwoByteStringTag:
string = cons_op.Operate(ConsString::cast(string), &offset, &type, string = cons_op.Operate(string, &offset, &type, &length);
&length);
if (string == NULL) return; if (string == NULL) return;
slice_offset = offset; slice_offset = offset;
ASSERT(length == static_cast<unsigned>(string->length())); ASSERT(length == static_cast<unsigned>(string->length()));
...@@ -2777,6 +2776,11 @@ const uint16_t* ExternalTwoByteString::ExternalTwoByteStringGetData( ...@@ -2777,6 +2776,11 @@ const uint16_t* ExternalTwoByteString::ExternalTwoByteStringGetData(
} }
String* ConsStringNullOp::Operate(String*, unsigned*, int32_t*, unsigned*) {
return NULL;
}
unsigned ConsStringIteratorOp::OffsetForDepth(unsigned depth) { unsigned ConsStringIteratorOp::OffsetForDepth(unsigned depth) {
return depth & kDepthMask; return depth & kDepthMask;
} }
...@@ -2805,42 +2809,38 @@ void ConsStringIteratorOp::Pop() { ...@@ -2805,42 +2809,38 @@ void ConsStringIteratorOp::Pop() {
} }
void ConsStringIteratorOp::Reset() { bool ConsStringIteratorOp::HasMore() {
depth_ = 0; return depth_ != 0;
maximum_depth_ = 0;
} }
bool ConsStringIteratorOp::HasMore() { void ConsStringIteratorOp::Reset() {
return depth_ != 0; depth_ = 0;
} }
bool ConsStringIteratorOp::ContinueOperation(ContinueResponse* response) { String* ConsStringIteratorOp::ContinueOperation(int32_t* type_out,
unsigned* length_out) {
bool blew_stack; bool blew_stack;
int32_t type; String* string = NextLeaf(&blew_stack, type_out, length_out);
unsigned length;
String* string = NextLeaf(&blew_stack, &type, &length);
// String found. // String found.
if (string != NULL) { if (string != NULL) {
consumed_ += length; // Verify output.
response->string_ = string; ASSERT(*length_out == static_cast<unsigned>(string->length()));
response->offset_ = 0; ASSERT(*type_out == string->map()->instance_type());
response->length_ = length; return string;
response->type_ = type;
return true;
} }
// Traversal complete. // Traversal complete.
if (!blew_stack) return false; if (!blew_stack) return NULL;
// Restart search. // Restart search from root.
Reset(); unsigned offset_out;
// TODO(dcarney) This is unnecessary. string = Search(&offset_out, type_out, length_out);
// After a reset, we don't need a String::Visit // Verify output.
response->string_ = root_; ASSERT(string == NULL || offset_out == 0);
response->offset_ = consumed_; ASSERT(string == NULL ||
response->length_ = root_length_; *length_out == static_cast<unsigned>(string->length()));
response->type_ = root_type_; ASSERT(string == NULL || *type_out == string->map()->instance_type());
return true; return string;
} }
...@@ -2857,18 +2857,24 @@ StringCharacterStream::StringCharacterStream( ...@@ -2857,18 +2857,24 @@ StringCharacterStream::StringCharacterStream(
end_(NULL), end_(NULL),
op_(op) { op_(op) {
op->Reset(); op->Reset();
String::Visit(string, int32_t type = string->map()->instance_type();
offset, *this, *op, string->map()->instance_type(), string->length()); unsigned length = string->length();
String::Visit(string, offset, *this, *op, type, length);
} }
bool StringCharacterStream::HasMore() { bool StringCharacterStream::HasMore() {
if (buffer8_ != end_) return true; if (buffer8_ != end_) return true;
if (!op_->HasMore()) return false; if (!op_->HasMore()) return false;
ConsStringIteratorOp::ContinueResponse response; unsigned length;
if (!op_->ContinueOperation(&response)) return false; int32_t type;
String::Visit(response.string_, String* string = op_->ContinueOperation(&type, &length);
response.offset_, *this, *op_, response.type_, response.length_); if (string == NULL) return false;
ASSERT(!string->IsConsString());
ASSERT(string->length() != 0);
ConsStringNullOp null_op;
String::Visit(string, 0, *this, null_op, type, length);
ASSERT(buffer8_ != end_);
return true; return true;
} }
...@@ -5138,7 +5144,7 @@ bool StringHasher::has_trivial_hash() { ...@@ -5138,7 +5144,7 @@ bool StringHasher::has_trivial_hash() {
} }
uint32_t StringHasher::AddCharacterCore(uint32_t running_hash, uint32_t c) { uint32_t StringHasher::AddCharacterCore(uint32_t running_hash, uint16_t c) {
running_hash += c; running_hash += c;
running_hash += (running_hash << 10); running_hash += (running_hash << 10);
running_hash ^= (running_hash >> 6); running_hash ^= (running_hash >> 6);
...@@ -5157,66 +5163,62 @@ uint32_t StringHasher::GetHashCore(uint32_t running_hash) { ...@@ -5157,66 +5163,62 @@ uint32_t StringHasher::GetHashCore(uint32_t running_hash) {
} }
void StringHasher::AddCharacter(uint32_t c) { void StringHasher::AddCharacter(uint16_t c) {
if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
AddSurrogatePair(c); // Not inlined.
return;
}
// Use the Jenkins one-at-a-time hash function to update the hash // Use the Jenkins one-at-a-time hash function to update the hash
// for the given character. // for the given character.
raw_running_hash_ = AddCharacterCore(raw_running_hash_, c); raw_running_hash_ = AddCharacterCore(raw_running_hash_, c);
// Incremental array index computation.
if (is_array_index_) {
if (c < '0' || c > '9') {
is_array_index_ = false;
} else {
int d = c - '0';
if (is_first_char_) {
is_first_char_ = false;
if (c == '0' && length_ > 1) {
is_array_index_ = false;
return;
}
}
if (array_index_ > 429496729U - ((d + 2) >> 3)) {
is_array_index_ = false;
} else {
array_index_ = array_index_ * 10 + d;
}
}
}
} }
void StringHasher::AddCharacterNoIndex(uint32_t c) { bool StringHasher::UpdateIndex(uint16_t c) {
ASSERT(!is_array_index()); ASSERT(is_array_index_);
if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) { if (c < '0' || c > '9') {
AddSurrogatePairNoIndex(c); // Not inlined. is_array_index_ = false;
return; return false;
} }
raw_running_hash_ = AddCharacterCore(raw_running_hash_, c); int d = c - '0';
if (is_first_char_) {
is_first_char_ = false;
if (c == '0' && length_ > 1) {
is_array_index_ = false;
return false;
}
}
if (array_index_ > 429496729U - ((d + 2) >> 3)) {
is_array_index_ = false;
return false;
}
array_index_ = array_index_ * 10 + d;
return true;
} }
uint32_t StringHasher::GetHash() { template<typename Char>
// Get the calculated raw hash value and do some more bit ops to distribute inline void StringHasher::AddCharacters(const Char* chars, int length) {
// the hash further. Ensure that we never return zero as the hash value. ASSERT(sizeof(Char) == 1 || sizeof(Char) == 2);
return GetHashCore(raw_running_hash_); int i = 0;
if (is_array_index_) {
for (; i < length; i++) {
AddCharacter(chars[i]);
if (!UpdateIndex(chars[i])) {
i++;
break;
}
}
}
for (; i < length; i++) {
ASSERT(!is_array_index_);
AddCharacter(chars[i]);
}
} }
template <typename schar> template <typename schar>
uint32_t HashSequentialString(const schar* chars, int length, uint32_t seed) { uint32_t StringHasher::HashSequentialString(const schar* chars,
int length,
uint32_t seed) {
StringHasher hasher(length, seed); StringHasher hasher(length, seed);
if (!hasher.has_trivial_hash()) { if (!hasher.has_trivial_hash()) hasher.AddCharacters(chars, length);
int i;
for (i = 0; hasher.is_array_index() && (i < length); i++) {
hasher.AddCharacter(chars[i]);
}
for (; i < length; i++) {
hasher.AddCharacterNoIndex(chars[i]);
}
}
return hasher.GetHashField(); return hasher.GetHashField();
} }
......
This diff is collapsed.
...@@ -3043,7 +3043,7 @@ class SymbolTable: public HashTable<SymbolTableShape, HashTableKey*> { ...@@ -3043,7 +3043,7 @@ class SymbolTable: public HashTable<SymbolTableShape, HashTableKey*> {
// true if it is found, assigning the symbol to the given output // true if it is found, assigning the symbol to the given output
// parameter. // parameter.
bool LookupSymbolIfExists(String* str, String** symbol); bool LookupSymbolIfExists(String* str, String** symbol);
bool LookupTwoCharsSymbolIfExists(uint32_t c1, uint32_t c2, String** symbol); bool LookupTwoCharsSymbolIfExists(uint16_t c1, uint16_t c2, String** symbol);
// Casting. // Casting.
static inline SymbolTable* cast(Object* obj); static inline SymbolTable* cast(Object* obj);
...@@ -6929,30 +6929,14 @@ class StringHasher { ...@@ -6929,30 +6929,14 @@ class StringHasher {
public: public:
explicit inline StringHasher(int length, uint32_t seed); explicit inline StringHasher(int length, uint32_t seed);
// Returns true if the hash of this string can be computed without template <typename schar>
// looking at the contents. static inline uint32_t HashSequentialString(const schar* chars,
inline bool has_trivial_hash(); int length,
uint32_t seed);
// Add a character to the hash and update the array index calculation.
inline void AddCharacter(uint32_t c);
// Adds a character to the hash but does not update the array index static uint32_t ComputeHashField(unibrow::CharacterStream* buffer,
// calculation. This can only be called when it has been verified int length,
// that the input is not an array index. uint32_t seed);
inline void AddCharacterNoIndex(uint32_t c);
// Add a character above 0xffff as a surrogate pair. These can get into
// the hasher through the routines that take a UTF-8 string and make a symbol.
void AddSurrogatePair(uc32 c);
void AddSurrogatePairNoIndex(uc32 c);
// Returns the value to store in the hash field of a string with
// the given length and contents.
uint32_t GetHashField();
// Returns true if the characters seen so far make up a legal array
// index.
bool is_array_index() { return is_array_index_; }
// Calculated hash value for a string consisting of 1 to // Calculated hash value for a string consisting of 1 to
// String::kMaxArrayIndexSize digits with no leading zeros (except "0"). // String::kMaxArrayIndexSize digits with no leading zeros (except "0").
...@@ -6964,51 +6948,36 @@ class StringHasher { ...@@ -6964,51 +6948,36 @@ class StringHasher {
// use 27 instead. // use 27 instead.
static const int kZeroHash = 27; static const int kZeroHash = 27;
private:
uint32_t array_index() {
ASSERT(is_array_index());
return array_index_;
}
inline uint32_t GetHash();
// Reusable parts of the hashing algorithm. // Reusable parts of the hashing algorithm.
INLINE(static uint32_t AddCharacterCore(uint32_t running_hash, uint32_t c)); INLINE(static uint32_t AddCharacterCore(uint32_t running_hash, uint16_t c));
INLINE(static uint32_t GetHashCore(uint32_t running_hash)); INLINE(static uint32_t GetHashCore(uint32_t running_hash));
int length_; protected:
uint32_t raw_running_hash_; // Returns the value to store in the hash field of a string with
uint32_t array_index_; // the given length and contents.
bool is_array_index_; uint32_t GetHashField();
bool is_first_char_; // Returns true if the hash of this string can be computed without
friend class TwoCharHashTableKey; // looking at the contents.
inline bool has_trivial_hash();
template <bool seq_ascii> friend class JsonParser; // Adds a block of characters to the hash.
}; template<typename Char>
inline void AddCharacters(const Char* chars, int len);
class IncrementalAsciiStringHasher {
public:
explicit inline IncrementalAsciiStringHasher(uint32_t seed, char first_char);
inline void AddCharacter(uc32 c);
inline uint32_t GetHash();
private: private:
// Add a character to the hash.
inline void AddCharacter(uint16_t c);
// Update index. Returns true if string is still an index.
inline bool UpdateIndex(uint16_t c);
int length_; int length_;
uint32_t raw_running_hash_; uint32_t raw_running_hash_;
uint32_t array_index_; uint32_t array_index_;
bool is_array_index_; bool is_array_index_;
char first_char_; bool is_first_char_;
DISALLOW_COPY_AND_ASSIGN(StringHasher);
}; };
// Calculates string hash.
template <typename schar>
inline uint32_t HashSequentialString(const schar* chars,
int length,
uint32_t seed);
// The characteristics of a string are stored in its map. Retrieving these // The characteristics of a string are stored in its map. Retrieving these
// few bits of information is moderately expensive, involving two memory // few bits of information is moderately expensive, involving two memory
// loads where the second is dependent on the first. To improve efficiency // loads where the second is dependent on the first. To improve efficiency
...@@ -7227,10 +7196,6 @@ class String: public HeapObject { ...@@ -7227,10 +7196,6 @@ class String: public HeapObject {
// Returns a hash value used for the property table // Returns a hash value used for the property table
inline uint32_t Hash(); inline uint32_t Hash();
static uint32_t ComputeHashField(unibrow::CharacterStream* buffer,
int length,
uint32_t seed);
static bool ComputeArrayIndex(unibrow::CharacterStream* buffer, static bool ComputeArrayIndex(unibrow::CharacterStream* buffer,
uint32_t* index, uint32_t* index,
int length); int length);
...@@ -7870,22 +7835,30 @@ class StringInputBuffer: public unibrow::InputBuffer<String, String*, 1024> { ...@@ -7870,22 +7835,30 @@ class StringInputBuffer: public unibrow::InputBuffer<String, String*, 1024> {
}; };
// A ConsStringOp that returns null.
// Useful when the operation to apply on a ConsString
// requires an expensive data structure.
class ConsStringNullOp {
public:
inline ConsStringNullOp() {}
static inline String* Operate(String*, unsigned*, int32_t*, unsigned*);
private:
DISALLOW_COPY_AND_ASSIGN(ConsStringNullOp);
};
// This maintains an off-stack representation of the stack frames required // This maintains an off-stack representation of the stack frames required
// to traverse a ConsString, allowing an entirely iterative and restartable // to traverse a ConsString, allowing an entirely iterative and restartable
// traversal of the entire string // traversal of the entire string
// Note: this class is not GC-safe. // Note: this class is not GC-safe.
class ConsStringIteratorOp { class ConsStringIteratorOp {
public: public:
struct ContinueResponse {
String* string_;
unsigned offset_;
unsigned length_;
int32_t type_;
};
inline ConsStringIteratorOp() {} inline ConsStringIteratorOp() {}
String* Operate(ConsString* cons_string, unsigned* offset_out, String* Operate(String* string,
int32_t* type_out, unsigned* length_out); unsigned* offset_out,
inline bool ContinueOperation(ContinueResponse* response); int32_t* type_out,
unsigned* length_out);
inline String* ContinueOperation(int32_t* type_out, unsigned* length_out);
inline void Reset(); inline void Reset();
inline bool HasMore(); inline bool HasMore();
...@@ -7902,6 +7875,9 @@ class ConsStringIteratorOp { ...@@ -7902,6 +7875,9 @@ class ConsStringIteratorOp {
inline void AdjustMaximumDepth(); inline void AdjustMaximumDepth();
inline void Pop(); inline void Pop();
String* NextLeaf(bool* blew_stack, int32_t* type_out, unsigned* length_out); String* NextLeaf(bool* blew_stack, int32_t* type_out, unsigned* length_out);
String* Search(unsigned* offset_out,
int32_t* type_out,
unsigned* length_out);
unsigned depth_; unsigned depth_;
unsigned maximum_depth_; unsigned maximum_depth_;
...@@ -7910,8 +7886,6 @@ class ConsStringIteratorOp { ...@@ -7910,8 +7886,6 @@ class ConsStringIteratorOp {
ConsString* frames_[kStackSize]; ConsString* frames_[kStackSize];
unsigned consumed_; unsigned consumed_;
ConsString* root_; ConsString* root_;
int32_t root_type_;
unsigned root_length_;
DISALLOW_COPY_AND_ASSIGN(ConsStringIteratorOp); DISALLOW_COPY_AND_ASSIGN(ConsStringIteratorOp);
}; };
...@@ -7919,8 +7893,9 @@ class ConsStringIteratorOp { ...@@ -7919,8 +7893,9 @@ class ConsStringIteratorOp {
// Note: this class is not GC-safe. // Note: this class is not GC-safe.
class StringCharacterStream { class StringCharacterStream {
public: public:
inline StringCharacterStream( inline StringCharacterStream(String* string,
String* string, unsigned offset, ConsStringIteratorOp* op); unsigned offset,
ConsStringIteratorOp* op);
inline uint16_t GetNext(); inline uint16_t GetNext();
inline bool HasMore(); inline bool HasMore();
inline void Reset(String* string, unsigned offset, ConsStringIteratorOp* op); inline void Reset(String* string, unsigned offset, ConsStringIteratorOp* op);
......
...@@ -112,7 +112,7 @@ const char* StringsStorage::GetCopy(const char* src) { ...@@ -112,7 +112,7 @@ const char* StringsStorage::GetCopy(const char* src) {
OS::StrNCpy(dst, src, len); OS::StrNCpy(dst, src, len);
dst[len] = '\0'; dst[len] = '\0';
uint32_t hash = uint32_t hash =
HashSequentialString(dst.start(), len, HEAP->HashSeed()); StringHasher::HashSequentialString(dst.start(), len, HEAP->HashSeed());
return AddOrDisposeString(dst.start(), hash); return AddOrDisposeString(dst.start(), hash);
} }
...@@ -145,7 +145,7 @@ const char* StringsStorage::GetVFormatted(const char* format, va_list args) { ...@@ -145,7 +145,7 @@ const char* StringsStorage::GetVFormatted(const char* format, va_list args) {
DeleteArray(str.start()); DeleteArray(str.start());
return format; return format;
} }
uint32_t hash = HashSequentialString( uint32_t hash = StringHasher::HashSequentialString(
str.start(), len, HEAP->HashSeed()); str.start(), len, HEAP->HashSeed());
return AddOrDisposeString(str.start(), hash); return AddOrDisposeString(str.start(), hash);
} }
...@@ -156,8 +156,8 @@ const char* StringsStorage::GetName(String* name) { ...@@ -156,8 +156,8 @@ const char* StringsStorage::GetName(String* name) {
int length = Min(kMaxNameSize, name->length()); int length = Min(kMaxNameSize, name->length());
SmartArrayPointer<char> data = SmartArrayPointer<char> data =
name->ToCString(DISALLOW_NULLS, ROBUST_STRING_TRAVERSAL, 0, length); name->ToCString(DISALLOW_NULLS, ROBUST_STRING_TRAVERSAL, 0, length);
uint32_t hash = uint32_t hash = StringHasher::HashSequentialString(
HashSequentialString(*data, length, name->GetHeap()->HashSeed()); *data, length, name->GetHeap()->HashSeed());
return AddOrDisposeString(data.Detach(), hash); return AddOrDisposeString(data.Detach(), hash);
} }
return ""; return "";
...@@ -1451,9 +1451,9 @@ void HeapObjectsMap::RemoveDeadEntries() { ...@@ -1451,9 +1451,9 @@ void HeapObjectsMap::RemoveDeadEntries() {
SnapshotObjectId HeapObjectsMap::GenerateId(v8::RetainedObjectInfo* info) { SnapshotObjectId HeapObjectsMap::GenerateId(v8::RetainedObjectInfo* info) {
SnapshotObjectId id = static_cast<SnapshotObjectId>(info->GetHash()); SnapshotObjectId id = static_cast<SnapshotObjectId>(info->GetHash());
const char* label = info->GetLabel(); const char* label = info->GetLabel();
id ^= HashSequentialString(label, id ^= StringHasher::HashSequentialString(label,
static_cast<int>(strlen(label)), static_cast<int>(strlen(label)),
HEAP->HashSeed()); HEAP->HashSeed());
intptr_t element_count = info->GetElementCount(); intptr_t element_count = info->GetElementCount();
if (element_count != -1) if (element_count != -1)
id ^= ComputeIntegerHash(static_cast<uint32_t>(element_count), id ^= ComputeIntegerHash(static_cast<uint32_t>(element_count),
...@@ -2940,9 +2940,10 @@ class NativeGroupRetainedObjectInfo : public v8::RetainedObjectInfo { ...@@ -2940,9 +2940,10 @@ class NativeGroupRetainedObjectInfo : public v8::RetainedObjectInfo {
NativeGroupRetainedObjectInfo* NativeObjectsExplorer::FindOrAddGroupInfo( NativeGroupRetainedObjectInfo* NativeObjectsExplorer::FindOrAddGroupInfo(
const char* label) { const char* label) {
const char* label_copy = collection_->names()->GetCopy(label); const char* label_copy = collection_->names()->GetCopy(label);
uint32_t hash = HashSequentialString(label_copy, uint32_t hash = StringHasher::HashSequentialString(
static_cast<int>(strlen(label_copy)), label_copy,
HEAP->HashSeed()); static_cast<int>(strlen(label_copy)),
HEAP->HashSeed());
HashMap::Entry* entry = native_groups_.Lookup(const_cast<char*>(label_copy), HashMap::Entry* entry = native_groups_.Lookup(const_cast<char*>(label_copy),
hash, true); hash, true);
if (entry->value == NULL) { if (entry->value == NULL) {
......
...@@ -170,10 +170,6 @@ class Utf8 { ...@@ -170,10 +170,6 @@ class Utf8 {
// that match are coded as a 4 byte UTF-8 sequence. // that match are coded as a 4 byte UTF-8 sequence.
static const unsigned kBytesSavedByCombiningSurrogates = 2; static const unsigned kBytesSavedByCombiningSurrogates = 2;
static const unsigned kSizeOfUnmatchedSurrogate = 3; static const unsigned kSizeOfUnmatchedSurrogate = 3;
private:
template <unsigned s> friend class Utf8InputBuffer;
friend class Test;
static inline uchar ValueOf(const byte* str, static inline uchar ValueOf(const byte* str,
unsigned length, unsigned length,
unsigned* cursor); unsigned* cursor);
......
...@@ -345,37 +345,24 @@ void AccumulateStats(Handle<String> cons_string, ConsStringStats* stats) { ...@@ -345,37 +345,24 @@ void AccumulateStats(Handle<String> cons_string, ConsStringStats* stats) {
void AccumulateStatsWithOperator( void AccumulateStatsWithOperator(
ConsString* cons_string, ConsStringStats* stats) { ConsString* cons_string, ConsStringStats* stats) {
// Init op. unsigned offset = 0;
int32_t type = cons_string->map()->instance_type();
unsigned length = static_cast<unsigned>(cons_string->length());
ConsStringIteratorOp op; ConsStringIteratorOp op;
op.Reset(); String* string = op.Operate(cons_string, &offset, &type, &length);
// Use response for initial search and on blown stack. CHECK(string != NULL);
ConsStringIteratorOp::ContinueResponse response;
response.string_ = cons_string;
response.offset_ = 0;
response.type_ = cons_string->map()->instance_type();
response.length_ = (uint32_t) cons_string->length();
while (true) { while (true) {
String* string = op.Operate(ConsString::cast(response.string_), ASSERT(!string->IsConsString());
&response.offset_, // Accumulate stats.
&response.type_, stats->leaves_++;
&response.length_); stats->chars_ += string->length();
CHECK(string != NULL); // Check for completion.
while (true) { bool keep_going_fast_check = op.HasMore();
// Accumulate stats. string = op.ContinueOperation(&type, &length);
stats->leaves_++; if (string == NULL) return;
stats->chars_ += string->length(); // Verify no false positives for fast check.
// Check for completion. CHECK(keep_going_fast_check);
bool keep_going_fast_check = op.HasMore(); }
bool keep_going = op.ContinueOperation(&response);
if (!keep_going) return;
// Verify no false positives for fast check.
CHECK(keep_going_fast_check);
CHECK(response.string_ != NULL);
// Blew stack. Restart outer loop.
if (response.string_->IsConsString()) break;
string = response.string_;
}
};
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment