Calculate string hash during flattening and convert flat strings to

symbols.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@456 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 9dadae1b
......@@ -2251,6 +2251,16 @@ Object* Heap::LookupSymbol(String* string) {
}
bool Heap::LookupSymbolIfExists(String* string, String** symbol) {
if (string->IsSymbol()) {
*symbol = string;
return true;
}
SymbolTable* table = SymbolTable::cast(symbol_table_);
return table->LookupSymbolIfExists(string, symbol);
}
#ifdef DEBUG
void Heap::ZapFromSpace() {
ASSERT(HAS_HEAP_OBJECT_TAG(kFromSpaceZapValue));
......
......@@ -528,6 +528,7 @@ class Heap : public AllStatic {
return LookupSymbol(CStrVector(str));
}
static Object* LookupSymbol(String* str);
static bool LookupSymbolIfExists(String* str, String** symbol);
// Compute the matching symbol map for a string if possible.
// NULL is returned if string is in new space or not flattened.
......
......@@ -2088,6 +2088,65 @@ uint32_t String::Hash() {
}
StringHasher::StringHasher(int length)
: length_(length),
raw_running_hash_(0),
array_index_(0),
is_array_index_(0 < length_ && length_ <= String::kMaxArrayIndexSize),
is_first_char_(true),
is_valid_(true) { }
bool StringHasher::has_trivial_hash() {
return length_ > String::kMaxMediumStringSize;
}
void StringHasher::AddCharacter(uc32 c) {
// Note: the Jenkins one-at-a-time hash function
raw_running_hash_ += c;
raw_running_hash_ += (raw_running_hash_ << 10);
raw_running_hash_ ^= (raw_running_hash_ >> 6);
// Incremental array index computation
if (is_array_index_) {
if (c < '0' || c > '9') {
is_array_index_ = false;
} else {
int d = c - '0';
if (is_first_char_) {
is_first_char_ = false;
if (c == '0' && length_ > 1) {
is_array_index_ = false;
return;
}
}
if (array_index_ > 429496729U - ((d + 2) >> 3)) {
is_array_index_ = false;
} else {
array_index_ = array_index_ * 10 + d;
}
}
}
}
void StringHasher::AddCharacterNoIndex(uc32 c) {
ASSERT(!is_array_index());
raw_running_hash_ += c;
raw_running_hash_ += (raw_running_hash_ << 10);
raw_running_hash_ ^= (raw_running_hash_ >> 6);
}
uint32_t StringHasher::GetHash() {
uint32_t result = raw_running_hash_;
result += (result << 3);
result ^= (result >> 11);
result += (result << 15);
return result;
}
bool String::AsArrayIndex(uint32_t* index) {
uint32_t field = length_field();
if ((field & kHashComputedMask) && !(field & kIsArrayIndexMask)) return false;
......
......@@ -521,12 +521,23 @@ Object* String::Flatten() {
// cons string is in old space. It can never get GCed until there is
// an old space GC.
PretenureFlag tenure = Heap::InNewSpace(this) ? NOT_TENURED : TENURED;
int len = length();
Object* object = IsAscii() ?
Heap::AllocateRawAsciiString(length(), tenure) :
Heap::AllocateRawTwoByteString(length(), tenure);
Heap::AllocateRawAsciiString(len, tenure) :
Heap::AllocateRawTwoByteString(len, tenure);
if (object->IsFailure()) return object;
String* result = String::cast(object);
Flatten(this, result, 0, length(), 0);
StringHasher hasher(len);
Flatten(this, result, 0, len, 0, &hasher);
if (hasher.is_valid()) {
#ifdef DEBUG
result->ComputeAndSetHash();
ASSERT(result->length_field() == hasher.GetHashField());
#else
result->set_length_field(hasher.GetHashField());
#endif
Heap::LookupSymbolIfExists(result, &result);
}
cs->set_first(result);
cs->set_second(Heap::empty_string());
return this;
......@@ -3606,7 +3617,12 @@ Object* SlicedString::SlicedStringFlatten() {
}
void String::Flatten(String* src, String* sink, int f, int t, int so) {
void String::Flatten(String* src,
String* sink,
int f,
int t,
int so,
StringHasher* hasher) {
String* source = src;
int from = f;
int to = t;
......@@ -3621,7 +3637,10 @@ void String::Flatten(String* src, String* sink, int f, int t, int so) {
buffer->Reset(from, source);
int j = sink_offset;
for (int i = from; i < to; i++) {
sink->Set(j++, buffer->GetNext());
uc32 c = buffer->GetNext();
if (hasher->is_valid())
hasher->AddCharacter(c);
sink->Set(j++, c);
}
return;
}
......@@ -3637,10 +3656,10 @@ void String::Flatten(String* src, String* sink, int f, int t, int so) {
ConsString* cons_string = ConsString::cast(source);
String* first = String::cast(cons_string->first());
int boundary = first->length();
if (to - boundary > boundary - from) {
if (to - boundary >= boundary - from) {
// Right hand side is longer. Recurse over left.
if (from < boundary) {
Flatten(first, sink, from, boundary, sink_offset);
Flatten(first, sink, from, boundary, sink_offset, hasher);
sink_offset += boundary - from;
from = 0;
} else {
......@@ -3649,14 +3668,18 @@ void String::Flatten(String* src, String* sink, int f, int t, int so) {
to -= boundary;
source = String::cast(cons_string->second());
} else {
// Left hand side is longer. Recurse over right.
// Left hand side is longer. Recurse over right. The hasher
// needs us to visit the string from left to right so doing
// this invalidates that hash.
hasher->invalidate();
if (to > boundary) {
String* second = String::cast(cons_string->second());
Flatten(second,
sink,
0,
to - boundary,
sink_offset + boundary - from);
sink_offset + boundary - from,
hasher);
to = boundary;
}
source = first;
......@@ -3812,39 +3835,45 @@ static inline uint32_t HashField(uint32_t hash, bool is_array_index) {
}
uint32_t StringHasher::GetHashField() {
ASSERT(is_valid());
if (length_ <= String::kMaxShortStringSize) {
uint32_t payload;
if (is_array_index()) {
payload = v8::internal::HashField(array_index(), true);
} else {
payload = v8::internal::HashField(GetHash(), false);
}
return (payload & 0x00FFFFFF) | (length_ << String::kShortLengthShift);
} else if (length_ <= String::kMaxMediumStringSize) {
uint32_t payload = v8::internal::HashField(GetHash(), false);
return (payload & 0x0000FFFF) | (length_ << String::kMediumLengthShift);
} else {
return v8::internal::HashField(length_, false);
}
}
uint32_t String::ComputeLengthAndHashField(unibrow::CharacterStream* buffer,
int length) {
// Large string (please note large strings cannot be an array index).
if (length > kMaxMediumStringSize) return HashField(length, false);
StringHasher hasher(length);
// Note: the Jenkins one-at-a-time hash function
uint32_t hash = 0;
while (buffer->has_more()) {
uc32 r = buffer->GetNext();
hash += r;
hash += (hash << 10);
hash ^= (hash >> 6);
}
hash += (hash << 3);
hash ^= (hash >> 11);
hash += (hash << 15);
// Short string.
if (length <= kMaxShortStringSize) {
// Make hash value consistent with value returned from String::Hash.
buffer->Rewind();
uint32_t index;
hash = HashField(hash, ComputeArrayIndex(buffer, &index, length));
hash = (hash & 0x00FFFFFF) | (length << kShortLengthShift);
return hash;
}
// Very long strings have a trivial hash that doesn't inspect the
// string contents.
if (hasher.has_trivial_hash())
return hasher.GetHashField();
// Medium string (please note medium strings cannot be an array index).
ASSERT(length <= kMaxMediumStringSize);
// Make hash value consistent with value returned from String::Hash.
hash = HashField(hash, false);
hash = (hash & 0x0000FFFF) | (length << kMediumLengthShift);
return hash;
// Do the iterative array index computation as long as there is a
// chance this is an array index.
while (buffer->has_more() && hasher.is_array_index())
hasher.AddCharacter(buffer->GetNext());
// Process the remaining characters without updating the array
// index.
while (buffer->has_more())
hasher.AddCharacterNoIndex(buffer->GetNext());
return hasher.GetHashField();
}
......@@ -5630,6 +5659,20 @@ Object* SymbolTable::LookupString(String* string, Object** s) {
}
bool SymbolTable::LookupSymbolIfExists(String* string, String** symbol) {
SymbolKey key(string);
int entry = FindEntry(&key);
if (entry == -1) {
return false;
} else {
String* result = String::cast(KeyAt(entry));
ASSERT(result->is_symbol());
*symbol = result;
return true;
}
}
Object* SymbolTable::LookupSymbol(Vector<const char> str, Object** s) {
Utf8SymbolKey key(str);
return LookupKey(&key, s);
......
......@@ -1815,6 +1815,11 @@ class SymbolTable: public HashTable<0, 1> {
Object* LookupSymbol(Vector<const char> str, Object** s);
Object* LookupString(String* key, Object** s);
// Looks up a symbol that is equal to the given string and returns
// true if it is found, assigning the symbol to the given output
// parameter.
bool LookupSymbolIfExists(String* str, String** symbol);
// Casting.
static inline SymbolTable* cast(Object* obj);
......@@ -2839,6 +2844,52 @@ enum AllowNullsFlag {ALLOW_NULLS, DISALLOW_NULLS};
enum RobustnessFlag {ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL};
class StringHasher {
public:
inline StringHasher(int length);
// Returns true if the hash of this string can be computed without
// looking at the contents.
inline bool has_trivial_hash();
// Add a character to the hash and update the array index calculation.
inline void AddCharacter(uc32 c);
// Adds a character to the hash but does not update the array index
// calculation. This can only be called when it has been verified
// that the input is not an array index.
inline void AddCharacterNoIndex(uc32 c);
// Returns the value to store in the hash field of a string with
// the given length and contents.
uint32_t GetHashField();
// Returns true if the characters seen so far make up a legal array
// index.
bool is_array_index() { return is_array_index_; }
bool is_valid() { return is_valid_; }
void invalidate() { is_valid_ = false; }
private:
uint32_t array_index() {
ASSERT(is_array_index());
return array_index_;
}
inline uint32_t GetHash();
int length_;
uint32_t raw_running_hash_;
uint32_t array_index_;
bool is_array_index_;
bool is_first_char_;
bool is_valid_;
};
// The String abstract class captures JavaScript string values:
//
// Ecma-262:
......@@ -2980,6 +3031,8 @@ class String: public HeapObject {
static const int kMaxShortStringSize = 255;
static const int kMaxMediumStringSize = 65535;
static const int kMaxArrayIndexSize = 10;
// Max ascii char code.
static const int kMaxAsciiCharCode = 127;
......@@ -3024,7 +3077,8 @@ class String: public HeapObject {
String* sink,
int from,
int to,
int sink_offset);
int sink_offset,
StringHasher* hasher);
protected:
class ReadBlockBuffer {
......
......@@ -2380,21 +2380,24 @@ static Object* Runtime_StringBuilderConcat(Arguments args) {
if (object->IsFailure()) return object;
String* answer = String::cast(object);
StringHasher hasher(length);
for (int i = 0; i < array_length; i++) {
Object* element = fixed_array->get(i);
if (element->IsSmi()) {
int len = Smi::cast(element)->value();
int pos = len >> 11;
len &= 0x7ff;
String::Flatten(special, answer, pos, pos + len, position);
String::Flatten(special, answer, pos, pos + len, position, &hasher);
position += len;
} else {
String* string = String::cast(element);
int element_length = string->length();
String::Flatten(string, answer, 0, element_length, position);
String::Flatten(string, answer, 0, element_length, position, &hasher);
position += element_length;
}
}
if (hasher.is_valid())
answer->set_length_field(hasher.GetHashField());
return answer;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment