Commit 1aae797d authored by erik.corry@gmail.com

Use direct copy and templates to speed up flattening of strings.

Review URL: http://codereview.chromium.org/8011

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@549 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent bf948c83
...@@ -79,6 +79,8 @@ Handle<String> Factory::NewRawTwoByteString(int length, ...@@ -79,6 +79,8 @@ Handle<String> Factory::NewRawTwoByteString(int length,
Handle<String> Factory::NewConsString(Handle<String> first, Handle<String> Factory::NewConsString(Handle<String> first,
Handle<String> second) { Handle<String> second) {
if (first->length() == 0) return second;
if (second->length() == 0) return first;
CALL_HEAP_FUNCTION(Heap::AllocateConsString(*first, *second), String); CALL_HEAP_FUNCTION(Heap::AllocateConsString(*first, *second), String);
} }
......
...@@ -1331,29 +1331,33 @@ Object* Heap::AllocateSharedFunctionInfo(Object* name) { ...@@ -1331,29 +1331,33 @@ Object* Heap::AllocateSharedFunctionInfo(Object* name) {
Object* Heap::AllocateConsString(String* first, String* second) { Object* Heap::AllocateConsString(String* first, String* second) {
int length = first->length() + second->length(); int first_length = first->length();
int second_length = second->length();
int length = first_length + second_length;
bool is_ascii = first->is_ascii_representation() bool is_ascii = first->is_ascii_representation()
&& second->is_ascii_representation(); && second->is_ascii_representation();
// If the resulting string is small make a flat string. // If the resulting string is small make a flat string.
if (length < ConsString::kMinLength) { if (length < String::kMinNonFlatLength) {
Object* result = is_ascii ASSERT(first->IsFlat());
? AllocateRawAsciiString(length) ASSERT(second->IsFlat());
: AllocateRawTwoByteString(length); if (is_ascii) {
if (result->IsFailure()) return result; Object* result = AllocateRawAsciiString(length);
// Copy the characters into the new object. if (result->IsFailure()) return result;
String* string_result = String::cast(result); // Copy the characters into the new object.
int first_length = first->length(); char* dest = SeqAsciiString::cast(result)->GetChars();
// Copy the content of the first string. String::WriteToFlat(first, dest, 0, first_length);
for (int i = 0; i < first_length; i++) { String::WriteToFlat(second, dest + first_length, 0, second_length);
string_result->Set(i, first->Get(i)); return result;
} } else {
int second_length = second->length(); Object* result = AllocateRawTwoByteString(length);
// Copy the content of the first string. if (result->IsFailure()) return result;
for (int i = 0; i < second_length; i++) { // Copy the characters into the new object.
string_result->Set(first_length + i, second->Get(i)); uc16* dest = SeqTwoByteString::cast(result)->GetChars();
String::WriteToFlat(first, dest, 0, first_length);
String::WriteToFlat(second, dest + first_length, 0, second_length);
return result;
} }
return result;
} }
Map* map; Map* map;
...@@ -1384,7 +1388,7 @@ Object* Heap::AllocateSlicedString(String* buffer, int start, int end) { ...@@ -1384,7 +1388,7 @@ Object* Heap::AllocateSlicedString(String* buffer, int start, int end) {
int length = end - start; int length = end - start;
// If the resulting string is small make a sub string. // If the resulting string is small make a sub string.
if (end - start <= SlicedString::kMinLength) { if (end - start <= String::kMinNonFlatLength) {
return Heap::AllocateSubString(buffer, start, end); return Heap::AllocateSubString(buffer, start, end);
} }
......
...@@ -1372,6 +1372,12 @@ bool String::is_ascii_representation_map(Map* map) { ...@@ -1372,6 +1372,12 @@ bool String::is_ascii_representation_map(Map* map) {
} }
// Returns the bits of this string's map instance type that are selected by
// kStringRepresentationMask and kStringEncodingMask, i.e. the representation
// and encoding tag combined into a single value.
int String::full_representation_tag() {
  int type = map()->instance_type();
  return type & (kStringRepresentationMask | kStringEncodingMask);
}
StringRepresentationTag String::representation_tag() { StringRepresentationTag String::representation_tag() {
return map_representation_tag(map()); return map_representation_tag(map());
} }
...@@ -1417,8 +1423,8 @@ Address SeqAsciiString::GetCharsAddress() { ...@@ -1417,8 +1423,8 @@ Address SeqAsciiString::GetCharsAddress() {
} }
const char* SeqAsciiString::GetChars() { char* SeqAsciiString::GetChars() {
return reinterpret_cast<const char*>(GetCharsAddress()); return reinterpret_cast<char*>(GetCharsAddress());
} }
...@@ -1427,6 +1433,11 @@ Address SeqTwoByteString::GetCharsAddress() { ...@@ -1427,6 +1433,11 @@ Address SeqTwoByteString::GetCharsAddress() {
} }
// Returns a mutable uc16 pointer to the field located at kHeaderSize in this
// object, which is where the string's characters begin.
uc16* SeqTwoByteString::GetChars() {
  uc16* chars = reinterpret_cast<uc16*>(FIELD_ADDR(this, kHeaderSize));
  return chars;
}
uint16_t SeqTwoByteString::SeqTwoByteStringGet(int index) { uint16_t SeqTwoByteString::SeqTwoByteStringGet(int index) {
ASSERT(index >= 0 && index < length()); ASSERT(index >= 0 && index < length());
return READ_SHORT_FIELD(this, kHeaderSize + index * kShortSize); return READ_SHORT_FIELD(this, kHeaderSize + index * kShortSize);
......
...@@ -529,12 +529,33 @@ Object* String::Flatten() { ...@@ -529,12 +529,33 @@ Object* String::Flatten() {
// an old space GC. // an old space GC.
PretenureFlag tenure = Heap::InNewSpace(this) ? NOT_TENURED : TENURED; PretenureFlag tenure = Heap::InNewSpace(this) ? NOT_TENURED : TENURED;
int len = length(); int len = length();
Object* object = IsAsciiRepresentation() ? Object* object;
Heap::AllocateRawAsciiString(len, tenure) : String* result;
Heap::AllocateRawTwoByteString(len, tenure); if (IsAsciiRepresentation()) {
if (object->IsFailure()) return object; object = Heap::AllocateRawAsciiString(len, tenure);
String* result = String::cast(object); if (object->IsFailure()) return object;
Flatten(this, result, 0, len, 0); result = String::cast(object);
String* first = String::cast(cs->first());
int first_length = first->length();
char* dest = SeqAsciiString::cast(result)->GetChars();
WriteToFlat(first, dest, 0, first_length);
WriteToFlat(String::cast(cs->second()),
dest + first_length,
0,
len - first_length);
} else {
object = Heap::AllocateRawTwoByteString(len, tenure);
if (object->IsFailure()) return object;
result = String::cast(object);
uc16* dest = SeqTwoByteString::cast(result)->GetChars();
String* first = String::cast(cs->first());
int first_length = first->length();
WriteToFlat(first, dest, 0, first_length);
WriteToFlat(String::cast(cs->second()),
dest + first_length,
0,
len - first_length);
}
cs->set_first(result); cs->set_first(result);
cs->set_second(Heap::empty_string()); cs->set_second(Heap::empty_string());
return this; return this;
...@@ -2922,7 +2943,7 @@ Vector<const char> String::ToAsciiVector() { ...@@ -2922,7 +2943,7 @@ Vector<const char> String::ToAsciiVector() {
} }
if (string_tag == kSeqStringTag) { if (string_tag == kSeqStringTag) {
SeqAsciiString* seq = SeqAsciiString::cast(string); SeqAsciiString* seq = SeqAsciiString::cast(string);
char* start = reinterpret_cast<char*>(seq->GetCharsAddress()); char* start = seq->GetChars();
return Vector<const char>(start + offset, length); return Vector<const char>(start + offset, length);
} }
ASSERT(string_tag == kExternalStringTag); ASSERT(string_tag == kExternalStringTag);
...@@ -2953,8 +2974,7 @@ Vector<const uc16> String::ToUC16Vector() { ...@@ -2953,8 +2974,7 @@ Vector<const uc16> String::ToUC16Vector() {
} }
if (string_tag == kSeqStringTag) { if (string_tag == kSeqStringTag) {
SeqTwoByteString* seq = SeqTwoByteString::cast(string); SeqTwoByteString* seq = SeqTwoByteString::cast(string);
uc16* start = reinterpret_cast<uc16*>(seq->GetCharsAddress()); return Vector<const uc16>(seq->GetChars() + offset, length);
return Vector<const uc16>(start + offset, length);
} }
ASSERT(string_tag == kExternalStringTag); ASSERT(string_tag == kExternalStringTag);
ExternalTwoByteString* ext = ExternalTwoByteString::cast(string); ExternalTwoByteString* ext = ExternalTwoByteString::cast(string);
...@@ -3122,7 +3142,6 @@ const unibrow::byte* SeqAsciiString::SeqAsciiStringReadBlock( ...@@ -3122,7 +3142,6 @@ const unibrow::byte* SeqAsciiString::SeqAsciiStringReadBlock(
unsigned* remaining, unsigned* remaining,
unsigned* offset_ptr, unsigned* offset_ptr,
unsigned max_chars) { unsigned max_chars) {
// Cast const char* to unibrow::byte* (signedness difference).
const unibrow::byte* b = reinterpret_cast<unibrow::byte*>(this) - const unibrow::byte* b = reinterpret_cast<unibrow::byte*>(this) -
kHeapObjectTag + kHeaderSize + *offset_ptr * kCharSize; kHeapObjectTag + kHeaderSize + *offset_ptr * kCharSize;
*remaining = max_chars; *remaining = max_chars;
...@@ -3589,47 +3608,62 @@ Object* SlicedString::SlicedStringFlatten() { ...@@ -3589,47 +3608,62 @@ Object* SlicedString::SlicedStringFlatten() {
} }
void String::Flatten(String* src, template <typename sinkchar>
String* sink, void String::WriteToFlat(String* src,
int f, sinkchar* sink,
int t, int f,
int so) { int t) {
String* source = src; String* source = src;
int from = f; int from = f;
int to = t; int to = t;
int sink_offset = so;
while (true) { while (true) {
ASSERT(0 <= from && from <= to && to <= source->length()); ASSERT(0 <= from && from <= to && to <= source->length());
ASSERT(0 <= sink_offset && sink_offset < sink->length()); switch (source->full_representation_tag()) {
switch (source->representation_tag()) { case kAsciiStringTag | kExternalStringTag: {
case kSeqStringTag: CopyChars(sink,
case kExternalStringTag: { ExternalAsciiString::cast(source)->resource()->data() + from,
Access<StringInputBuffer> buffer(&string_input_buffer); to - from);
buffer->Reset(from, source); return;
int j = sink_offset; }
for (int i = from; i < to; i++) { case kTwoByteStringTag | kExternalStringTag: {
uc32 c = buffer->GetNext(); const uc16* data =
sink->Set(j++, c); ExternalTwoByteString::cast(source)->resource()->data();
} CopyChars(sink,
data + from,
to - from);
return; return;
} }
case kSlicedStringTag: { case kAsciiStringTag | kSeqStringTag: {
CopyChars(sink,
SeqAsciiString::cast(source)->GetChars() + from,
to - from);
return;
}
case kTwoByteStringTag | kSeqStringTag: {
CopyChars(sink,
SeqTwoByteString::cast(source)->GetChars() + from,
to - from);
return;
}
case kAsciiStringTag | kSlicedStringTag:
case kTwoByteStringTag | kSlicedStringTag: {
SlicedString* sliced_string = SlicedString::cast(source); SlicedString* sliced_string = SlicedString::cast(source);
int start = sliced_string->start(); int start = sliced_string->start();
from += start; from += start;
to += start; to += start;
source = String::cast(sliced_string->buffer()); source = String::cast(sliced_string->buffer());
break;
} }
break; case kAsciiStringTag | kConsStringTag:
case kConsStringTag: { case kTwoByteStringTag | kConsStringTag: {
ConsString* cons_string = ConsString::cast(source); ConsString* cons_string = ConsString::cast(source);
String* first = String::cast(cons_string->first()); String* first = String::cast(cons_string->first());
int boundary = first->length(); int boundary = first->length();
if (to - boundary >= boundary - from) { if (to - boundary >= boundary - from) {
// Right hand side is longer. Recurse over left. // Right hand side is longer. Recurse over left.
if (from < boundary) { if (from < boundary) {
Flatten(first, sink, from, boundary, sink_offset); WriteToFlat(first, sink, from, boundary);
sink_offset += boundary - from; sink += boundary - from;
from = 0; from = 0;
} else { } else {
from -= boundary; from -= boundary;
...@@ -3637,22 +3671,19 @@ void String::Flatten(String* src, ...@@ -3637,22 +3671,19 @@ void String::Flatten(String* src,
to -= boundary; to -= boundary;
source = String::cast(cons_string->second()); source = String::cast(cons_string->second());
} else { } else {
// Left hand side is longer. Recurse over right. The hasher // Left hand side is longer. Recurse over right.
// needs us to visit the string from left to right so doing
// this invalidates that hash.
if (to > boundary) { if (to > boundary) {
String* second = String::cast(cons_string->second()); String* second = String::cast(cons_string->second());
Flatten(second, WriteToFlat(second,
sink, sink + boundary - from,
0, 0,
to - boundary, to - boundary);
sink_offset + boundary - from);
to = boundary; to = boundary;
} }
source = first; source = first;
} }
break;
} }
break;
} }
} }
} }
......
...@@ -3071,6 +3071,8 @@ class String: public HeapObject { ...@@ -3071,6 +3071,8 @@ class String: public HeapObject {
// Get the representation tag. // Get the representation tag.
inline StringRepresentationTag representation_tag(); inline StringRepresentationTag representation_tag();
// Get the representation and ASCII tag.
inline int full_representation_tag();
static inline StringRepresentationTag map_representation_tag(Map* map); static inline StringRepresentationTag map_representation_tag(Map* map);
// For use during stack traces. Performs rudimentary sanity check. // For use during stack traces. Performs rudimentary sanity check.
...@@ -3097,6 +3099,9 @@ class String: public HeapObject { ...@@ -3097,6 +3099,9 @@ class String: public HeapObject {
// Max ascii char code. // Max ascii char code.
static const int kMaxAsciiCharCode = unibrow::Utf8::kMaxOneByteChar; static const int kMaxAsciiCharCode = unibrow::Utf8::kMaxOneByteChar;
// Minimum length for a cons or sliced string.
static const int kMinNonFlatLength = 13;
// Mask constant for checking if a string has a computed hash code // Mask constant for checking if a string has a computed hash code
// and if it is an array index. The least significant bit indicates // and if it is an array index. The least significant bit indicates
// whether a hash code has been computed. If the hash code has been // whether a hash code has been computed. If the hash code has been
...@@ -3134,11 +3139,11 @@ class String: public HeapObject { ...@@ -3134,11 +3139,11 @@ class String: public HeapObject {
unsigned* offset); unsigned* offset);
// Helper function for flattening strings. // Helper function for flattening strings.
static void Flatten(String* source, template <typename sinkchar>
String* sink, static void WriteToFlat(String* source,
int from, sinkchar* sink,
int to, int from,
int sink_offset); int to);
protected: protected:
class ReadBlockBuffer { class ReadBlockBuffer {
...@@ -3215,7 +3220,7 @@ class SeqAsciiString: public SeqString { ...@@ -3215,7 +3220,7 @@ class SeqAsciiString: public SeqString {
// Get the address of the characters in this string. // Get the address of the characters in this string.
inline Address GetCharsAddress(); inline Address GetCharsAddress();
inline const char* GetChars(); inline char* GetChars();
// Casting // Casting
static inline SeqAsciiString* cast(Object* obj); static inline SeqAsciiString* cast(Object* obj);
...@@ -3257,6 +3262,8 @@ class SeqTwoByteString: public SeqString { ...@@ -3257,6 +3262,8 @@ class SeqTwoByteString: public SeqString {
// Get the address of the characters in this string. // Get the address of the characters in this string.
inline Address GetCharsAddress(); inline Address GetCharsAddress();
inline uc16* GetChars();
// For regexp code. // For regexp code.
const uint16_t* SeqTwoByteStringGetData(unsigned start); const uint16_t* SeqTwoByteStringGetData(unsigned start);
...@@ -3328,7 +3335,6 @@ class ConsString: public String { ...@@ -3328,7 +3335,6 @@ class ConsString: public String {
unsigned* offset_ptr, unsigned* offset_ptr,
unsigned chars); unsigned chars);
// Minimum length for a cons string. // Minimum length for a cons string.
static const int kMinLength = 13; static const int kMinLength = 13;
...@@ -3376,9 +3382,6 @@ class SlicedString: public String { ...@@ -3376,9 +3382,6 @@ class SlicedString: public String {
unsigned* offset_ptr, unsigned* offset_ptr,
unsigned chars); unsigned chars);
// Minimum length for a sliced string.
static const int kMinLength = 13;
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString); DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString);
}; };
......
...@@ -2591,6 +2591,30 @@ static Object* Runtime_StringAdd(Arguments args) { ...@@ -2591,6 +2591,30 @@ static Object* Runtime_StringAdd(Arguments args) {
} }
// Writes the pieces described by the first |array_length| entries of
// |fixed_array| back to back into |sink|.
//
// Each entry is either:
//   - a Smi describing a slice of |special|: the start position is stored in
//     the bits above the low 11 (value >> 11) and the slice length in the low
//     11 bits (value & 0x7ff); or
//   - a String, which is written out in full.
//
// NOTE(review): the caller is presumably responsible for sizing |sink| to hold
// the total length; nothing is bounds-checked here.
template<typename sinkchar>
static inline void StringBuilderConcatHelper(String* special,
                                             sinkchar* sink,
                                             FixedArray* fixed_array,
                                             int array_length) {
  // Next free slot in the output buffer.
  sinkchar* cursor = sink;
  for (int index = 0; index < array_length; index++) {
    Object* entry = fixed_array->get(index);
    if (!entry->IsSmi()) {
      // Plain string element: copy it whole.
      String* piece = String::cast(entry);
      const int piece_length = piece->length();
      String::WriteToFlat(piece, cursor, 0, piece_length);
      cursor += piece_length;
      continue;
    }
    // Smi element: decode (start, count) and copy that slice of |special|.
    const int encoded = Smi::cast(entry)->value();
    const int start = encoded >> 11;
    const int count = encoded & 0x7ff;
    String::WriteToFlat(special, cursor, start, start + count);
    cursor += count;
  }
}
static Object* Runtime_StringBuilderConcat(Arguments args) { static Object* Runtime_StringBuilderConcat(Arguments args) {
NoHandleAllocation ha; NoHandleAllocation ha;
ASSERT(args.length() == 2); ASSERT(args.length() == 2);
...@@ -2647,32 +2671,27 @@ static Object* Runtime_StringBuilderConcat(Arguments args) { ...@@ -2647,32 +2671,27 @@ static Object* Runtime_StringBuilderConcat(Arguments args) {
} }
int length = position; int length = position;
position = 0;
Object* object; Object* object;
if (ascii) { if (ascii) {
object = Heap::AllocateRawAsciiString(length); object = Heap::AllocateRawAsciiString(length);
if (object->IsFailure()) return object;
SeqAsciiString* answer = SeqAsciiString::cast(object);
StringBuilderConcatHelper(special,
answer->GetChars(),
fixed_array,
array_length);
return answer;
} else { } else {
object = Heap::AllocateRawTwoByteString(length); object = Heap::AllocateRawTwoByteString(length);
if (object->IsFailure()) return object;
SeqTwoByteString* answer = SeqTwoByteString::cast(object);
StringBuilderConcatHelper(special,
answer->GetChars(),
fixed_array,
array_length);
return answer;
} }
if (object->IsFailure()) return object;
String* answer = String::cast(object);
for (int i = 0; i < array_length; i++) {
Object* element = fixed_array->get(i);
if (element->IsSmi()) {
int len = Smi::cast(element)->value();
int pos = len >> 11;
len &= 0x7ff;
String::Flatten(special, answer, pos, pos + len, position);
position += len;
} else {
String* string = String::cast(element);
int element_length = string->length();
String::Flatten(string, answer, 0, element_length, position);
position += element_length;
}
}
return answer;
} }
......
...@@ -443,6 +443,15 @@ class StringBuilder { ...@@ -443,6 +443,15 @@ class StringBuilder {
DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder); DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder);
}; };
// Copy from ASCII/16bit chars to ASCII/16bit chars.
//
// Copies the first |chars| elements of |src| into |dest|, converting each
// element from sourcechar to sinkchar by plain assignment (so copying 16-bit
// characters into an 8-bit sink truncates them).
//
// A zero or negative |chars| copies nothing. The loop is bounded by an index
// rather than counting |chars| down to zero, because a countdown never reaches
// zero for a negative count and would run far past the end of both buffers.
template <typename sourcechar, typename sinkchar>
static inline void CopyChars(sinkchar* dest, const sourcechar* src, int chars) {
  for (int i = 0; i < chars; i++) {
    dest[i] = src[i];
  }
}
} } // namespace v8::internal } } // namespace v8::internal
#endif // V8_UTILS_H_ #endif // V8_UTILS_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment