Commit 1aae797d authored by erik.corry@gmail.com's avatar erik.corry@gmail.com

Use direct copy and templates to speed up flattening of strings.

Review URL: http://codereview.chromium.org/8011

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@549 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent bf948c83
......@@ -79,6 +79,8 @@ Handle<String> Factory::NewRawTwoByteString(int length,
Handle<String> Factory::NewConsString(Handle<String> first,
Handle<String> second) {
if (first->length() == 0) return second;
if (second->length() == 0) return first;
CALL_HEAP_FUNCTION(Heap::AllocateConsString(*first, *second), String);
}
......
......@@ -1331,29 +1331,33 @@ Object* Heap::AllocateSharedFunctionInfo(Object* name) {
Object* Heap::AllocateConsString(String* first, String* second) {
int length = first->length() + second->length();
int first_length = first->length();
int second_length = second->length();
int length = first_length + second_length;
bool is_ascii = first->is_ascii_representation()
&& second->is_ascii_representation();
// If the resulting string is small make a flat string.
if (length < ConsString::kMinLength) {
Object* result = is_ascii
? AllocateRawAsciiString(length)
: AllocateRawTwoByteString(length);
if (result->IsFailure()) return result;
// Copy the characters into the new object.
String* string_result = String::cast(result);
int first_length = first->length();
// Copy the content of the first string.
for (int i = 0; i < first_length; i++) {
string_result->Set(i, first->Get(i));
}
int second_length = second->length();
// Copy the content of the first string.
for (int i = 0; i < second_length; i++) {
string_result->Set(first_length + i, second->Get(i));
if (length < String::kMinNonFlatLength) {
ASSERT(first->IsFlat());
ASSERT(second->IsFlat());
if (is_ascii) {
Object* result = AllocateRawAsciiString(length);
if (result->IsFailure()) return result;
// Copy the characters into the new object.
char* dest = SeqAsciiString::cast(result)->GetChars();
String::WriteToFlat(first, dest, 0, first_length);
String::WriteToFlat(second, dest + first_length, 0, second_length);
return result;
} else {
Object* result = AllocateRawTwoByteString(length);
if (result->IsFailure()) return result;
// Copy the characters into the new object.
uc16* dest = SeqTwoByteString::cast(result)->GetChars();
String::WriteToFlat(first, dest, 0, first_length);
String::WriteToFlat(second, dest + first_length, 0, second_length);
return result;
}
return result;
}
Map* map;
......@@ -1384,7 +1388,7 @@ Object* Heap::AllocateSlicedString(String* buffer, int start, int end) {
int length = end - start;
// If the resulting string is small make a sub string.
if (end - start <= SlicedString::kMinLength) {
if (end - start <= String::kMinNonFlatLength) {
return Heap::AllocateSubString(buffer, start, end);
}
......
......@@ -1372,6 +1372,12 @@ bool String::is_ascii_representation_map(Map* map) {
}
int String::full_representation_tag() {
return map()->instance_type() &
(kStringRepresentationMask | kStringEncodingMask);
}
StringRepresentationTag String::representation_tag() {
return map_representation_tag(map());
}
......@@ -1417,8 +1423,8 @@ Address SeqAsciiString::GetCharsAddress() {
}
const char* SeqAsciiString::GetChars() {
return reinterpret_cast<const char*>(GetCharsAddress());
char* SeqAsciiString::GetChars() {
return reinterpret_cast<char*>(GetCharsAddress());
}
......@@ -1427,6 +1433,11 @@ Address SeqTwoByteString::GetCharsAddress() {
}
uc16* SeqTwoByteString::GetChars() {
return reinterpret_cast<uc16*>(FIELD_ADDR(this, kHeaderSize));
}
uint16_t SeqTwoByteString::SeqTwoByteStringGet(int index) {
ASSERT(index >= 0 && index < length());
return READ_SHORT_FIELD(this, kHeaderSize + index * kShortSize);
......
......@@ -529,12 +529,33 @@ Object* String::Flatten() {
// an old space GC.
PretenureFlag tenure = Heap::InNewSpace(this) ? NOT_TENURED : TENURED;
int len = length();
Object* object = IsAsciiRepresentation() ?
Heap::AllocateRawAsciiString(len, tenure) :
Heap::AllocateRawTwoByteString(len, tenure);
if (object->IsFailure()) return object;
String* result = String::cast(object);
Flatten(this, result, 0, len, 0);
Object* object;
String* result;
if (IsAsciiRepresentation()) {
object = Heap::AllocateRawAsciiString(len, tenure);
if (object->IsFailure()) return object;
result = String::cast(object);
String* first = String::cast(cs->first());
int first_length = first->length();
char* dest = SeqAsciiString::cast(result)->GetChars();
WriteToFlat(first, dest, 0, first_length);
WriteToFlat(String::cast(cs->second()),
dest + first_length,
0,
len - first_length);
} else {
object = Heap::AllocateRawTwoByteString(len, tenure);
if (object->IsFailure()) return object;
result = String::cast(object);
uc16* dest = SeqTwoByteString::cast(result)->GetChars();
String* first = String::cast(cs->first());
int first_length = first->length();
WriteToFlat(first, dest, 0, first_length);
WriteToFlat(String::cast(cs->second()),
dest + first_length,
0,
len - first_length);
}
cs->set_first(result);
cs->set_second(Heap::empty_string());
return this;
......@@ -2922,7 +2943,7 @@ Vector<const char> String::ToAsciiVector() {
}
if (string_tag == kSeqStringTag) {
SeqAsciiString* seq = SeqAsciiString::cast(string);
char* start = reinterpret_cast<char*>(seq->GetCharsAddress());
char* start = seq->GetChars();
return Vector<const char>(start + offset, length);
}
ASSERT(string_tag == kExternalStringTag);
......@@ -2953,8 +2974,7 @@ Vector<const uc16> String::ToUC16Vector() {
}
if (string_tag == kSeqStringTag) {
SeqTwoByteString* seq = SeqTwoByteString::cast(string);
uc16* start = reinterpret_cast<uc16*>(seq->GetCharsAddress());
return Vector<const uc16>(start + offset, length);
return Vector<const uc16>(seq->GetChars() + offset, length);
}
ASSERT(string_tag == kExternalStringTag);
ExternalTwoByteString* ext = ExternalTwoByteString::cast(string);
......@@ -3122,7 +3142,6 @@ const unibrow::byte* SeqAsciiString::SeqAsciiStringReadBlock(
unsigned* remaining,
unsigned* offset_ptr,
unsigned max_chars) {
// Cast const char* to unibrow::byte* (signedness difference).
const unibrow::byte* b = reinterpret_cast<unibrow::byte*>(this) -
kHeapObjectTag + kHeaderSize + *offset_ptr * kCharSize;
*remaining = max_chars;
......@@ -3589,47 +3608,62 @@ Object* SlicedString::SlicedStringFlatten() {
}
void String::Flatten(String* src,
String* sink,
int f,
int t,
int so) {
template <typename sinkchar>
void String::WriteToFlat(String* src,
sinkchar* sink,
int f,
int t) {
String* source = src;
int from = f;
int to = t;
int sink_offset = so;
while (true) {
ASSERT(0 <= from && from <= to && to <= source->length());
ASSERT(0 <= sink_offset && sink_offset < sink->length());
switch (source->representation_tag()) {
case kSeqStringTag:
case kExternalStringTag: {
Access<StringInputBuffer> buffer(&string_input_buffer);
buffer->Reset(from, source);
int j = sink_offset;
for (int i = from; i < to; i++) {
uc32 c = buffer->GetNext();
sink->Set(j++, c);
}
switch (source->full_representation_tag()) {
case kAsciiStringTag | kExternalStringTag: {
CopyChars(sink,
ExternalAsciiString::cast(source)->resource()->data() + from,
to - from);
return;
}
case kTwoByteStringTag | kExternalStringTag: {
const uc16* data =
ExternalTwoByteString::cast(source)->resource()->data();
CopyChars(sink,
data + from,
to - from);
return;
}
case kSlicedStringTag: {
case kAsciiStringTag | kSeqStringTag: {
CopyChars(sink,
SeqAsciiString::cast(source)->GetChars() + from,
to - from);
return;
}
case kTwoByteStringTag | kSeqStringTag: {
CopyChars(sink,
SeqTwoByteString::cast(source)->GetChars() + from,
to - from);
return;
}
case kAsciiStringTag | kSlicedStringTag:
case kTwoByteStringTag | kSlicedStringTag: {
SlicedString* sliced_string = SlicedString::cast(source);
int start = sliced_string->start();
from += start;
to += start;
source = String::cast(sliced_string->buffer());
break;
}
break;
case kConsStringTag: {
case kAsciiStringTag | kConsStringTag:
case kTwoByteStringTag | kConsStringTag: {
ConsString* cons_string = ConsString::cast(source);
String* first = String::cast(cons_string->first());
int boundary = first->length();
if (to - boundary >= boundary - from) {
// Right hand side is longer. Recurse over left.
if (from < boundary) {
Flatten(first, sink, from, boundary, sink_offset);
sink_offset += boundary - from;
WriteToFlat(first, sink, from, boundary);
sink += boundary - from;
from = 0;
} else {
from -= boundary;
......@@ -3637,22 +3671,19 @@ void String::Flatten(String* src,
to -= boundary;
source = String::cast(cons_string->second());
} else {
// Left hand side is longer. Recurse over right. The hasher
// needs us to visit the string from left to right so doing
// this invalidates that hash.
// Left hand side is longer. Recurse over right.
if (to > boundary) {
String* second = String::cast(cons_string->second());
Flatten(second,
sink,
0,
to - boundary,
sink_offset + boundary - from);
WriteToFlat(second,
sink + boundary - from,
0,
to - boundary);
to = boundary;
}
source = first;
}
break;
}
break;
}
}
}
......
......@@ -3071,6 +3071,8 @@ class String: public HeapObject {
// Get the representation tag.
inline StringRepresentationTag representation_tag();
// Get the representation and ASCII tag.
inline int full_representation_tag();
static inline StringRepresentationTag map_representation_tag(Map* map);
// For use during stack traces. Performs rudimentary sanity check.
......@@ -3097,6 +3099,9 @@ class String: public HeapObject {
// Max ascii char code.
static const int kMaxAsciiCharCode = unibrow::Utf8::kMaxOneByteChar;
// Minimum lenth for a cons or sliced string.
static const int kMinNonFlatLength = 13;
// Mask constant for checking if a string has a computed hash code
// and if it is an array index. The least significant bit indicates
// whether a hash code has been computed. If the hash code has been
......@@ -3134,11 +3139,11 @@ class String: public HeapObject {
unsigned* offset);
// Helper function for flattening strings.
static void Flatten(String* source,
String* sink,
int from,
int to,
int sink_offset);
template <typename sinkchar>
static void WriteToFlat(String* source,
sinkchar* sink,
int from,
int to);
protected:
class ReadBlockBuffer {
......@@ -3215,7 +3220,7 @@ class SeqAsciiString: public SeqString {
// Get the address of the characters in this string.
inline Address GetCharsAddress();
inline const char* GetChars();
inline char* GetChars();
// Casting
static inline SeqAsciiString* cast(Object* obj);
......@@ -3257,6 +3262,8 @@ class SeqTwoByteString: public SeqString {
// Get the address of the characters in this string.
inline Address GetCharsAddress();
inline uc16* GetChars();
// For regexp code.
const uint16_t* SeqTwoByteStringGetData(unsigned start);
......@@ -3328,7 +3335,6 @@ class ConsString: public String {
unsigned* offset_ptr,
unsigned chars);
// Minimum length for a cons string.
static const int kMinLength = 13;
......@@ -3376,9 +3382,6 @@ class SlicedString: public String {
unsigned* offset_ptr,
unsigned chars);
// Minimum lenth for a sliced string.
static const int kMinLength = 13;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString);
};
......
......@@ -2591,6 +2591,30 @@ static Object* Runtime_StringAdd(Arguments args) {
}
template<typename sinkchar>
static inline void StringBuilderConcatHelper(String* special,
sinkchar* sink,
FixedArray* fixed_array,
int array_length) {
int position = 0;
for (int i = 0; i < array_length; i++) {
Object* element = fixed_array->get(i);
if (element->IsSmi()) {
int len = Smi::cast(element)->value();
int pos = len >> 11;
len &= 0x7ff;
String::WriteToFlat(special, sink + position, pos, pos + len);
position += len;
} else {
String* string = String::cast(element);
int element_length = string->length();
String::WriteToFlat(string, sink + position, 0, element_length);
position += element_length;
}
}
}
static Object* Runtime_StringBuilderConcat(Arguments args) {
NoHandleAllocation ha;
ASSERT(args.length() == 2);
......@@ -2647,32 +2671,27 @@ static Object* Runtime_StringBuilderConcat(Arguments args) {
}
int length = position;
position = 0;
Object* object;
if (ascii) {
object = Heap::AllocateRawAsciiString(length);
if (object->IsFailure()) return object;
SeqAsciiString* answer = SeqAsciiString::cast(object);
StringBuilderConcatHelper(special,
answer->GetChars(),
fixed_array,
array_length);
return answer;
} else {
object = Heap::AllocateRawTwoByteString(length);
if (object->IsFailure()) return object;
SeqTwoByteString* answer = SeqTwoByteString::cast(object);
StringBuilderConcatHelper(special,
answer->GetChars(),
fixed_array,
array_length);
return answer;
}
if (object->IsFailure()) return object;
String* answer = String::cast(object);
for (int i = 0; i < array_length; i++) {
Object* element = fixed_array->get(i);
if (element->IsSmi()) {
int len = Smi::cast(element)->value();
int pos = len >> 11;
len &= 0x7ff;
String::Flatten(special, answer, pos, pos + len, position);
position += len;
} else {
String* string = String::cast(element);
int element_length = string->length();
String::Flatten(string, answer, 0, element_length, position);
position += element_length;
}
}
return answer;
}
......
......@@ -443,6 +443,15 @@ class StringBuilder {
DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder);
};
// Copy from ASCII/16bit chars to ASCII/16bit chars.
template <typename sourcechar, typename sinkchar>
static inline void CopyChars(sinkchar* dest, const sourcechar* src, int chars) {
while (chars--) {
*dest++ = *src++;
}
}
} } // namespace v8::internal
#endif // V8_UTILS_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment