Commit 23d085c6 authored by yangguo@chromium.org's avatar yangguo@chromium.org

Handlify concat string and substring.

R=ulan@chromium.org
BUG=

Review URL: https://codereview.chromium.org/50073005

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@17490 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 841dd1cb
......@@ -287,11 +287,43 @@ Handle<SeqTwoByteString> Factory::NewRawTwoByteString(int length,
}
Handle<String> Factory::NewConsString(Handle<String> first,
Handle<String> second) {
CALL_HEAP_FUNCTION(isolate(),
isolate()->heap()->AllocateConsString(*first, *second),
String);
// Tests whether |character| lies within the closed range [from, to].
static inline bool Between(uint32_t character, uint32_t from, uint32_t to) {
  // Unsigned subtraction wraps around: a character below |from| produces a
  // very large offset, so one comparison checks both limits at once.
  const uint32_t offset = character - from;
  const uint32_t width = to - from;
  return offset <= width;
}
// Produces the two-character string |c1| followed by |c2|. A matching string
// table entry is returned when the lookup finds one; otherwise a new
// sequential string of length 2 is allocated and filled in.
static inline Handle<String> MakeOrFindTwoCharacterString(Isolate* isolate,
                                                          uint16_t c1,
                                                          uint16_t c2) {
  // Numeric strings are hashed with a different algorithm that
  // LookupTwoCharsStringIfExists does not know about, so the table lookup is
  // skipped when both characters are decimal digits.
  const bool both_digits = Between(c1, '0', '9') && Between(c2, '0', '9');
  if (!both_digits) {
    String* existing;
    if (isolate->heap()->string_table()->LookupTwoCharsStringIfExists(
            c1, c2, &existing)) {
      return handle(existing);
    }
  }

  // The length is known to be 2, so the characters are stored directly.
  const bool fits_one_byte =
      static_cast<unsigned>(c1 | c2) <= String::kMaxOneByteCharCodeU;
  if (!fits_one_byte) {
    Handle<SeqTwoByteString> wide = isolate->factory()->NewRawTwoByteString(2);
    uc16* chars = wide->GetChars();
    chars[0] = c1;
    chars[1] = c2;
    return wide;
  }

  // Checking the OR of both code units against the limit is only valid
  // because the limit is one less than a power of two.
  ASSERT(IsPowerOf2(String::kMaxOneByteCharCodeU + 1));
  Handle<SeqOneByteString> narrow = isolate->factory()->NewRawOneByteString(2);
  uint8_t* chars = narrow->GetChars();
  chars[0] = static_cast<uint8_t>(c1);
  chars[1] = static_cast<uint8_t>(c2);
  return narrow;
}
......@@ -307,6 +339,99 @@ Handle<String> ConcatStringContent(Handle<StringType> result,
}
// Allocates a cons string object in NEW_SPACE. ONE_BYTE_ENCODING selects the
// ASCII cons string map; any other encoding selects the generic cons string
// map. No string fields are written here.
Handle<ConsString> Factory::NewRawConsString(String::Encoding encoding) {
  const bool use_generic_map = (encoding != String::ONE_BYTE_ENCODING);
  Handle<Map> cons_map =
      use_generic_map ? cons_string_map() : cons_ascii_string_map();
  CALL_HEAP_FUNCTION(isolate(),
                     isolate()->heap()->Allocate(*cons_map, NEW_SPACE),
                     ConsString);
}
// Concatenates |left| and |right| and returns the resulting string.
//  - If either input is empty, the other input is returned as-is.
//  - A combined length of exactly 2 is delegated to
//    MakeOrFindTwoCharacterString.
//  - A combined length above String::kMaxLength (or a negative one) is
//    reported as a fatal out-of-memory condition.
//  - A result shorter than ConsString::kMinLength is copied into a new flat
//    sequential string.
//  - Otherwise a ConsString whose first/second fields are |left| and |right|
//    is returned.
Handle<String> Factory::NewConsString(Handle<String> left,
Handle<String> right) {
// Concatenating with an empty string needs no new object.
int left_length = left->length();
if (left_length == 0) return right;
int right_length = right->length();
if (right_length == 0) return left;
int length = left_length + right_length;
// Both inputs are non-empty here, so a total of 2 means one character each.
if (length == 2) {
uint16_t c1 = left->Get(0);
uint16_t c2 = right->Get(0);
return MakeOrFindTwoCharacterString(isolate(), c1, c2);
}
// Make sure that an out of memory exception is thrown if the length
// of the new cons string is too large.
if (length > String::kMaxLength || length < 0) {
isolate()->context()->mark_out_of_memory();
V8::FatalProcessOutOfMemory("String concatenation result too large.");
UNREACHABLE();
return Handle<String>::null();
}
// Decide which encoding the result needs.
bool left_is_one_byte = left->IsOneByteRepresentation();
bool right_is_one_byte = right->IsOneByteRepresentation();
bool is_one_byte = left_is_one_byte && right_is_one_byte;
bool is_one_byte_data_in_two_byte_string = false;
if (!is_one_byte) {
// At least one of the strings uses two-byte representation so we
// can't use the fast case code for short ASCII strings below, but
// we can try to save memory if all chars actually fit in ASCII.
is_one_byte_data_in_two_byte_string =
left->HasOnlyOneByteChars() && right->HasOnlyOneByteChars();
if (is_one_byte_data_in_two_byte_string) {
isolate()->counters()->string_add_runtime_ext_to_ascii()->Increment();
}
}
// If the resulting string is small make a flat string.
if (length < ConsString::kMinLength) {
// Note that neither of the two inputs can be a slice because:
STATIC_ASSERT(ConsString::kMinLength <= SlicedString::kMinLength);
ASSERT(left->IsFlat());
ASSERT(right->IsFlat());
if (is_one_byte) {
// Both inputs are one-byte: copy their characters directly. The result is
// allocated before the no-allocation scope is opened and before any raw
// character pointer is taken.
Handle<SeqOneByteString> result = NewRawOneByteString(length);
DisallowHeapAllocation no_gc;
uint8_t* dest = result->GetChars();
// Copy left part.
const uint8_t* src = left->IsExternalString()
? Handle<ExternalAsciiString>::cast(left)->GetChars()
: Handle<SeqOneByteString>::cast(left)->GetChars();
for (int i = 0; i < left_length; i++) *dest++ = src[i];
// Copy right part.
src = right->IsExternalString()
? Handle<ExternalAsciiString>::cast(right)->GetChars()
: Handle<SeqOneByteString>::cast(right)->GetChars();
for (int i = 0; i < right_length; i++) *dest++ = src[i];
return result;
}
// At least one input is in two-byte representation: let
// ConcatStringContent fill a one-byte result when all characters fit,
// and a two-byte result otherwise.
return (is_one_byte_data_in_two_byte_string)
? ConcatStringContent<uint8_t>(NewRawOneByteString(length), left, right)
: ConcatStringContent<uc16>(NewRawTwoByteString(length), left, right);
}
// Long result: allocate a cons string and point it at both inputs.
Handle<ConsString> result = NewRawConsString(
(is_one_byte || is_one_byte_data_in_two_byte_string)
? String::ONE_BYTE_ENCODING
: String::TWO_BYTE_ENCODING);
// All fields are initialized inside a single no-allocation scope, using the
// write barrier mode obtained for that scope.
DisallowHeapAllocation no_gc;
WriteBarrierMode mode = result->GetWriteBarrierMode(no_gc);
result->set_hash_field(String::kEmptyHashField);
result->set_length(length);
result->set_first(*left, mode);
result->set_second(*right, mode);
return result;
}
Handle<String> Factory::NewFlatConcatString(Handle<String> first,
Handle<String> second) {
int total_length = first->length() + second->length();
......@@ -321,22 +446,89 @@ Handle<String> Factory::NewFlatConcatString(Handle<String> first,
}
Handle<String> Factory::NewSubString(Handle<String> str,
int begin,
int end) {
Handle<SlicedString> Factory::NewRawSlicedString(String::Encoding encoding) {
Handle<Map> map = (encoding == String::ONE_BYTE_ENCODING)
? sliced_ascii_string_map() : sliced_string_map();
CALL_HEAP_FUNCTION(isolate(),
str->SubString(begin, end),
String);
isolate()->heap()->Allocate(*map, NEW_SPACE),
SlicedString);
}
// Returns the substring of |str| covering the index range [begin, end). The
// ASSERT below requires that the range is not the whole string.
//  - A length of zero or less yields the empty string.
//  - A length of 1 goes through LookupSingleCharacterStringFromCode.
//  - A length of 2 goes through MakeOrFindTwoCharacterString.
//  - If FLAG_string_slices is off or the length is below
//    SlicedString::kMinLength, the characters are copied into a new
//    sequential string.
//  - Otherwise a SlicedString referring to an underlying string is returned.
Handle<String> Factory::NewProperSubString(Handle<String> str,
int begin,
int end) {
#if VERIFY_HEAP
if (FLAG_verify_heap) str->StringVerify();
#endif
ASSERT(begin > 0 || end < str->length());
// NOTE(review): the CALL_HEAP_FUNCTION statement below refers to
// Heap::AllocateSubString and looks like the pre-change body left in place
// by the diff rendering -- confirm against the actual file.
CALL_HEAP_FUNCTION(isolate(),
isolate()->heap()->AllocateSubString(*str, begin, end),
String);
int length = end - begin;
if (length <= 0) return empty_string();
if (length == 1) {
return LookupSingleCharacterStringFromCode(isolate(), str->Get(begin));
}
if (length == 2) {
// Optimization for 2-byte strings often used as keys in a decompression
// dictionary. Check whether we already have the string in the string
// table to prevent creation of many unnecessary strings.
uint16_t c1 = str->Get(begin);
uint16_t c2 = str->Get(begin + 1);
return MakeOrFindTwoCharacterString(isolate(), c1, c2);
}
// Copy path: write the requested range into a fresh sequential string with
// the same representation (one-byte or two-byte) as |str|.
if (!FLAG_string_slices || length < SlicedString::kMinLength) {
if (str->IsOneByteRepresentation()) {
Handle<SeqOneByteString> result = NewRawOneByteString(length);
uint8_t* dest = result->GetChars();
DisallowHeapAllocation no_gc;
String::WriteToFlat(*str, dest, begin, end);
return result;
} else {
Handle<SeqTwoByteString> result = NewRawTwoByteString(length);
uc16* dest = result->GetChars();
DisallowHeapAllocation no_gc;
String::WriteToFlat(*str, dest, begin, end);
return result;
}
}
// Slice path: |offset| tracks where the slice starts within the current
// |str| while |str| is narrowed to the string the slice will point at.
int offset = begin;
// Descend through cons strings for as long as the requested range lies
// entirely within one of the two parts.
while (str->IsConsString()) {
Handle<ConsString> cons = Handle<ConsString>::cast(str);
int split = cons->first()->length();
if (split <= offset) {
// Slice is fully contained in the second part.
str = Handle<String>(cons->second(), isolate());
offset -= split;  // Adjust for offset.
continue;
} else if (offset + length <= split) {
// Slice is fully contained in the first part.
str = Handle<String>(cons->first(), isolate());
continue;
}
// The range straddles both parts; stop descending.
break;
}
if (str->IsSlicedString()) {
// Re-base onto the existing slice's parent, adding that slice's offset.
Handle<SlicedString> slice = Handle<SlicedString>::cast(str);
str = Handle<String>(slice->parent(), isolate());
offset += slice->offset();
} else {
// Otherwise continue with the result of FlattenGetString.
str = FlattenGetString(str);
}
ASSERT(str->IsSeqString() || str->IsExternalString());
// Allocate the slice with the parent's representation and fill in its
// fields.
Handle<SlicedString> slice = NewRawSlicedString(
str->IsOneByteRepresentation() ? String::ONE_BYTE_ENCODING
: String::TWO_BYTE_ENCODING);
slice->set_hash_field(String::kEmptyHashField);
slice->set_length(length);
slice->set_parent(*str);
slice->set_offset(offset);
return slice;
}
......
......@@ -158,23 +158,28 @@ class Factory {
PretenureFlag pretenure = NOT_TENURED);
// Create a new cons string object which consists of a pair of strings.
Handle<String> NewConsString(Handle<String> first,
Handle<String> second);
Handle<String> NewConsString(Handle<String> left,
Handle<String> right);
Handle<ConsString> NewRawConsString(String::Encoding encoding);
// Create a new sequential string containing the concatenation of the inputs.
Handle<String> NewFlatConcatString(Handle<String> first,
Handle<String> second);
// Create a new string object which holds a substring of a string.
Handle<String> NewSubString(Handle<String> str,
int begin,
int end);
// Create a new string object which holds a proper substring of a string.
Handle<String> NewProperSubString(Handle<String> str,
int begin,
int end);
// Create a new string object which holds a substring of a string. A range
// that spans the entire input returns the input handle itself; every other
// range is forwarded to NewProperSubString.
Handle<String> NewSubString(Handle<String> str, int begin, int end) {
  const bool is_proper = (begin != 0) || (end != str->length());
  if (is_proper) return NewProperSubString(str, begin, end);
  return str;
}
Handle<SlicedString> NewRawSlicedString(String::Encoding encoding);
// Creates a new external String object. There are two String encodings
// in the system: ASCII and two byte. Unlike other String types, it does
// not make sense to have a UTF-8 factory function for external strings,
......
......@@ -229,11 +229,12 @@ Handle<Object> GetProperty(Isolate* isolate,
}
Handle<Object> LookupSingleCharacterStringFromCode(Isolate* isolate,
Handle<String> LookupSingleCharacterStringFromCode(Isolate* isolate,
uint32_t index) {
CALL_HEAP_FUNCTION(
isolate,
isolate->heap()->LookupSingleCharacterStringFromCode(index), Object);
isolate->heap()->LookupSingleCharacterStringFromCode(index),
String);
}
......
......@@ -252,7 +252,7 @@ Handle<Object> GetProperty(Isolate* isolate,
Handle<Object> obj,
Handle<Object> key);
Handle<Object> LookupSingleCharacterStringFromCode(Isolate* isolate,
Handle<String> LookupSingleCharacterStringFromCode(Isolate* isolate,
uint32_t index);
Handle<FixedArray> AddKeysFromJSArray(Handle<FixedArray>,
......
This diff is collapsed.
......@@ -1068,25 +1068,6 @@ class Heap {
Object* stack_trace,
Object* stack_frames);
// Allocates a new cons string object.
// Returns Failure::RetryAfterGC(requested_bytes, space) if the allocation
// failed.
// Please note this does not perform a garbage collection.
MUST_USE_RESULT MaybeObject* AllocateConsString(String* first,
String* second);
// Allocates a new sub string object which is a substring of an underlying
// string buffer stretching from the index start (inclusive) to the index
// end (exclusive).
// Returns Failure::RetryAfterGC(requested_bytes, space) if the allocation
// failed.
// Please note this does not perform a garbage collection.
MUST_USE_RESULT MaybeObject* AllocateSubString(
String* buffer,
int start,
int end,
PretenureFlag pretenure = NOT_TENURED);
// Allocate a new external string object, which is backed by a string
// resource that resides outside the V8 heap.
// Returns Failure::RetryAfterGC(requested_bytes, space) if the allocation
......
......@@ -9303,14 +9303,6 @@ uint32_t StringHasher::ComputeUtf8Hash(Vector<const char> chars,
}
MaybeObject* String::SubString(int start, int end, PretenureFlag pretenure) {
Heap* heap = GetHeap();
if (start == 0 && end == length()) return this;
MaybeObject* result = heap->AllocateSubString(this, start, end, pretenure);
return result;
}
void String::PrintOn(FILE* file) {
int length = this->length();
for (int i = 0; i < length; i++) {
......
......@@ -8522,11 +8522,6 @@ class String: public Name {
// ASCII and two byte string types.
bool MarkAsUndetectable();
// Return a substring.
MUST_USE_RESULT MaybeObject* SubString(int from,
int to,
PretenureFlag pretenure = NOT_TENURED);
// String equality operations.
inline bool Equals(String* other);
bool IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match = false);
......
......@@ -4387,10 +4387,10 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringLocaleCompare) {
RUNTIME_FUNCTION(MaybeObject*, Runtime_SubString) {
SealHandleScope shs(isolate);
HandleScope scope(isolate);
ASSERT(args.length() == 3);
CONVERT_ARG_CHECKED(String, value, 0);
CONVERT_ARG_HANDLE_CHECKED(String, string, 0);
int start, end;
// We have a fast integer-only case here to avoid a conversion to double in
// the common case where from and to are Smis.
......@@ -4407,13 +4407,10 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_SubString) {
}
RUNTIME_ASSERT(end >= start);
RUNTIME_ASSERT(start >= 0);
RUNTIME_ASSERT(end <= value->length());
RUNTIME_ASSERT(end <= string->length());
isolate->counters()->sub_string_runtime()->Increment();
if (end - start == 1) {
return isolate->heap()->LookupSingleCharacterStringFromCode(
value->Get(start));
}
return value->SubString(start, end);
return *isolate->factory()->NewSubString(string, start, end);
}
......@@ -6500,30 +6497,31 @@ static inline bool IsTrimWhiteSpace(unibrow::uchar c) {
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringTrim) {
SealHandleScope shs(isolate);
HandleScope scope(isolate);
ASSERT(args.length() == 3);
CONVERT_ARG_CHECKED(String, s, 0);
CONVERT_ARG_HANDLE_CHECKED(String, string, 0);
CONVERT_BOOLEAN_ARG_CHECKED(trimLeft, 1);
CONVERT_BOOLEAN_ARG_CHECKED(trimRight, 2);
s->TryFlatten();
int length = s->length();
string = FlattenGetString(string);
int length = string->length();
int left = 0;
if (trimLeft) {
while (left < length && IsTrimWhiteSpace(s->Get(left))) {
while (left < length && IsTrimWhiteSpace(string->Get(left))) {
left++;
}
}
int right = length;
if (trimRight) {
while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) {
while (right > left && IsTrimWhiteSpace(string->Get(right - 1))) {
right--;
}
}
return s->SubString(left, right);
return *isolate->factory()->NewSubString(string, left, right);
}
......@@ -6926,12 +6924,12 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_NumberImul) {
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringAdd) {
SealHandleScope shs(isolate);
HandleScope scope(isolate);
ASSERT(args.length() == 2);
CONVERT_ARG_CHECKED(String, str1, 0);
CONVERT_ARG_CHECKED(String, str2, 1);
CONVERT_ARG_HANDLE_CHECKED(String, str1, 0);
CONVERT_ARG_HANDLE_CHECKED(String, str2, 1);
isolate->counters()->string_add_runtime()->Increment();
return isolate->heap()->AllocateConsString(str1, str2);
return *isolate->factory()->NewConsString(str1, str2);
}
......
......@@ -223,3 +223,14 @@ function test_crankshaft() {
test_crankshaft();
%OptimizeFunctionOnNextCall(test_crankshaft);
test_crankshaft();
// Substrings taken from concatenated strings. s1 has 20 characters and s2
// has 26, so within c1 the s1/s2 boundary is at index 20.
var s1 = "12345678901234567890";
var s2 = "abcdefghijklmnopqrstuvwxyz";
var c1 = s1 + s2;
// c2 has the content s1 + s1 + s2 + s2; its parts start at indices
// 0, 20, 40 and 66.
var c2 = s1 + c1 + s2;
// Range entirely inside the s1 part of c1.
assertEquals("234567890123456789", c1.substring(1, 19));
// Range entirely inside the s2 part of c1.
assertEquals("bcdefghijklmno", c1.substring(21, 35));
// Range straddling the s1/s2 boundary of c1.
assertEquals("2345678901234567890abcdefghijklmno", c1.substring(1, 35));
// Range entirely inside the leading s1 of c2.
assertEquals("234567890123456789", c2.substring(1, 19));
// Range entirely inside the s2 that belongs to the embedded c1.
assertEquals("bcdefghijklmno", c2.substring(41, 55));
// Range straddling the s1/s2 boundary of the embedded c1.
assertEquals("2345678901234567890abcdefghijklmno", c2.substring(21, 55));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment