Commit ce86c1bf authored by yangguo@chromium.org

Avoid bailing out to runtime for short substrings.

This significantly improves the speed of creating short substrings (fewer than 13 characters) from sliced strings, flat cons strings, and external strings.

TEST=string-external-cached.js, string-slices.js

Review URL: http://codereview.chromium.org/8889012

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@10221 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent f1649cf3
...@@ -6093,20 +6093,23 @@ void SubStringStub::Generate(MacroAssembler* masm) { ...@@ -6093,20 +6093,23 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ JumpIfNotSmi(edx, &runtime); __ JumpIfNotSmi(edx, &runtime);
__ sub(ecx, edx); __ sub(ecx, edx);
__ cmp(ecx, FieldOperand(eax, String::kLengthOffset)); __ cmp(ecx, FieldOperand(eax, String::kLengthOffset));
Label return_eax; Label not_original_string;
__ j(equal, &return_eax); __ j(not_equal, &not_original_string, Label::kNear);
Counters* counters = masm->isolate()->counters();
__ IncrementCounter(counters->sub_string_native(), 1);
__ ret(3 * kPointerSize);
__ bind(&not_original_string);
// Special handling of sub-strings of length 1 and 2. One character strings // Special handling of sub-strings of length 1 and 2. One character strings
// are handled in the runtime system (looked up in the single character // are handled in the runtime system (looked up in the single character
// cache). Two character strings are looked for in the symbol cache. // cache). Two character strings are looked for in the symbol cache.
__ SmiUntag(ecx); // Result length is no longer smi. __ cmp(ecx, Immediate(Smi::FromInt(2)));
__ cmp(ecx, 2);
__ j(greater, &result_longer_than_two); __ j(greater, &result_longer_than_two);
__ j(less, &runtime); __ j(less, &runtime);
// Sub string of length 2 requested. // Sub string of length 2 requested.
// eax: string // eax: string
// ebx: instance type // ebx: instance type
// ecx: sub string length (value is 2) // ecx: sub string length (smi, value is 2)
// edx: from index (smi) // edx: from index (smi)
__ JumpIfInstanceTypeIsNotSequentialAscii(ebx, ebx, &runtime); __ JumpIfInstanceTypeIsNotSequentialAscii(ebx, ebx, &runtime);
...@@ -6121,6 +6124,7 @@ void SubStringStub::Generate(MacroAssembler* masm) { ...@@ -6121,6 +6124,7 @@ void SubStringStub::Generate(MacroAssembler* masm) {
StringHelper::GenerateTwoCharacterSymbolTableProbe( StringHelper::GenerateTwoCharacterSymbolTableProbe(
masm, ebx, ecx, eax, edx, edi, masm, ebx, ecx, eax, edx, edi,
&make_two_character_string, &make_two_character_string); &make_two_character_string, &make_two_character_string);
__ IncrementCounter(counters->sub_string_native(), 1);
__ ret(3 * kPointerSize); __ ret(3 * kPointerSize);
__ bind(&make_two_character_string); __ bind(&make_two_character_string);
...@@ -6128,24 +6132,17 @@ void SubStringStub::Generate(MacroAssembler* masm) { ...@@ -6128,24 +6132,17 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ mov(eax, Operand(esp, 3 * kPointerSize)); __ mov(eax, Operand(esp, 3 * kPointerSize));
__ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset)); __ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));
__ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset)); __ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset));
__ Set(ecx, Immediate(2)); __ Set(ecx, Immediate(Smi::FromInt(2)));
__ mov(edx, Operand(esp, 2 * kPointerSize)); // Load index.
if (FLAG_string_slices) {
Label copy_routine;
// If coming from the make_two_character_string path, the string
// is too short to be sliced anyways.
STATIC_ASSERT(2 < SlicedString::kMinLength);
__ jmp(&copy_routine);
__ bind(&result_longer_than_two); __ bind(&result_longer_than_two);
// eax: string // eax: string
// ebx: instance type // ebx: instance type
// ecx: sub string length // ecx: sub string length (smi)
// edx: from index (smi) // edx: from index (smi)
Label allocate_slice, sliced_string, seq_or_external_string; // Deal with different string types: update the index if necessary
__ cmp(ecx, SlicedString::kMinLength); // and put the underlying string into edi.
// Short slice. Copy instead of slicing. Label underlying_unpacked, sliced_string, seq_or_external_string;
__ j(less, &copy_routine);
// If the string is not indirect, it can only be sequential or external. // If the string is not indirect, it can only be sequential or external.
STATIC_ASSERT(kIsIndirectStringMask == (kSlicedStringTag & kConsStringTag)); STATIC_ASSERT(kIsIndirectStringMask == (kSlicedStringTag & kConsStringTag));
STATIC_ASSERT(kIsIndirectStringMask != 0); STATIC_ASSERT(kIsIndirectStringMask != 0);
...@@ -6156,27 +6153,40 @@ void SubStringStub::Generate(MacroAssembler* masm) { ...@@ -6156,27 +6153,40 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ test(ebx, Immediate(kSlicedNotConsMask)); __ test(ebx, Immediate(kSlicedNotConsMask));
__ j(not_zero, &sliced_string, Label::kNear); __ j(not_zero, &sliced_string, Label::kNear);
// Cons string. Check whether it is flat, then fetch first part. // Cons string. Check whether it is flat, then fetch first part.
// Flat cons strings have an empty second part.
__ cmp(FieldOperand(eax, ConsString::kSecondOffset), __ cmp(FieldOperand(eax, ConsString::kSecondOffset),
factory->empty_string()); factory->empty_string());
__ j(not_equal, &runtime); __ j(not_equal, &runtime);
__ mov(edi, FieldOperand(eax, ConsString::kFirstOffset)); __ mov(edi, FieldOperand(eax, ConsString::kFirstOffset));
__ jmp(&allocate_slice, Label::kNear); // Update instance type.
__ mov(ebx, FieldOperand(edi, HeapObject::kMapOffset));
__ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset));
__ jmp(&underlying_unpacked, Label::kNear);
__ bind(&sliced_string); __ bind(&sliced_string);
// Sliced string. Fetch parent and correct start index by offset. // Sliced string. Fetch parent and adjust start index by offset.
__ add(edx, FieldOperand(eax, SlicedString::kOffsetOffset)); __ add(edx, FieldOperand(eax, SlicedString::kOffsetOffset));
__ mov(edi, FieldOperand(eax, SlicedString::kParentOffset)); __ mov(edi, FieldOperand(eax, SlicedString::kParentOffset));
__ jmp(&allocate_slice, Label::kNear); // Update instance type.
__ mov(ebx, FieldOperand(edi, HeapObject::kMapOffset));
__ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset));
__ jmp(&underlying_unpacked, Label::kNear);
__ bind(&seq_or_external_string); __ bind(&seq_or_external_string);
// Sequential or external string. Just move string to the correct register. // Sequential or external string. Just move string to the expected register.
__ mov(edi, eax); __ mov(edi, eax);
__ bind(&allocate_slice); __ bind(&underlying_unpacked);
if (FLAG_string_slices) {
Label copy_routine;
// edi: underlying subject string // edi: underlying subject string
// ebx: instance type of original subject string // ebx: instance type of original subject string
// edx: offset // edx: adjusted start index (smi)
// ecx: length // ecx: length (smi)
__ cmp(ecx, Immediate(Smi::FromInt(SlicedString::kMinLength)));
// Short slice. Copy instead of slicing.
__ j(less, &copy_routine);
// Allocate new sliced string. At this point we do not reload the instance // Allocate new sliced string. At this point we do not reload the instance
// type including the string encoding because we simply rely on the info // type including the string encoding because we simply rely on the info
// provided by the original string. It does not matter if the original // provided by the original string. It does not matter if the original
...@@ -6193,27 +6203,50 @@ void SubStringStub::Generate(MacroAssembler* masm) { ...@@ -6193,27 +6203,50 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ AllocateTwoByteSlicedString(eax, ebx, no_reg, &runtime); __ AllocateTwoByteSlicedString(eax, ebx, no_reg, &runtime);
__ bind(&set_slice_header); __ bind(&set_slice_header);
__ mov(FieldOperand(eax, SlicedString::kOffsetOffset), edx); __ mov(FieldOperand(eax, SlicedString::kOffsetOffset), edx);
__ SmiTag(ecx);
__ mov(FieldOperand(eax, SlicedString::kLengthOffset), ecx); __ mov(FieldOperand(eax, SlicedString::kLengthOffset), ecx);
__ mov(FieldOperand(eax, SlicedString::kParentOffset), edi); __ mov(FieldOperand(eax, SlicedString::kParentOffset), edi);
__ mov(FieldOperand(eax, SlicedString::kHashFieldOffset), __ mov(FieldOperand(eax, SlicedString::kHashFieldOffset),
Immediate(String::kEmptyHashField)); Immediate(String::kEmptyHashField));
__ jmp(&return_eax); __ IncrementCounter(counters->sub_string_native(), 1);
__ ret(3 * kPointerSize);
__ bind(&copy_routine); __ bind(&copy_routine);
} else {
__ bind(&result_longer_than_two);
} }
// eax: string // edi: underlying subject string
// ebx: instance type // ebx: instance type of original subject string
// ecx: result string length // edx: adjusted start index (smi)
// Check for flat ascii string // ecx: length (smi)
Label non_ascii_flat; // The subject string can only be external or sequential string of either
__ JumpIfInstanceTypeIsNotSequentialAscii(ebx, ebx, &non_ascii_flat); // encoding at this point.
Label two_byte_sequential, runtime_drop_two, sequential_string;
STATIC_ASSERT(kExternalStringTag != 0);
STATIC_ASSERT(kSeqStringTag == 0);
__ test_b(ebx, kExternalStringTag);
__ j(zero, &sequential_string);
// Handle external string.
Label ascii_external, done;
// Rule out short external strings.
STATIC_CHECK(kShortExternalStringTag != 0);
__ test_b(ebx, kShortExternalStringMask);
__ j(not_zero, &runtime);
__ mov(edi, FieldOperand(edi, ExternalString::kResourceDataOffset));
// Move the pointer so that offset-wise, it looks like a sequential string.
STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqAsciiString::kHeaderSize);
__ sub(edi, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
__ bind(&sequential_string);
// Stash away (adjusted) index and (underlying) string.
__ push(edx);
__ push(edi);
__ SmiUntag(ecx);
STATIC_ASSERT((kAsciiStringTag & kStringEncodingMask) != 0);
__ test_b(ebx, kStringEncodingMask);
__ j(zero, &two_byte_sequential);
// Allocate the result. // Sequential ascii string. Allocate the result.
__ AllocateAsciiString(eax, ecx, ebx, edx, edi, &runtime); __ AllocateAsciiString(eax, ecx, ebx, edx, edi, &runtime_drop_two);
// eax: result string // eax: result string
// ecx: result string length // ecx: result string length
...@@ -6222,11 +6255,10 @@ void SubStringStub::Generate(MacroAssembler* masm) { ...@@ -6222,11 +6255,10 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ mov(edi, eax); __ mov(edi, eax);
__ add(edi, Immediate(SeqAsciiString::kHeaderSize - kHeapObjectTag)); __ add(edi, Immediate(SeqAsciiString::kHeaderSize - kHeapObjectTag));
// Load string argument and locate character of sub string start. // Load string argument and locate character of sub string start.
__ mov(esi, Operand(esp, 3 * kPointerSize)); __ pop(esi);
__ add(esi, Immediate(SeqAsciiString::kHeaderSize - kHeapObjectTag)); __ pop(ebx);
__ mov(ebx, Operand(esp, 2 * kPointerSize)); // from
__ SmiUntag(ebx); __ SmiUntag(ebx);
__ add(esi, ebx); __ lea(esi, FieldOperand(esi, ebx, times_1, SeqAsciiString::kHeaderSize));
// eax: result string // eax: result string
// ecx: result length // ecx: result length
...@@ -6235,20 +6267,12 @@ void SubStringStub::Generate(MacroAssembler* masm) { ...@@ -6235,20 +6267,12 @@ void SubStringStub::Generate(MacroAssembler* masm) {
// esi: character of sub string start // esi: character of sub string start
StringHelper::GenerateCopyCharactersREP(masm, edi, esi, ecx, ebx, true); StringHelper::GenerateCopyCharactersREP(masm, edi, esi, ecx, ebx, true);
__ mov(esi, edx); // Restore esi. __ mov(esi, edx); // Restore esi.
Counters* counters = masm->isolate()->counters();
__ IncrementCounter(counters->sub_string_native(), 1); __ IncrementCounter(counters->sub_string_native(), 1);
__ ret(3 * kPointerSize); __ ret(3 * kPointerSize);
__ bind(&non_ascii_flat); __ bind(&two_byte_sequential);
// eax: string // Sequential two-byte string. Allocate the result.
// ebx: instance type & kStringRepresentationMask | kStringEncodingMask __ AllocateTwoByteString(eax, ecx, ebx, edx, edi, &runtime_drop_two);
// ecx: result string length
// Check for flat two byte string
__ cmp(ebx, kSeqStringTag | kTwoByteStringTag);
__ j(not_equal, &runtime);
// Allocate the result.
__ AllocateTwoByteString(eax, ecx, ebx, edx, edi, &runtime);
// eax: result string // eax: result string
// ecx: result string length // ecx: result string length
...@@ -6258,14 +6282,13 @@ void SubStringStub::Generate(MacroAssembler* masm) { ...@@ -6258,14 +6282,13 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ add(edi, __ add(edi,
Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag)); Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
// Load string argument and locate character of sub string start. // Load string argument and locate character of sub string start.
__ mov(esi, Operand(esp, 3 * kPointerSize)); __ pop(esi);
__ add(esi, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag)); __ pop(ebx);
__ mov(ebx, Operand(esp, 2 * kPointerSize)); // from
// As from is a smi it is 2 times the value which matches the size of a two // As from is a smi it is 2 times the value which matches the size of a two
// byte character. // byte character.
STATIC_ASSERT(kSmiTag == 0); STATIC_ASSERT(kSmiTag == 0);
STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1); STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
__ add(esi, ebx); __ lea(esi, FieldOperand(esi, ebx, times_1, SeqTwoByteString::kHeaderSize));
// eax: result string // eax: result string
// ecx: result length // ecx: result length
...@@ -6274,11 +6297,13 @@ void SubStringStub::Generate(MacroAssembler* masm) { ...@@ -6274,11 +6297,13 @@ void SubStringStub::Generate(MacroAssembler* masm) {
// esi: character of sub string start // esi: character of sub string start
StringHelper::GenerateCopyCharactersREP(masm, edi, esi, ecx, ebx, false); StringHelper::GenerateCopyCharactersREP(masm, edi, esi, ecx, ebx, false);
__ mov(esi, edx); // Restore esi. __ mov(esi, edx); // Restore esi.
__ bind(&return_eax);
__ IncrementCounter(counters->sub_string_native(), 1); __ IncrementCounter(counters->sub_string_native(), 1);
__ ret(3 * kPointerSize); __ ret(3 * kPointerSize);
// Drop pushed values on the stack before tail call.
__ bind(&runtime_drop_two);
__ Drop(2);
// Just jump to runtime to create the sub string. // Just jump to runtime to create the sub string.
__ bind(&runtime); __ bind(&runtime);
__ TailCallRuntime(Runtime::kSubString, 3, 1); __ TailCallRuntime(Runtime::kSubString, 3, 1);
......
...@@ -59,7 +59,7 @@ function test() { ...@@ -59,7 +59,7 @@ function test() {
} catch (ex) { } } catch (ex) { }
assertEquals("1", charat_short.charAt(1)); assertEquals("1", charat_short.charAt(1));
// Test regexp. // Test regexp and short substring.
var re = /(A|B)/; var re = /(A|B)/;
var rere = /(T.{1,2}B)/; var rere = /(T.{1,2}B)/;
var ascii = "ABCDEFGHIJKLMNOPQRST"; var ascii = "ABCDEFGHIJKLMNOPQRST";
...@@ -81,6 +81,10 @@ function test() { ...@@ -81,6 +81,10 @@ function test() {
assertEquals(["A", "A"], re.exec(twobyte)); assertEquals(["A", "A"], re.exec(twobyte));
assertEquals(["B", "B"], re.exec(twobyte_slice)); assertEquals(["B", "B"], re.exec(twobyte_slice));
assertEquals(["T_AB", "T_AB"], rere.exec(twobyte_cons)); assertEquals(["T_AB", "T_AB"], rere.exec(twobyte_cons));
assertEquals("DEFG", ascii_slice.substr(2, 4));
assertEquals("DEFG", twobyte_slice.substr(2, 4));
assertEquals("DEFG", ascii_cons.substr(3, 4));
assertEquals("DEFG", twobyte_cons.substr(4, 4));
} }
} }
......
...@@ -160,6 +160,23 @@ for ( var i = 0; i < 1000; i++) { ...@@ -160,6 +160,23 @@ for ( var i = 0; i < 1000; i++) {
f(flat, cons, slice, i); f(flat, cons, slice, i);
} }
// Short substrings.
flat = "abcdefghijkl12345";
cons = flat + flat.toUpperCase();
/x/.exec(cons); // Flatten cons
slice = "abcdefghijklmn12345".slice(1, -1);
assertEquals("cdefg", flat.substr(2, 5));
assertEquals("cdefg", cons.substr(2, 5));
assertEquals("cdefg", slice.substr(1, 5));
flat = "abc\u1234defghijkl12345";
cons = flat + flat.toUpperCase();
/x/.exec(cons); // Flatten cons
slice = "abc\u1234defghijklmn12345".slice(1, -1);
assertEquals("c\u1234def", flat.substr(2, 5));
assertEquals("c\u1234def", cons.substr(2, 5));
assertEquals("c\u1234def", slice.substr(1, 5));
// Concatenate substrings. // Concatenate substrings.
var ascii = 'abcdefghijklmnop'; var ascii = 'abcdefghijklmnop';
var utf = '\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB'; var utf = '\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB';
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment