Commit ce86c1bf authored by yangguo@chromium.org

Avoid bailing out to runtime for short substrings.

This significantly speeds up the creation of short substrings (fewer than 13 characters) from slices, flat cons strings, and external strings.

TEST=string-external-cached.js, string-slices.js

Review URL: http://codereview.chromium.org/8889012

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@10221 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent f1649cf3
......@@ -6093,20 +6093,23 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ JumpIfNotSmi(edx, &runtime);
__ sub(ecx, edx);
__ cmp(ecx, FieldOperand(eax, String::kLengthOffset));
Label return_eax;
__ j(equal, &return_eax);
Label not_original_string;
__ j(not_equal, &not_original_string, Label::kNear);
Counters* counters = masm->isolate()->counters();
__ IncrementCounter(counters->sub_string_native(), 1);
__ ret(3 * kPointerSize);
__ bind(&not_original_string);
// Special handling of sub-strings of length 1 and 2. One character strings
// are handled in the runtime system (looked up in the single character
// cache). Two character strings are looked for in the symbol cache.
__ SmiUntag(ecx); // Result length is no longer smi.
__ cmp(ecx, 2);
__ cmp(ecx, Immediate(Smi::FromInt(2)));
__ j(greater, &result_longer_than_two);
__ j(less, &runtime);
// Sub string of length 2 requested.
// eax: string
// ebx: instance type
// ecx: sub string length (value is 2)
// ecx: sub string length (smi, value is 2)
// edx: from index (smi)
__ JumpIfInstanceTypeIsNotSequentialAscii(ebx, ebx, &runtime);
......@@ -6121,6 +6124,7 @@ void SubStringStub::Generate(MacroAssembler* masm) {
StringHelper::GenerateTwoCharacterSymbolTableProbe(
masm, ebx, ecx, eax, edx, edi,
&make_two_character_string, &make_two_character_string);
__ IncrementCounter(counters->sub_string_native(), 1);
__ ret(3 * kPointerSize);
__ bind(&make_two_character_string);
......@@ -6128,24 +6132,17 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ mov(eax, Operand(esp, 3 * kPointerSize));
__ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));
__ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset));
__ Set(ecx, Immediate(2));
__ Set(ecx, Immediate(Smi::FromInt(2)));
__ mov(edx, Operand(esp, 2 * kPointerSize)); // Load index.
if (FLAG_string_slices) {
Label copy_routine;
// If coming from the make_two_character_string path, the string
// is too short to be sliced anyways.
STATIC_ASSERT(2 < SlicedString::kMinLength);
__ jmp(&copy_routine);
__ bind(&result_longer_than_two);
// eax: string
// ebx: instance type
// ecx: sub string length
// ecx: sub string length (smi)
// edx: from index (smi)
Label allocate_slice, sliced_string, seq_or_external_string;
__ cmp(ecx, SlicedString::kMinLength);
// Short slice. Copy instead of slicing.
__ j(less, &copy_routine);
// Deal with different string types: update the index if necessary
// and put the underlying string into edi.
Label underlying_unpacked, sliced_string, seq_or_external_string;
// If the string is not indirect, it can only be sequential or external.
STATIC_ASSERT(kIsIndirectStringMask == (kSlicedStringTag & kConsStringTag));
STATIC_ASSERT(kIsIndirectStringMask != 0);
......@@ -6156,27 +6153,40 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ test(ebx, Immediate(kSlicedNotConsMask));
__ j(not_zero, &sliced_string, Label::kNear);
// Cons string. Check whether it is flat, then fetch first part.
// Flat cons strings have an empty second part.
__ cmp(FieldOperand(eax, ConsString::kSecondOffset),
factory->empty_string());
__ j(not_equal, &runtime);
__ mov(edi, FieldOperand(eax, ConsString::kFirstOffset));
__ jmp(&allocate_slice, Label::kNear);
// Update instance type.
__ mov(ebx, FieldOperand(edi, HeapObject::kMapOffset));
__ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset));
__ jmp(&underlying_unpacked, Label::kNear);
__ bind(&sliced_string);
// Sliced string. Fetch parent and correct start index by offset.
// Sliced string. Fetch parent and adjust start index by offset.
__ add(edx, FieldOperand(eax, SlicedString::kOffsetOffset));
__ mov(edi, FieldOperand(eax, SlicedString::kParentOffset));
__ jmp(&allocate_slice, Label::kNear);
// Update instance type.
__ mov(ebx, FieldOperand(edi, HeapObject::kMapOffset));
__ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset));
__ jmp(&underlying_unpacked, Label::kNear);
__ bind(&seq_or_external_string);
// Sequential or external string. Just move string to the correct register.
// Sequential or external string. Just move string to the expected register.
__ mov(edi, eax);
__ bind(&allocate_slice);
__ bind(&underlying_unpacked);
if (FLAG_string_slices) {
Label copy_routine;
// edi: underlying subject string
// ebx: instance type of original subject string
// edx: offset
// ecx: length
// edx: adjusted start index (smi)
// ecx: length (smi)
__ cmp(ecx, Immediate(Smi::FromInt(SlicedString::kMinLength)));
// Short slice. Copy instead of slicing.
__ j(less, &copy_routine);
// Allocate new sliced string. At this point we do not reload the instance
// type including the string encoding because we simply rely on the info
// provided by the original string. It does not matter if the original
......@@ -6193,27 +6203,50 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ AllocateTwoByteSlicedString(eax, ebx, no_reg, &runtime);
__ bind(&set_slice_header);
__ mov(FieldOperand(eax, SlicedString::kOffsetOffset), edx);
__ SmiTag(ecx);
__ mov(FieldOperand(eax, SlicedString::kLengthOffset), ecx);
__ mov(FieldOperand(eax, SlicedString::kParentOffset), edi);
__ mov(FieldOperand(eax, SlicedString::kHashFieldOffset),
Immediate(String::kEmptyHashField));
__ jmp(&return_eax);
__ IncrementCounter(counters->sub_string_native(), 1);
__ ret(3 * kPointerSize);
__ bind(&copy_routine);
} else {
__ bind(&result_longer_than_two);
}
// eax: string
// ebx: instance type
// ecx: result string length
// Check for flat ascii string
Label non_ascii_flat;
__ JumpIfInstanceTypeIsNotSequentialAscii(ebx, ebx, &non_ascii_flat);
// edi: underlying subject string
// ebx: instance type of original subject string
// edx: adjusted start index (smi)
// ecx: length (smi)
// The subject string can only be external or sequential string of either
// encoding at this point.
Label two_byte_sequential, runtime_drop_two, sequential_string;
STATIC_ASSERT(kExternalStringTag != 0);
STATIC_ASSERT(kSeqStringTag == 0);
__ test_b(ebx, kExternalStringTag);
__ j(zero, &sequential_string);
// Handle external string.
Label ascii_external, done;
// Rule out short external strings.
STATIC_CHECK(kShortExternalStringTag != 0);
__ test_b(ebx, kShortExternalStringMask);
__ j(not_zero, &runtime);
__ mov(edi, FieldOperand(edi, ExternalString::kResourceDataOffset));
// Move the pointer so that offset-wise, it looks like a sequential string.
STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqAsciiString::kHeaderSize);
__ sub(edi, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
__ bind(&sequential_string);
// Stash away (adjusted) index and (underlying) string.
__ push(edx);
__ push(edi);
__ SmiUntag(ecx);
STATIC_ASSERT((kAsciiStringTag & kStringEncodingMask) != 0);
__ test_b(ebx, kStringEncodingMask);
__ j(zero, &two_byte_sequential);
// Allocate the result.
__ AllocateAsciiString(eax, ecx, ebx, edx, edi, &runtime);
// Sequential ascii string. Allocate the result.
__ AllocateAsciiString(eax, ecx, ebx, edx, edi, &runtime_drop_two);
// eax: result string
// ecx: result string length
......@@ -6222,11 +6255,10 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ mov(edi, eax);
__ add(edi, Immediate(SeqAsciiString::kHeaderSize - kHeapObjectTag));
// Load string argument and locate character of sub string start.
__ mov(esi, Operand(esp, 3 * kPointerSize));
__ add(esi, Immediate(SeqAsciiString::kHeaderSize - kHeapObjectTag));
__ mov(ebx, Operand(esp, 2 * kPointerSize)); // from
__ pop(esi);
__ pop(ebx);
__ SmiUntag(ebx);
__ add(esi, ebx);
__ lea(esi, FieldOperand(esi, ebx, times_1, SeqAsciiString::kHeaderSize));
// eax: result string
// ecx: result length
......@@ -6235,20 +6267,12 @@ void SubStringStub::Generate(MacroAssembler* masm) {
// esi: character of sub string start
StringHelper::GenerateCopyCharactersREP(masm, edi, esi, ecx, ebx, true);
__ mov(esi, edx); // Restore esi.
Counters* counters = masm->isolate()->counters();
__ IncrementCounter(counters->sub_string_native(), 1);
__ ret(3 * kPointerSize);
__ bind(&non_ascii_flat);
// eax: string
// ebx: instance type & kStringRepresentationMask | kStringEncodingMask
// ecx: result string length
// Check for flat two byte string
__ cmp(ebx, kSeqStringTag | kTwoByteStringTag);
__ j(not_equal, &runtime);
// Allocate the result.
__ AllocateTwoByteString(eax, ecx, ebx, edx, edi, &runtime);
__ bind(&two_byte_sequential);
// Sequential two-byte string. Allocate the result.
__ AllocateTwoByteString(eax, ecx, ebx, edx, edi, &runtime_drop_two);
// eax: result string
// ecx: result string length
......@@ -6258,14 +6282,13 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ add(edi,
Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
// Load string argument and locate character of sub string start.
__ mov(esi, Operand(esp, 3 * kPointerSize));
__ add(esi, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
__ mov(ebx, Operand(esp, 2 * kPointerSize)); // from
__ pop(esi);
__ pop(ebx);
// As from is a smi it is 2 times the value which matches the size of a two
// byte character.
STATIC_ASSERT(kSmiTag == 0);
STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
__ add(esi, ebx);
__ lea(esi, FieldOperand(esi, ebx, times_1, SeqTwoByteString::kHeaderSize));
// eax: result string
// ecx: result length
......@@ -6274,11 +6297,13 @@ void SubStringStub::Generate(MacroAssembler* masm) {
// esi: character of sub string start
StringHelper::GenerateCopyCharactersREP(masm, edi, esi, ecx, ebx, false);
__ mov(esi, edx); // Restore esi.
__ bind(&return_eax);
__ IncrementCounter(counters->sub_string_native(), 1);
__ ret(3 * kPointerSize);
// Drop pushed values on the stack before tail call.
__ bind(&runtime_drop_two);
__ Drop(2);
// Just jump to runtime to create the sub string.
__ bind(&runtime);
__ TailCallRuntime(Runtime::kSubString, 3, 1);
......
......@@ -59,7 +59,7 @@ function test() {
} catch (ex) { }
assertEquals("1", charat_short.charAt(1));
// Test regexp.
// Test regexp and short substring.
var re = /(A|B)/;
var rere = /(T.{1,2}B)/;
var ascii = "ABCDEFGHIJKLMNOPQRST";
......@@ -81,6 +81,10 @@ function test() {
assertEquals(["A", "A"], re.exec(twobyte));
assertEquals(["B", "B"], re.exec(twobyte_slice));
assertEquals(["T_AB", "T_AB"], rere.exec(twobyte_cons));
assertEquals("DEFG", ascii_slice.substr(2, 4));
assertEquals("DEFG", twobyte_slice.substr(2, 4));
assertEquals("DEFG", ascii_cons.substr(3, 4));
assertEquals("DEFG", twobyte_cons.substr(4, 4));
}
}
......
......@@ -160,6 +160,23 @@ for ( var i = 0; i < 1000; i++) {
f(flat, cons, slice, i);
}
// Short substrings.
// Each group below takes the same 5-character substring from three
// differently constructed strings (a plain literal, a flattened cons string
// and a slice) and expects identical results from all three.

// Group 1: ASCII-only content.
flat = "abcdefghijkl12345";
cons = flat + flat.toUpperCase();
// The regexp match result is discarded; exec is called only for its effect
// on the cons string.
/x/.exec(cons); // Flatten cons
// slice(1, -1) drops the first and the last character, so index 1 of the
// slice lines up with index 2 of the flat string.
slice = "abcdefghijklmn12345".slice(1, -1);
assertEquals("cdefg", flat.substr(2, 5));
assertEquals("cdefg", cons.substr(2, 5));
assertEquals("cdefg", slice.substr(1, 5));

// Group 2: same checks with a non-ASCII character (\u1234) inside the
// requested range.
// NOTE(review): presumably this forces a two-byte string representation -
// confirm against the engine's string encoding rules.
flat = "abc\u1234defghijkl12345";
cons = flat + flat.toUpperCase();
/x/.exec(cons); // Flatten cons
slice = "abc\u1234defghijklmn12345".slice(1, -1);
assertEquals("c\u1234def", flat.substr(2, 5));
assertEquals("c\u1234def", cons.substr(2, 5));
assertEquals("c\u1234def", slice.substr(1, 5));
// Concatenate substrings.
var ascii = 'abcdefghijklmnop';
var utf = '\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB';
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment