Commit 962d7019 authored by whesse@chromium.org

Speed up FastAsciiArrayJoin on ia32 by improving hand-written assembly code.

Review URL: http://codereview.chromium.org/6148007

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@6310 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 3800b24f
This diff is collapsed.
This diff is collapsed.
...@@ -877,55 +877,53 @@ void MacroAssembler::AllocateAsciiConsString(Register result, ...@@ -877,55 +877,53 @@ void MacroAssembler::AllocateAsciiConsString(Register result,
Immediate(Factory::cons_ascii_string_map())); Immediate(Factory::cons_ascii_string_map()));
} }
// All registers must be distinct. Only current_string needs valid contents
// on entry. All registers may be invalid on exit. result_operand is // Copy memory, byte-by-byte, from source to destination. Not optimized for
// unchanged, padding_chars is updated correctly. // long or aligned copies. The contents of scratch and length are destroyed.
void MacroAssembler::AppendStringToTopOfNewSpace( // Source and destination are incremented by length.
Register current_string, // Tagged pointer to string to copy. // Many variants of movsb, loop unrolling, word moves, and indexed operands
Register current_string_length, // have been tried here already, and this is fastest.
Register result_pos, // A simpler loop is faster on small copies, but 30% slower on large ones.
Register scratch, // The cld() instruction must have been emitted, to set the direction flag(),
Register new_padding_chars, // before calling this function.
Operand operand_result, void MacroAssembler::CopyBytes(Register source,
Operand operand_padding_chars, Register destination,
Label* bailout) { Register length,
mov(current_string_length, Register scratch) {
FieldOperand(current_string, String::kLengthOffset)); Label loop, done, short_string, short_loop;
shr(current_string_length, 1); // Experimentation shows that the short string loop is faster if length < 10.
sub(current_string_length, operand_padding_chars); cmp(Operand(length), Immediate(10));
mov(new_padding_chars, current_string_length); j(less_equal, &short_string);
add(Operand(current_string_length), Immediate(kObjectAlignmentMask));
and_(Operand(current_string_length), Immediate(~kObjectAlignmentMask)); ASSERT(source.is(esi));
sub(new_padding_chars, Operand(current_string_length)); ASSERT(destination.is(edi));
neg(new_padding_chars); ASSERT(length.is(ecx));
// We need an allocation even if current_string_length is 0, to fetch
// result_pos. Consider using a faster fetch of result_pos in that case. // Because source is 4-byte aligned in our uses of this function,
AllocateInNewSpace(current_string_length, result_pos, scratch, no_reg, // we keep source aligned for the rep_movs call by copying the odd bytes
bailout, NO_ALLOCATION_FLAGS); // at the end of the ranges.
sub(result_pos, operand_padding_chars); mov(scratch, Operand(source, length, times_1, -4));
mov(operand_padding_chars, new_padding_chars); mov(Operand(destination, length, times_1, -4), scratch);
mov(scratch, ecx);
Register scratch_2 = new_padding_chars; // Used to compute total length. shr(ecx, 2);
// Copy string to the end of result. rep_movs();
mov(current_string_length, and_(Operand(scratch), Immediate(0x3));
FieldOperand(current_string, String::kLengthOffset)); add(destination, Operand(scratch));
mov(scratch, operand_result); jmp(&done);
mov(scratch_2, current_string_length);
add(scratch_2, FieldOperand(scratch, String::kLengthOffset)); bind(&short_string);
mov(FieldOperand(scratch, String::kLengthOffset), scratch_2); test(length, Operand(length));
shr(current_string_length, 1); j(zero, &done);
lea(current_string,
FieldOperand(current_string, SeqAsciiString::kHeaderSize)); bind(&short_loop);
// Loop condition: while (--current_string_length >= 0). mov_b(scratch, Operand(source, 0));
Label copy_loop; mov_b(Operand(destination, 0), scratch);
Label copy_loop_entry; inc(source);
jmp(&copy_loop_entry); inc(destination);
bind(&copy_loop); dec(length);
mov_b(scratch, Operand(current_string, current_string_length, times_1, 0)); j(not_zero, &short_loop);
mov_b(Operand(result_pos, current_string_length, times_1, 0), scratch);
bind(&copy_loop_entry); bind(&done);
sub(Operand(current_string_length), Immediate(1));
j(greater_equal, &copy_loop);
} }
......
...@@ -386,22 +386,13 @@ class MacroAssembler: public Assembler { ...@@ -386,22 +386,13 @@ class MacroAssembler: public Assembler {
Register scratch2, Register scratch2,
Label* gc_required); Label* gc_required);
// All registers must be distinct. Only current_string needs valid contents // Copy memory, byte-by-byte, from source to destination. Not optimized for
// on entry. All registers may be invalid on exit. result_operand is // long or aligned copies.
// unchanged, padding_chars is updated correctly. // The contents of index and scratch are destroyed.
// The top of new space must contain a sequential ascii string with void CopyBytes(Register source,
// padding_chars bytes free in its top word. The sequential ascii string Register destination,
// current_string is concatenated to it, allocating the necessary amount Register length,
// of new memory. Register scratch);
void AppendStringToTopOfNewSpace(
Register current_string, // Tagged pointer to string to copy.
Register current_string_length,
Register result_pos,
Register scratch,
Register new_padding_chars,
Operand operand_result,
Operand operand_padding_chars,
Label* bailout);
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Support functions. // Support functions.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment