Commit 9ebcfb41 authored by ulan@chromium.org

ARM: Make DoStoreKeyedFixedDoubleArray faster; don't allow conditional Vmov

This patch makes us generate faster code for DoStoreKeyedFixedDoubleArray
by using a branch rather than a conditionally executed Vmov.

Conditional VFP instructions are not a great idea in general, and they were
especially bad in this case because Vmov expands to several instructions.
For this reason, the patch also removes the 'cond' parameter from Vmov.
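
For reference, the new sequence in DoStoreKeyedFixedDoubleArray looks roughly
like the sketch below. This is a paraphrase of the diff in this commit rather
than new code; 'value' is the DwVfpRegister holding the element, and the label
name is taken from the patch:

    __ VFPCompareAndSetFlags(value, value);  // Unordered (NaN) sets the V flag.
    Label after_canonicalization;
    // Only load the canonical NaN if the comparison above set the overflow
    // flag, i.e. if the value is a NaN.
    __ b(vc, &after_canonicalization);
    __ Vmov(value, FixedDoubleArray::canonical_not_the_hole_nan_as_double());
    __ bind(&after_canonicalization);

Previously the Vmov itself was predicated on 'vs', so every instruction in its
expansion (the mov/vmov pairs emitted when the immediate cannot be encoded
directly) carried the condition.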

Thanks to Rodolph for pointing me to this!

BUG=none

Review URL: https://chromiumcodereview.appspot.com/12316096
Patch from Hans Wennborg <hans@chromium.org>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13722 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 34c372d8
@@ -2067,8 +2067,7 @@ static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
 void Assembler::vmov(const DwVfpRegister dst,
                      double imm,
-                     const Register scratch,
-                     const Condition cond) {
+                     const Register scratch) {
   ASSERT(CpuFeatures::IsEnabled(VFP2));
   uint32_t enc;
@@ -2081,7 +2080,7 @@ void Assembler::vmov(const DwVfpRegister dst,
     // Vd(15-12) | 101(11-9) | sz=1(8) | imm4L(3-0)
     int vd, d;
     dst.split_code(&vd, &d);
-    emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | enc);
+    emit(al | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | enc);
   } else if (FLAG_enable_vldr_imm) {
     // TODO(jfb) Temporarily turned off until we have constant blinding or
     // some equivalent mitigation: an attacker can otherwise control
@@ -2099,7 +2098,7 @@ void Assembler::vmov(const DwVfpRegister dst,
     // that's tricky because vldr has a limited reach. Furthermore
     // it breaks load locality.
     RecordRelocInfo(imm);
-    vldr(dst, MemOperand(pc, 0), cond);
+    vldr(dst, MemOperand(pc, 0));
   } else {
     // Synthesise the double from ARM immediates.
     uint32_t lo, hi;
@@ -2110,27 +2109,27 @@ void Assembler::vmov(const DwVfpRegister dst,
         // Move the low part of the double into the lower of the corresponsing S
         // registers of D register dst.
         mov(ip, Operand(lo));
-        vmov(dst.low(), ip, cond);
+        vmov(dst.low(), ip);
         // Move the high part of the double into the higher of the
         // corresponsing S registers of D register dst.
         mov(ip, Operand(hi));
-        vmov(dst.high(), ip, cond);
+        vmov(dst.high(), ip);
       } else {
         // D16-D31 does not have S registers, so move the low and high parts
         // directly to the D register using vmov.32.
         // Note: This may be slower, so we only do this when we have to.
         mov(ip, Operand(lo));
-        vmov(dst, VmovIndexLo, ip, cond);
+        vmov(dst, VmovIndexLo, ip);
         mov(ip, Operand(hi));
-        vmov(dst, VmovIndexHi, ip, cond);
+        vmov(dst, VmovIndexHi, ip);
       }
     } else {
       // Move the low and high parts of the double to a D register in one
      // instruction.
       mov(ip, Operand(lo));
       mov(scratch, Operand(hi));
-      vmov(dst, ip, scratch, cond);
+      vmov(dst, ip, scratch);
     }
   }
 }
...
@@ -1066,8 +1066,7 @@ class Assembler : public AssemblerBase {
   void vmov(const DwVfpRegister dst,
             double imm,
-            const Register scratch = no_reg,
-            const Condition cond = al);
+            const Register scratch = no_reg);
   void vmov(const SwVfpRegister dst,
             const SwVfpRegister src,
             const Condition cond = al);
...
@@ -4472,10 +4472,14 @@ void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) {
   if (instr->NeedsCanonicalization()) {
     // Check for NaN. All NaNs must be canonicalized.
     __ VFPCompareAndSetFlags(value, value);
+    Label after_canonicalization;
     // Only load canonical NaN if the comparison above set the overflow.
+    __ b(vc, &after_canonicalization);
     __ Vmov(value,
-            FixedDoubleArray::canonical_not_the_hole_nan_as_double(),
-            no_reg, vs);
+            FixedDoubleArray::canonical_not_the_hole_nan_as_double());
+    __ bind(&after_canonicalization);
   }
   __ vstr(value, scratch, instr->additional_index() << element_size_shift);
...
@@ -812,19 +812,18 @@ void MacroAssembler::VFPCompareAndLoadFlags(const DwVfpRegister src1,
 void MacroAssembler::Vmov(const DwVfpRegister dst,
                           const double imm,
-                          const Register scratch,
-                          const Condition cond) {
+                          const Register scratch) {
   ASSERT(CpuFeatures::IsEnabled(VFP2));
   static const DoubleRepresentation minus_zero(-0.0);
   static const DoubleRepresentation zero(0.0);
   DoubleRepresentation value(imm);
   // Handle special values first.
   if (value.bits == zero.bits) {
-    vmov(dst, kDoubleRegZero, cond);
+    vmov(dst, kDoubleRegZero);
   } else if (value.bits == minus_zero.bits) {
-    vneg(dst, kDoubleRegZero, cond);
+    vneg(dst, kDoubleRegZero);
   } else {
-    vmov(dst, imm, scratch, cond);
+    vmov(dst, imm, scratch);
   }
 }
...
@@ -480,8 +480,7 @@ class MacroAssembler: public Assembler {
   void Vmov(const DwVfpRegister dst,
             const double imm,
-            const Register scratch = no_reg,
-            const Condition cond = al);
+            const Register scratch = no_reg);
   // Enter exit frame.
   // stack_space - extra stack space, used for alignment before call to C.
...