Commit 65018d91 authored by sgjesse@chromium.org

ARM: Use the vsqrt instruction when available

vsqrt is used to calculate Math.sqrt(x), Math.pow(x, 0.5) and Math.pow(x, -0.5). Code size doesn't matter, as %_MathSqrt and %_MathPow are only called in one place each.
Review URL: http://codereview.chromium.org/2885002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4974 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 7f816486
......@@ -2112,6 +2112,18 @@ void Assembler::vmrs(Register dst, Condition cond) {
}
// Emits a double-precision VSQRT: dst = sqrt(src), predicated on cond.
// Instruction layout:
//   cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) |
//   Vd(15-12) | 101(11-9) | sz(8)=1 | 11(7-6) | M(5) | 0(4) | Vm(3-0)
// The D (22) and M (5) bits are left clear; the register codes are placed
// directly into the Vd and Vm fields.
void Assembler::vsqrt(const DwVfpRegister dst,
                      const DwVfpRegister src,
                      const Condition cond) {
  ASSERT(CpuFeatures::IsEnabled(VFP3));
  // Bits that are identical for every double-precision vsqrt.
  const int opcode_bits = 0xE*B24 | B23 | 0x3*B20 | B16 | 0x5*B9 | B8 | 3*B6;
  // Destination register goes into Vd, source register into Vm.
  const int register_bits = dst.code()*B12 | src.code();
  emit(cond | opcode_bits | register_bits);
}
// Pseudo instructions.
void Assembler::nop(int type) {
// This is mov rx, rx.
......
......@@ -988,6 +988,9 @@ class Assembler : public Malloced {
const Condition cond = al);
void vmrs(const Register dst,
const Condition cond = al);
// Double-precision square root: dst = sqrt(src). The instruction is emitted
// with condition cond, which defaults to al.
void vsqrt(const DwVfpRegister dst,
const DwVfpRegister src,
const Condition cond = al);
// Pseudo instructions
void nop(int type = 0);
......
......@@ -4279,22 +4279,147 @@ void CodeGenerator::GenerateIsNonNegativeSmi(ZoneList<Expression*>* args) {
}
// Generates the Math.pow method.
//
// Without VFP3 support this simply calls the runtime. With VFP3 the two
// exponents 0.5 and -0.5 are computed inline with vsqrt; every other
// exponent, and every base that ObjectToDoubleVFPRegister rejects (non-numbers,
// NaNs and infinities), is handed to the runtime instead.
void CodeGenerator::GenerateMathPow(ZoneList<Expression*>* args) {
  ASSERT(args->length() == 2);
  Load(args->at(0));
  Load(args->at(1));

  if (!CpuFeatures::IsSupported(VFP3)) {
    frame_->CallRuntime(Runtime::kMath_pow, 2);
    frame_->EmitPush(r0);
  } else {
    CpuFeatures::Scope scope(VFP3);
    JumpTarget runtime, done;
    Label not_minus_half, allocate_return;

    Register scratch1 = VirtualFrame::scratch0();
    Register scratch2 = VirtualFrame::scratch1();

    // Get base and exponent to registers.
    Register exponent = frame_->PopToRegister();
    Register base = frame_->PopToRegister(exponent);

    // Set the frame for the runtime jump target. The code below jumps to the
    // jump target label so the frame needs to be established before that.
    ASSERT(runtime.entry_frame() == NULL);
    runtime.set_entry_frame(frame_);

    // A smi exponent is an integer, so it can be neither 0.5 nor -0.5.
    __ BranchOnSmi(exponent, runtime.entry_label());

    // Special handling of raising to the power of -0.5 and 0.5. First check
    // that the exponent is a heap number and that its lower mantissa bits
    // (which are zero for both values) are zero.
    Register heap_number_map = r6;
    __ LoadRoot(heap_number_map, Heap::kHeapNumberMapRootIndex);
    __ ldr(scratch1, FieldMemOperand(exponent, HeapObject::kMapOffset));
    __ ldr(scratch2, FieldMemOperand(exponent, HeapNumber::kMantissaOffset));
    __ cmp(scratch1, heap_number_map);
    runtime.Branch(ne);
    __ tst(scratch2, scratch2);
    runtime.Branch(ne);

    // Load the upper word of the exponent value (sign, exponent and upper
    // mantissa bits).
    __ ldr(scratch1, FieldMemOperand(exponent, HeapNumber::kExponentOffset));

    // Compare exponent with -0.5.
    __ cmp(scratch1, Operand(0xbfe00000));
    __ b(ne, &not_minus_half);

    // Get the double value from the base into vfp register d0.
    __ ObjectToDoubleVFPRegister(base, d0,
                                 scratch1, scratch2, heap_number_map, s0,
                                 runtime.entry_label(),
                                 AVOID_NANS_AND_INFINITIES);

    // Load 1.0 into d2.
    __ mov(scratch2, Operand(0x3ff00000));
    __ mov(scratch1, Operand(0));
    __ vmov(d2, scratch1, scratch2);

    // Calculate the reciprocal of the square root. 1/sqrt(x) = sqrt(1/x).
    // NOTE(review): for a base of -0 this computes sqrt(-Infinity); confirm
    // the result against ES5 15.8.2.13 (Math.pow(-0, -0.5)).
    __ vdiv(d0, d2, d0);
    __ vsqrt(d0, d0);

    __ b(&allocate_return);

    __ bind(&not_minus_half);
    // Compare exponent with 0.5. scratch1 still holds the upper word of the
    // exponent here because the branch above was taken before it was reused.
    __ cmp(scratch1, Operand(0x3fe00000));
    runtime.Branch(ne);

    // Get the double value from the base into vfp register d0.
    __ ObjectToDoubleVFPRegister(base, d0,
                                 scratch1, scratch2, heap_number_map, s0,
                                 runtime.entry_label(),
                                 AVOID_NANS_AND_INFINITIES);
    // NOTE(review): for a base of -0 this yields sqrt(-0); confirm the sign of
    // the result against ES5 15.8.2.13 (Math.pow(-0, 0.5)).
    __ vsqrt(d0, d0);

    __ bind(&allocate_return);
    // NOTE(review): the result is allocated into `base`, and on allocation
    // failure control goes to the runtime path, which pushes `base` again.
    // Confirm AllocateHeapNumber leaves its result register intact when it
    // bails out to gc_required.
    __ AllocateHeapNumberWithValue(
        base, d0, scratch1, scratch2, heap_number_map, runtime.entry_label());
    done.Jump();

    runtime.Bind();

    // Push back the arguments again for the runtime call.
    frame_->EmitPush(base);
    frame_->EmitPush(exponent);
    frame_->CallRuntime(Runtime::kMath_pow, 2);
    __ Move(base, r0);

    done.Bind();
    frame_->EmitPush(base);
  }
}
// Generates the Math.sqrt method.
//
// Without VFP3 support this simply calls the runtime. With VFP3 the argument
// is converted to a double, vsqrt is applied and the result is boxed in a new
// heap number; a non-number argument or a failed allocation falls back to the
// runtime call.
void CodeGenerator::GenerateMathSqrt(ZoneList<Expression*>* args) {
  ASSERT(args->length() == 1);
  Load(args->at(0));

  if (!CpuFeatures::IsSupported(VFP3)) {
    frame_->CallRuntime(Runtime::kMath_sqrt, 1);
    frame_->EmitPush(r0);
  } else {
    CpuFeatures::Scope scope(VFP3);
    JumpTarget runtime, done;

    Register scratch1 = VirtualFrame::scratch0();
    Register scratch2 = VirtualFrame::scratch1();

    // Get the value from the frame.
    Register tos = frame_->PopToRegister();

    // Set the frame for the runtime jump target. The code below jumps to the
    // jump target label so the frame needs to be established before that.
    ASSERT(runtime.entry_frame() == NULL);
    runtime.set_entry_frame(frame_);

    Register heap_number_map = r6;
    __ LoadRoot(heap_number_map, Heap::kHeapNumberMapRootIndex);

    // Get the double value from the smi or heap number into vfp register d0.
    __ ObjectToDoubleVFPRegister(tos, d0,
                                 scratch1, scratch2, heap_number_map, s0,
                                 runtime.entry_label());

    // Calculate the square root of d0 and place result in a heap number object.
    __ vsqrt(d0, d0);
    // NOTE(review): the result is allocated into `tos`, and on allocation
    // failure control goes to the runtime path, which pushes `tos` again.
    // Confirm AllocateHeapNumber leaves its result register intact when it
    // bails out to gc_required.
    __ AllocateHeapNumberWithValue(
        tos, d0, scratch1, scratch2, heap_number_map, runtime.entry_label());
    done.Jump();

    runtime.Bind();
    // Push back the argument again for the runtime call.
    frame_->EmitPush(tos);
    frame_->CallRuntime(Runtime::kMath_sqrt, 1);
    __ Move(tos, r0);

    done.Bind();
    frame_->EmitPush(tos);
  }
}
......
......@@ -1038,7 +1038,8 @@ void Decoder::DecodeUnconditional(Instr* instr) {
// Dd = vmul(Dn, Dm)
// Dd = vdiv(Dn, Dm)
// vcmp(Dd, Dm)
// VMRS
// vmrs
// Dd = vsqrt(Dm)
void Decoder::DecodeTypeVFP(Instr* instr) {
ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) );
ASSERT(instr->Bits(11, 9) == 0x5);
......@@ -1056,6 +1057,8 @@ void Decoder::DecodeTypeVFP(Instr* instr) {
} else if (((instr->Opc2Field() == 0x4) || (instr->Opc2Field() == 0x5)) &&
(instr->Opc3Field() & 0x1)) {
DecodeVCMP(instr);
} else if (((instr->Opc2Field() == 0x1)) && (instr->Opc3Field() == 0x3)) {
Format(instr, "vsqrt.f64'cond 'Dd, 'Dm");
} else {
Unknown(instr); // Not used by V8.
}
......
......@@ -1369,6 +1369,56 @@ void MacroAssembler::IntegerToDoubleConversionWithVFP3(Register inReg,
}
// Loads the numeric value of |object| into the VFP double register |result|.
//
// A smi is untagged and converted (unless OBJECT_NOT_SMI says the smi check
// can be skipped); a heap number has its double payload loaded directly.
// Control transfers to |not_number| when the object's map is not
// |heap_number_map|, or, with AVOID_NANS_AND_INFINITIES, when the heap number
// has an all-ones exponent field. |scratch1|, |scratch2| and |scratch3| are
// overwritten.
void MacroAssembler::ObjectToDoubleVFPRegister(Register object,
                                               DwVfpRegister result,
                                               Register scratch1,
                                               Register scratch2,
                                               Register heap_number_map,
                                               SwVfpRegister scratch3,
                                               Label* not_number,
                                               ObjectToDoubleFlags flags) {
  const bool may_be_smi = (flags & OBJECT_NOT_SMI) == 0;
  const bool reject_non_finite = (flags & AVOID_NANS_AND_INFINITIES) != 0;
  Label loaded;

  if (may_be_smi) {
    Label heap_object;
    BranchOnNotSmi(object, &heap_object);
    // Strip the smi tag and convert the integer to a double.
    SmiToDoubleVFPRegister(object, result, scratch1, scratch3);
    b(&loaded);
    bind(&heap_object);
  }

  // The object must be a heap number: compare its map with the expected one.
  ldr(scratch1, FieldMemOperand(object, HeapObject::kMapOffset));
  // Untagged object address; this is the base for the final vldr.
  sub(scratch2, object, Operand(kHeapObjectTag));
  cmp(scratch1, heap_number_map);
  b(ne, not_number);

  if (reject_non_finite) {
    // An exponent field of all ones means NaN or +/-Infinity. Extracting the
    // field with sign extension turns the all-ones pattern into -1.
    ldr(scratch1, FieldMemOperand(object, HeapNumber::kExponentOffset));
    Sbfx(scratch1,
         scratch1,
         HeapNumber::kExponentShift,
         HeapNumber::kExponentBits);
    cmp(scratch1, Operand(-1));
    b(eq, not_number);
  }

  vldr(result, scratch2, HeapNumber::kValueOffset);
  bind(&loaded);
}
// Converts the tagged small integer in |smi| to a double in |value|.
// |scratch1| receives the untagged integer and |scratch2| is the
// single-precision register it passes through on its way to the conversion.
void MacroAssembler::SmiToDoubleVFPRegister(Register smi,
                                            DwVfpRegister value,
                                            Register scratch1,
                                            SwVfpRegister scratch2) {
  // An arithmetic shift right by the tag size removes the smi tag.
  const Operand untagged(smi, ASR, kSmiTagSize);
  mov(scratch1, untagged);
  // Move the integer into the VFP register file, then convert it in place.
  vmov(scratch2, scratch1);
  vcvt_f64_s32(value, scratch2);
}
void MacroAssembler::GetLeastBitsFromSmi(Register dst,
Register src,
int num_least_bits) {
......@@ -1686,6 +1736,18 @@ void MacroAssembler::AllocateHeapNumber(Register result,
}
// Allocates a heap number through AllocateHeapNumber (forwarding
// |gc_required| to it) and stores the double in |value| into the new object.
// |result| holds the tagged object; |scratch1| ends up holding its untagged
// address.
void MacroAssembler::AllocateHeapNumberWithValue(Register result,
                                                 DwVfpRegister value,
                                                 Register scratch1,
                                                 Register scratch2,
                                                 Register heap_number_map,
                                                 Label* gc_required) {
  AllocateHeapNumber(result, scratch1, scratch2, heap_number_map, gc_required);

  // Remove the heap-object tag so the value field can be addressed for vstr.
  const Register untagged_address = scratch1;
  sub(untagged_address, result, Operand(kHeapObjectTag));
  vstr(value, untagged_address, HeapNumber::kValueOffset);
}
void MacroAssembler::CountLeadingZeros(Register zeros, // Answer.
Register source, // Input.
Register scratch) {
......
......@@ -67,6 +67,17 @@ enum AllocationFlags {
};
// Option bits for ObjectToDoubleVFPRegister. The non-zero values are distinct
// single bits so they can be combined and tested with bitwise operators.
enum ObjectToDoubleFlags {
  NO_OBJECT_TO_DOUBLE_FLAGS = 0,    // Default: no special handling.
  OBJECT_NOT_SMI = 0x1,             // Caller knows the object is not a smi.
  AVOID_NANS_AND_INFINITIES = 0x2   // Treat NaN and +/-Infinity as the
                                    // non-number case instead of loading them.
};
// MacroAssembler implements a collection of frequently used macros.
class MacroAssembler: public Assembler {
public:
......@@ -381,6 +392,13 @@ class MacroAssembler: public Assembler {
Register scratch2,
Register heap_number_map,
Label* gc_required);
// Allocates a heap number as AllocateHeapNumber does (gc_required is passed
// on to it) and stores the double held in value into the new object.
// scratch1 is overwritten with the untagged address of the object.
void AllocateHeapNumberWithValue(Register result,
DwVfpRegister value,
Register scratch1,
Register scratch2,
Register heap_number_map,
Label* gc_required);
// ---------------------------------------------------------------------------
// Support functions.
......@@ -469,6 +487,27 @@ class MacroAssembler: public Assembler {
Register outHighReg,
Register outLowReg);
// Load the value of a number object into a VFP double register. If the object
// is not a number a jump to the label not_number is performed and the VFP
// double register is unchanged.
// The object may be a smi or a heap number; heap_number_map must hold the
// heap number map. The flags can skip the smi check (OBJECT_NOT_SMI) or make
// NaNs and infinities take the not_number branch as well
// (AVOID_NANS_AND_INFINITIES). scratch1, scratch2 and scratch3 are
// overwritten.
void ObjectToDoubleVFPRegister(
Register object,
DwVfpRegister value,
Register scratch1,
Register scratch2,
Register heap_number_map,
SwVfpRegister scratch3,
Label* not_number,
ObjectToDoubleFlags flags = NO_OBJECT_TO_DOUBLE_FLAGS);
// Load the value of a smi object into a VFP double register. The register
// scratch1 can be the same register as smi in which case smi will hold the
// untagged value afterwards. scratch2 is a single-precision VFP register that
// receives the untagged integer before it is converted to a double.
void SmiToDoubleVFPRegister(Register smi,
DwVfpRegister value,
Register scratch1,
SwVfpRegister scratch2);
// Count leading zeros in a 32 bit word. On ARM5 and later it uses the clz
// instruction. On pre-ARM5 hardware this routine gives the wrong answer
// for 0 (31 instead of 32). Source and scratch can be the same in which case
......
......@@ -26,6 +26,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdlib.h>
#include <math.h>
#include <cstdarg>
#include "v8.h"
......@@ -2262,7 +2263,8 @@ static int GlueRegCode(bool last_bit, int vm, int m) {
// Dd = vmul(Dn, Dm)
// Dd = vdiv(Dn, Dm)
// vcmp(Dd, Dm)
// VMRS
// vmrs
// Dd = vsqrt(Dm)
void Simulator::DecodeTypeVFP(Instr* instr) {
ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) );
ASSERT(instr->Bits(11, 9) == 0x5);
......@@ -2284,6 +2286,11 @@ void Simulator::DecodeTypeVFP(Instr* instr) {
} else if (((instr->Opc2Field() == 0x4) || (instr->Opc2Field() == 0x5)) &&
(instr->Opc3Field() & 0x1)) {
DecodeVCMP(instr);
} else if (((instr->Opc2Field() == 0x1)) && (instr->Opc3Field() == 0x3)) {
// vsqrt
double dm_value = get_double_from_d_register(vm);
double dd_value = sqrt(dm_value);
set_d_register_from_double(vd, dd_value);
} else {
UNREACHABLE(); // Not used by V8.
}
......
......@@ -401,3 +401,16 @@ TEST(Type3) {
VERIFY_RUN();
}
// Disassembler test for VFP instructions. The comparison is only made when
// VFP3 is reported as supported, since the assembler asserts that VFP3 is
// enabled before emitting vsqrt.
TEST(Vfp) {
SETUP();
if (CpuFeatures::IsSupported(VFP3)) {
CpuFeatures::Scope scope(VFP3);
// vsqrt d0, d0 with the default condition: expected encoding 0xeeb10bc0.
// NOTE(review): COMPARE presumably assembles the instruction and checks the
// disassembly against this string -- confirm against the macro definition.
COMPARE(vsqrt(d0, d0),
"eeb10bc0 vsqrt.f64 d0, d0");
}
VERIFY_RUN();
}
......@@ -27,18 +27,23 @@
// Tests the special cases specified by ES 15.8.2.17
// Checks Math.sqrt(value) against expected_sqrt and, for finite values only,
// checks that Math.pow(value, 0.5) gives the same expected result.
function test(expected_sqrt, value) {
  assertEquals(expected_sqrt, Math.sqrt(value));
  // NaN and +/-Infinity are not compared through Math.pow.
  if (!isFinite(value)) return;
  assertEquals(expected_sqrt, Math.pow(value, 0.5));
}
// Simple sanity check
test(2, 4);
test(0.1, 0.01);

// Spec tests
test(NaN, NaN);
test(NaN, -1);
test(+0, +0);
// NOTE(review): this also compares Math.pow(-0, 0.5) with -0, while ES5
// 15.8.2.13 gives +0 for that case; it only passes if assertEquals treats
// -0 and +0 as equal -- confirm.
test(-0, -0);
test(Infinity, Infinity);
// -Infinity is smaller than 0 so it should return NaN
test(NaN, -Infinity);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment