ARM: Improved double to integer truncation.

Patch from ARM Ltd.

BUG=none
TEST=Added to cctest/test-assembler-arm.cc and cctest/test-disasm-arm.cc

Review URL: http://codereview.chromium.org/6625084

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@7174 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 64288228
......@@ -2384,6 +2384,14 @@ void Assembler::vcvt_f32_f64(const SwVfpRegister dst,
}
void Assembler::vneg(const DwVfpRegister dst,
const DwVfpRegister src,
const Condition cond) {
emit(cond | 0xE*B24 | 0xB*B20 | B16 | dst.code()*B12 |
0x5*B9 | B8 | B6 | src.code());
}
void Assembler::vabs(const DwVfpRegister dst,
const DwVfpRegister src,
const Condition cond) {
......
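For reference, a standalone sketch (not part of the patch) that reproduces the vneg bit pattern built by the emit() expression above, assuming V8's Bn constants are simply 1 << n, and checks it against the encodings expected by the new test-disasm-arm.cc cases at the end of this change. The helper name is illustrative.

```cpp
#include <cassert>
#include <cstdint>

// VNEG.F64 Dd, Dm with condition 'cond', encoded as in Assembler::vneg above
// (assumes Bn == 1u << n, as in V8's constants).
uint32_t EncodeVnegF64(uint32_t cond, int dd, int dm) {
  const uint32_t B6 = 1u << 6, B8 = 1u << 8, B9 = 1u << 9, B12 = 1u << 12,
                 B16 = 1u << 16, B20 = 1u << 20, B24 = 1u << 24;
  return cond | 0xE * B24 | 0xB * B20 | B16 | dd * B12 |
         0x5 * B9 | B8 | B6 | dm;
}

int main() {
  const uint32_t al = 0xEu << 28;  // "always" condition
  const uint32_t mi = 0x4u << 28;  // "minus/negative" condition
  assert(EncodeVnegF64(al, 0, 1) == 0xEEB10B41u);  // "vneg d0, d1"
  assert(EncodeVnegF64(mi, 3, 4) == 0x4EB13B44u);  // "vnegmi d3, d4"
  return 0;
}
```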
......@@ -992,6 +992,9 @@ class Assembler : public Malloced {
VFPConversionMode mode = kDefaultRoundToZero,
const Condition cond = al);
void vneg(const DwVfpRegister dst,
const DwVfpRegister src,
const Condition cond = al);
void vabs(const DwVfpRegister dst,
const DwVfpRegister src,
const Condition cond = al);
......
......@@ -635,64 +635,13 @@ void FloatingPointHelper::ConvertNumberToInt32(MacroAssembler* masm,
__ jmp(&done);
__ bind(&not_in_int32_range);
__ ldr(scratch2, FieldMemOperand(object, HeapNumber::kExponentOffset));
__ ldr(scratch1, FieldMemOperand(object, HeapNumber::kMantissaOffset));
__ ldr(scratch1, FieldMemOperand(object, HeapNumber::kExponentOffset));
__ ldr(scratch2, FieldMemOperand(object, HeapNumber::kMantissaOffset));
// Register scratch1 contains mantissa word, scratch2 contains
// sign, exponent and mantissa. Extract biased exponent into dst.
__ Ubfx(dst,
scratch2,
HeapNumber::kExponentShift,
HeapNumber::kExponentBits);
// Express exponent as delta to 31.
__ sub(dst, dst, Operand(HeapNumber::kExponentBias + 31));
Label normal_exponent;
// If the delta is larger than kMantissaBits plus one, all bits
// would be shifted away, which means that we can return 0.
__ cmp(dst, Operand(HeapNumber::kMantissaBits + 1));
__ b(&normal_exponent, lt);
__ mov(dst, Operand(0));
__ jmp(&done);
__ bind(&normal_exponent);
const int kShiftBase = HeapNumber::kNonMantissaBitsInTopWord - 1;
// Calculate shift.
__ add(scratch3, dst, Operand(kShiftBase));
// Put implicit 1 before the mantissa part in scratch2.
__ orr(scratch2,
scratch2,
Operand(1 << HeapNumber::kMantissaBitsInTopWord));
// Save sign.
Register sign = dst;
__ and_(sign, scratch2, Operand(HeapNumber::kSignMask));
// Shift mantissa bits to the correct position in high word.
__ mov(scratch2, Operand(scratch2, LSL, scratch3));
// Replace the shifted bits with bits from the lower mantissa word.
Label pos_shift, shift_done;
__ rsb(scratch3, scratch3, Operand(32), SetCC);
__ b(&pos_shift, ge);
// Negate scratch3.
__ rsb(scratch3, scratch3, Operand(0));
__ mov(scratch1, Operand(scratch1, LSL, scratch3));
__ jmp(&shift_done);
__ bind(&pos_shift);
__ mov(scratch1, Operand(scratch1, LSR, scratch3));
__ bind(&shift_done);
__ orr(scratch2, scratch2, Operand(scratch1));
// Restore sign if necessary.
__ cmp(sign, Operand(0));
__ rsb(dst, scratch2, Operand(0), LeaveCC, ne);
__ mov(dst, scratch2, LeaveCC, eq);
__ EmitOutOfInt32RangeTruncate(dst,
scratch1,
scratch2,
scratch3);
__ jmp(&done);
__ bind(&is_smi);
......
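As a quick reference for what this truncating path has to produce, a few ECMA-262 ToInt32 (section 9.5) examples; the values are chosen for illustration and are not taken from the patch or its tests.

```cpp
// ToInt32(2147483648.0)   /*  2^31     */  == -2147483648
// ToInt32(4294967296.0)   /*  2^32     */  ==  0
// ToInt32(-2147483649.0)  /* -2^31 - 1 */  ==  2147483647
// ToInt32(NaN) == ToInt32(+Infinity) == ToInt32(-Infinity) == 0
```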
......@@ -388,9 +388,11 @@ enum VFPConversionMode {
// This mask does not include the "inexact" or "input denormal" cumulative
// exception flags, because we usually don't want to check for them.
static const uint32_t kVFPExceptionMask = 0xf;
static const uint32_t kVFPInvalidOpExceptionBit = 1 << 0;
static const uint32_t kVFPOverflowExceptionBit = 1 << 2;
static const uint32_t kVFPUnderflowExceptionBit = 1 << 3;
static const uint32_t kVFPInexactExceptionBit = 1 << 4;
static const uint32_t kVFPFlushToZeroMask = 1 << 24;
static const uint32_t kVFPInvalidExceptionBit = 1;
static const uint32_t kVFPNConditionFlagBit = 1 << 31;
static const uint32_t kVFPZConditionFlagBit = 1 << 30;
......
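For context on the renamed constants (and on why kVFPExceptionMask stays 0xf), the FPSCR cumulative exception flag layout from the ARM Architecture Reference Manual is summarised below; this block is illustrative only and not part of the patch.

```cpp
// FPSCR cumulative exception flags (ARM ARM), matching the constants above.
enum VFPCumulativeExceptionFlags {
  kIOC = 1 << 0,  // invalid operation -> kVFPInvalidOpExceptionBit
  kDZC = 1 << 1,  // division by zero  (covered by kVFPExceptionMask, unnamed)
  kOFC = 1 << 2,  // overflow          -> kVFPOverflowExceptionBit
  kUFC = 1 << 3,  // underflow         -> kVFPUnderflowExceptionBit
  kIXC = 1 << 4,  // inexact           -> kVFPInexactExceptionBit
  kIDC = 1 << 7   // input denormal    (excluded from kVFPExceptionMask)
};
// kVFPExceptionMask == 0xf == kIOC | kDZC | kOFC | kUFC.
```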
......@@ -1067,6 +1067,9 @@ void Decoder::DecodeTypeVFP(Instruction* instr) {
} else if ((instr->Opc2Value() == 0x0) && (instr->Opc3Value() == 0x3)) {
// vabs
Format(instr, "vabs'cond 'Dd, 'Dm");
} else if ((instr->Opc2Value() == 0x1) && (instr->Opc3Value() == 0x1)) {
// vneg
Format(instr, "vneg'cond 'Dd, 'Dm");
} else if ((instr->Opc2Value() == 0x7) && (instr->Opc3Value() == 0x3)) {
DecodeVCVTBetweenDoubleAndSingle(instr);
} else if ((instr->Opc2Value() == 0x8) && (instr->Opc3Value() & 0x1)) {
......
......@@ -1610,12 +1610,15 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) {
LOperand* value = UseRegister(instr->value());
bool needs_check = !instr->value()->type().IsSmi();
LInstruction* res = NULL;
if (needs_check) {
res = DefineSameAsFirst(new LTaggedToI(value, FixedTemp(d1)));
} else {
if (!needs_check) {
res = DefineSameAsFirst(new LSmiUntag(value, needs_check));
}
if (needs_check) {
} else {
LOperand* temp1 = TempRegister();
LOperand* temp2 = instr->CanTruncateToInt32() ? TempRegister()
: NULL;
LOperand* temp3 = instr->CanTruncateToInt32() ? FixedTemp(d3)
: NULL;
res = DefineSameAsFirst(new LTaggedToI(value, temp1, temp2, temp3));
res = AssignEnvironment(res);
}
return res;
......@@ -1635,7 +1638,10 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) {
} else {
ASSERT(to.IsInteger32());
LOperand* value = UseRegister(instr->value());
LDoubleToI* res = new LDoubleToI(value, TempRegister());
LDoubleToI* res =
new LDoubleToI(value,
TempRegister(),
instr->CanTruncateToInt32() ? TempRegister() : NULL);
return AssignEnvironment(DefineAsRegister(res));
}
} else if (from.IsInteger32()) {
......
......@@ -1458,11 +1458,12 @@ class LNumberTagD: public LTemplateInstruction<1, 1, 2> {
// Sometimes truncating conversion from a double to an int32.
class LDoubleToI: public LTemplateInstruction<1, 1, 1> {
class LDoubleToI: public LTemplateInstruction<1, 1, 2> {
public:
explicit LDoubleToI(LOperand* value, LOperand* temp1) {
LDoubleToI(LOperand* value, LOperand* temp1, LOperand* temp2) {
inputs_[0] = value;
temps_[0] = temp1;
temps_[1] = temp2;
}
DECLARE_CONCRETE_INSTRUCTION(DoubleToI, "double-to-i")
......@@ -1473,11 +1474,16 @@ class LDoubleToI: public LTemplateInstruction<1, 1, 1> {
// Truncating conversion from a tagged value to an int32.
class LTaggedToI: public LTemplateInstruction<1, 1, 1> {
class LTaggedToI: public LTemplateInstruction<1, 1, 3> {
public:
LTaggedToI(LOperand* value, LOperand* temp) {
LTaggedToI(LOperand* value,
LOperand* temp1,
LOperand* temp2,
LOperand* temp3) {
inputs_[0] = value;
temps_[0] = temp;
temps_[0] = temp1;
temps_[1] = temp2;
temps_[2] = temp3;
}
DECLARE_CONCRETE_INSTRUCTION(TaggedToI, "tagged-to-i")
......
......@@ -3380,19 +3380,30 @@ class DeferredTaggedToI: public LDeferredCode {
void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) {
Label done;
Register input_reg = ToRegister(instr->InputAt(0));
Register scratch = scratch0();
DoubleRegister dbl_scratch = d0;
SwVfpRegister flt_scratch = s0;
DoubleRegister dbl_tmp = ToDoubleRegister(instr->TempAt(0));
Register scratch1 = scratch0();
Register scratch2 = ToRegister(instr->TempAt(0));
DwVfpRegister double_scratch = double_scratch0();
SwVfpRegister single_scratch = double_scratch.low();
ASSERT(!scratch1.is(input_reg) && !scratch1.is(scratch2));
ASSERT(!scratch2.is(input_reg) && !scratch2.is(scratch1));
Label done;
// Heap number map check.
__ ldr(scratch, FieldMemOperand(input_reg, HeapObject::kMapOffset));
__ ldr(scratch1, FieldMemOperand(input_reg, HeapObject::kMapOffset));
__ LoadRoot(ip, Heap::kHeapNumberMapRootIndex);
__ cmp(scratch, Operand(ip));
__ cmp(scratch1, Operand(ip));
if (instr->truncating()) {
Register scratch3 = ToRegister(instr->TempAt(1));
DwVfpRegister double_scratch2 = ToDoubleRegister(instr->TempAt(2));
ASSERT(!scratch3.is(input_reg) &&
!scratch3.is(scratch1) &&
!scratch3.is(scratch2));
// Performs a truncating conversion of a floating point number as used by
// the JS bitwise operations.
Label heap_number;
__ b(eq, &heap_number);
// Check for undefined. Undefined is converted to zero for truncating
......@@ -3404,36 +3415,38 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) {
__ b(&done);
__ bind(&heap_number);
__ sub(ip, input_reg, Operand(kHeapObjectTag));
__ vldr(dbl_tmp, ip, HeapNumber::kValueOffset);
__ vcmp(dbl_tmp, 0.0); // Sets overflow bit in FPSCR flags if NaN.
__ vcvt_s32_f64(flt_scratch, dbl_tmp);
__ vmov(input_reg, flt_scratch); // 32-bit result of conversion.
__ vmrs(pc); // Move vector status bits to normal status bits.
// Overflow bit is set if dbl_tmp is NaN.
__ cmn(input_reg, Operand(1), vc); // 0x7fffffff + 1 -> overflow.
__ cmp(input_reg, Operand(1), vc); // 0x80000000 - 1 -> overflow.
DeoptimizeIf(vs, instr->environment()); // Saturation may have occurred.
__ sub(scratch1, input_reg, Operand(kHeapObjectTag));
__ vldr(double_scratch2, scratch1, HeapNumber::kValueOffset);
__ EmitECMATruncate(input_reg,
double_scratch2,
single_scratch,
scratch1,
scratch2,
scratch3);
} else {
CpuFeatures::Scope scope(VFP3);
// Deoptimize if we don't have a heap number.
DeoptimizeIf(ne, instr->environment());
__ sub(ip, input_reg, Operand(kHeapObjectTag));
__ vldr(dbl_tmp, ip, HeapNumber::kValueOffset);
__ vcvt_s32_f64(flt_scratch, dbl_tmp);
__ vmov(input_reg, flt_scratch); // 32-bit result of conversion.
// Non-truncating conversion means that we cannot lose bits, so we convert
// back to check; note that using non-overlapping s and d regs would be
// slightly faster.
__ vcvt_f64_s32(dbl_scratch, flt_scratch);
__ VFPCompareAndSetFlags(dbl_scratch, dbl_tmp);
DeoptimizeIf(ne, instr->environment()); // Not equal or unordered.
__ vldr(double_scratch, ip, HeapNumber::kValueOffset);
__ EmitVFPTruncate(kRoundToZero,
single_scratch,
double_scratch,
scratch1,
scratch2,
kCheckForInexactConversion);
DeoptimizeIf(ne, instr->environment());
// Load the result.
__ vmov(input_reg, single_scratch);
if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
__ tst(input_reg, Operand(input_reg));
__ cmp(input_reg, Operand(0));
__ b(ne, &done);
__ vmov(lr, ip, dbl_tmp);
__ tst(ip, Operand(1 << 31)); // Test sign bit.
__ vmov(scratch1, double_scratch.high());
__ tst(scratch1, Operand(HeapNumber::kSignMask));
DeoptimizeIf(ne, instr->environment());
}
}
......@@ -3475,48 +3488,38 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) {
void LCodeGen::DoDoubleToI(LDoubleToI* instr) {
LOperand* input = instr->InputAt(0);
ASSERT(input->IsDoubleRegister());
LOperand* result = instr->result();
ASSERT(result->IsRegister());
DoubleRegister double_input = ToDoubleRegister(input);
Register result_reg = ToRegister(result);
SwVfpRegister single_scratch = double_scratch0().low();
Register result_reg = ToRegister(instr->result());
Register scratch1 = scratch0();
Register scratch2 = ToRegister(instr->TempAt(0));
DwVfpRegister double_input = ToDoubleRegister(instr->InputAt(0));
DwVfpRegister double_scratch = double_scratch0();
SwVfpRegister single_scratch = double_scratch0().low();
__ EmitVFPTruncate(kRoundToZero,
single_scratch,
double_input,
scratch1,
scratch2);
// Deoptimize if we had a vfp invalid exception.
DeoptimizeIf(ne, instr->environment());
// Retrieve the result.
__ vmov(result_reg, single_scratch);
Label done;
if (!instr->truncating()) {
// Convert result back to double and compare with input
// to check if the conversion was exact.
__ vmov(single_scratch, result_reg);
__ vcvt_f64_s32(double_scratch0(), single_scratch);
__ VFPCompareAndSetFlags(double_scratch0(), double_input);
if (instr->truncating()) {
Register scratch3 = ToRegister(instr->TempAt(1));
__ EmitECMATruncate(result_reg,
double_input,
single_scratch,
scratch1,
scratch2,
scratch3);
} else {
VFPRoundingMode rounding_mode = kRoundToMinusInf;
__ EmitVFPTruncate(rounding_mode,
single_scratch,
double_input,
scratch1,
scratch2,
kCheckForInexactConversion);
// Deoptimize if we had a vfp invalid exception,
// including inexact operation.
DeoptimizeIf(ne, instr->environment());
if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
Label done;
__ cmp(result_reg, Operand(0));
__ b(ne, &done);
// Check for -0.
__ vmov(scratch1, double_input.high());
__ tst(scratch1, Operand(HeapNumber::kSignMask));
DeoptimizeIf(ne, instr->environment());
__ bind(&done);
}
// Retrieve the result.
__ vmov(result_reg, single_scratch);
}
__ bind(&done);
}
......
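To summarise the DoDoubleToI paths above: the truncating case (JS bitwise operations) always produces the ECMA ToInt32 of the input via EmitECMATruncate and never deoptimizes, while the non-truncating case only succeeds when the double is exactly an int32 (and not -0 when kBailoutOnMinusZero is set). Below is a rough C++ model of the non-truncating check, a hedged sketch under my own naming rather than V8 code; the rounding mode used for the VFP conversion is immaterial here because any inexact conversion deoptimizes.

```cpp
#include <cmath>
#include <cstdint>

// Returns true and stores the converted value only if 'input' is exactly an
// int32; returning false corresponds to DeoptimizeIf firing in the code above.
bool NonTruncatingDoubleToI(double input, bool bailout_on_minus_zero,
                            int32_t* result) {
  double rounded = std::trunc(input);
  if (rounded != input) return false;  // inexact conversion (also catches NaN)
  if (rounded < -2147483648.0 || rounded > 2147483647.0) {
    return false;  // out of int32 range
  }
  if (bailout_on_minus_zero && rounded == 0.0 && std::signbit(input)) {
    return false;  // -0 must deoptimize
  }
  *result = static_cast<int32_t>(rounded);
  return true;
}
```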
......@@ -2032,6 +2032,121 @@ void MacroAssembler::EmitVFPTruncate(VFPRoundingMode rounding_mode,
}
void MacroAssembler::EmitOutOfInt32RangeTruncate(Register result,
Register input_high,
Register input_low,
Register scratch) {
Label done, normal_exponent, restore_sign;
// Extract the biased exponent in result.
Ubfx(result,
input_high,
HeapNumber::kExponentShift,
HeapNumber::kExponentBits);
// Check for Infinity and NaNs, which should return 0.
cmp(result, Operand(HeapNumber::kExponentMask));
mov(result, Operand(0), LeaveCC, eq);
b(eq, &done);
// Express exponent as delta to (number of mantissa bits + 31).
sub(result,
result,
Operand(HeapNumber::kExponentBias + HeapNumber::kMantissaBits + 31),
SetCC);
// If the delta is strictly positive, all bits would be shifted away,
// which means that we can return 0.
b(le, &normal_exponent);
mov(result, Operand(0));
b(&done);
bind(&normal_exponent);
const int kShiftBase = HeapNumber::kNonMantissaBitsInTopWord - 1;
// Calculate shift.
add(scratch, result, Operand(kShiftBase + HeapNumber::kMantissaBits), SetCC);
// Save the sign.
Register sign = result;
result = no_reg;
and_(sign, input_high, Operand(HeapNumber::kSignMask));
// Set the implicit 1 before the mantissa part in input_high.
orr(input_high,
input_high,
Operand(1 << HeapNumber::kMantissaBitsInTopWord));
// Shift the mantissa bits to the correct position.
// We don't need to clear non-mantissa bits as they will be shifted away.
// If they weren't, it would mean that the answer is in the 32bit range.
mov(input_high, Operand(input_high, LSL, scratch));
// Replace the shifted bits with bits from the lower mantissa word.
Label pos_shift, shift_done;
rsb(scratch, scratch, Operand(32), SetCC);
b(&pos_shift, ge);
// Negate scratch.
rsb(scratch, scratch, Operand(0));
mov(input_low, Operand(input_low, LSL, scratch));
b(&shift_done);
bind(&pos_shift);
mov(input_low, Operand(input_low, LSR, scratch));
bind(&shift_done);
orr(input_high, input_high, Operand(input_low));
// Restore sign if necessary.
cmp(sign, Operand(0));
result = sign;
sign = no_reg;
rsb(result, input_high, Operand(0), LeaveCC, ne);
mov(result, input_high, LeaveCC, eq);
bind(&done);
}
void MacroAssembler::EmitECMATruncate(Register result,
DwVfpRegister double_input,
SwVfpRegister single_scratch,
Register scratch,
Register input_high,
Register input_low) {
CpuFeatures::Scope scope(VFP3);
ASSERT(!input_high.is(result));
ASSERT(!input_low.is(result));
ASSERT(!input_low.is(input_high));
ASSERT(!scratch.is(result) &&
!scratch.is(input_high) &&
!scratch.is(input_low));
ASSERT(!single_scratch.is(double_input.low()) &&
!single_scratch.is(double_input.high()));
Label done;
// Clear cumulative exception flags.
ClearFPSCRBits(kVFPExceptionMask, scratch);
// Try a conversion to a signed integer.
vcvt_s32_f64(single_scratch, double_input);
vmov(result, single_scratch);
// Retrieve the FPSCR.
vmrs(scratch);
// Check for overflow and NaNs.
tst(scratch, Operand(kVFPOverflowExceptionBit |
kVFPUnderflowExceptionBit |
kVFPInvalidOpExceptionBit));
// If we had no exceptions we are done.
b(eq, &done);
// Load the double value and perform a manual truncation.
vmov(input_low, input_high, double_input);
EmitOutOfInt32RangeTruncate(result,
input_high,
input_low,
scratch);
bind(&done);
}
void MacroAssembler::GetLeastBitsFromSmi(Register dst,
Register src,
int num_least_bits) {
......
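For readers who want to sanity-check the bit manipulation in EmitOutOfInt32RangeTruncate, here is a minimal standalone C++ sketch of the same algorithm. It is illustrative rather than V8 code, and it assumes IEEE-754 doubles, two's-complement wrap-around on the final negation, and a finite input whose magnitude is at least 2^31, matching the helper's stated precondition.

```cpp
#include <cstdint>
#include <cstring>

// Mirrors EmitOutOfInt32RangeTruncate: ECMA ToInt32 for finite doubles whose
// magnitude is at least 2^31 (smaller values take the VFP fast path in
// EmitECMATruncate instead, so they never reach this helper).
int32_t OutOfInt32RangeTruncate(double value) {
  uint64_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  uint32_t input_high = static_cast<uint32_t>(bits >> 32);
  uint32_t input_low = static_cast<uint32_t>(bits);

  int exponent = (input_high >> 20) & 0x7FF;   // biased exponent
  if (exponent == 0x7FF) return 0;             // Infinity or NaN
  // Delta to (mantissa bits + 31); strictly positive means every bit of the
  // 32-bit result has already been shifted away.
  int delta = exponent - (1023 + 52 + 31);
  if (delta > 0) return 0;

  // Shift that moves the top mantissa word into its final position
  // (kShiftBase + kMantissaBits in the assembler version).
  int shift = delta + (12 - 1) + 52;           // == unbiased exponent - 20
  uint32_t high = (input_high & 0xFFFFF) | (1u << 20);  // add the implicit 1
  high = shift < 32 ? high << shift : 0;       // LSL by >= 32 yields 0 on ARM
  uint32_t low = shift <= 32 ? input_low >> (32 - shift)
                             : input_low << (shift - 32);
  uint32_t magnitude = high | low;
  // Apply the sign with unsigned wrap-around, matching the rsb instruction.
  uint32_t result = (input_high & 0x80000000u) ? 0u - magnitude : magnitude;
  return static_cast<int32_t>(result);
}

// Examples: OutOfInt32RangeTruncate(-2147483649.0) == 2147483647 and
// OutOfInt32RangeTruncate(6442450944.0 /* 2^32 + 2^31 */) == -2147483648.
```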
......@@ -649,11 +649,11 @@ class MacroAssembler: public Assembler {
DwVfpRegister double_scratch,
Label *not_int32);
// Truncates a double using a specific rounding mode.
// Clears the z flag (ne condition) if an overflow occurs.
// If exact_conversion is true, the z flag is also cleared if the conversion
// was inexact, ie. if the double value could not be converted exactly
// to a 32bit integer.
void EmitVFPTruncate(VFPRoundingMode rounding_mode,
SwVfpRegister result,
DwVfpRegister double_input,
......@@ -662,6 +662,27 @@ class MacroAssembler: public Assembler {
CheckForInexactConversion check
= kDontCheckForInexactConversion);
// Helper for EmitECMATruncate.
// This will truncate a floating-point value outside of the signed 32bit
// integer range to a 32bit signed integer.
// Expects the double value loaded in input_high and input_low.
// Exits with the answer in 'result'.
// Note that this code does not work for values in the 32bit range!
void EmitOutOfInt32RangeTruncate(Register result,
Register input_high,
Register input_low,
Register scratch);
// Performs a truncating conversion of a floating point number as used by
// the JS bitwise operations. See ECMA-262 9.5: ToInt32.
// Exits with 'result' holding the answer and all other registers clobbered.
void EmitECMATruncate(Register result,
DwVfpRegister double_input,
SwVfpRegister single_scratch,
Register scratch,
Register scratch2,
Register scratch3);
// Count leading zeros in a 32 bit word. On ARM5 and later it uses the clz
// instruction. On pre-ARM5 hardware this routine gives the wrong answer
// for 0 (31 instead of 32). Source and scratch can be the same in which case
......
......@@ -2467,6 +2467,8 @@ void Simulator::DecodeType7(Instruction* instr) {
// vmov :Rt = Sn
// vcvt: Dd = Sm
// vcvt: Sd = Dm
// Dd = vabs(Dm)
// Dd = vneg(Dm)
// Dd = vadd(Dn, Dm)
// Dd = vsub(Dn, Dm)
// Dd = vmul(Dn, Dm)
......@@ -2502,6 +2504,11 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
double dm_value = get_double_from_d_register(vm);
double dd_value = fabs(dm_value);
set_d_register_from_double(vd, dd_value);
} else if ((instr->Opc2Value() == 0x1) && (instr->Opc3Value() == 0x1)) {
// vneg
double dm_value = get_double_from_d_register(vm);
double dd_value = -dm_value;
set_d_register_from_double(vd, dd_value);
} else if ((instr->Opc2Value() == 0x7) && (instr->Opc3Value() == 0x3)) {
DecodeVCVTBetweenDoubleAndSingle(instr);
} else if ((instr->Opc2Value() == 0x8) && (instr->Opc3Value() & 0x1)) {
......
......@@ -232,6 +232,8 @@ TEST(4) {
double g;
double h;
int i;
double m;
double n;
float x;
float y;
} T;
......@@ -297,6 +299,14 @@ TEST(4) {
__ vabs(d0, d2);
__ vstr(d0, r4, OFFSET_OF(T, h));
// Test vneg.
__ vldr(d1, r4, OFFSET_OF(T, m));
__ vneg(d0, d1);
__ vstr(d0, r4, OFFSET_OF(T, m));
__ vldr(d1, r4, OFFSET_OF(T, n));
__ vneg(d0, d1);
__ vstr(d0, r4, OFFSET_OF(T, n));
__ ldm(ia_w, sp, r4.bit() | fp.bit() | pc.bit());
CodeDesc desc;
......@@ -319,12 +329,16 @@ TEST(4) {
t.g = -2718.2818;
t.h = 31415926.5;
t.i = 0;
t.m = -2718.2818;
t.n = 123.456;
t.x = 4.5;
t.y = 9.0;
Object* dummy = CALL_GENERATED_CODE(f, &t, 0, 0, 0, 0);
USE(dummy);
CHECK_EQ(4.5, t.y);
CHECK_EQ(9.0, t.x);
CHECK_EQ(-123.456, t.n);
CHECK_EQ(2718.2818, t.m);
CHECK_EQ(2, t.i);
CHECK_EQ(2718.2818, t.g);
CHECK_EQ(31415926.5, t.h);
......
......@@ -440,6 +440,11 @@ TEST(Vfp) {
COMPARE(vabs(d3, d4, mi),
"4eb03bc4 vabsmi d3, d4");
COMPARE(vneg(d0, d1),
"eeb10b41 vneg d0, d1");
COMPARE(vneg(d3, d4, mi),
"4eb13b44 vnegmi d3, d4");
COMPARE(vadd(d0, d1, d2),
"ee310b02 vadd.f64 d0, d1, d2");
COMPARE(vadd(d3, d4, d5, mi),
......