Implement int32 TypeRecordingBinaryOp on ARM.

TEST=none BUG=none Patch by Rodolph Perfetta from ARM Ltd. Review URL: http://codereview.chromium.org/6594009 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@7014 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

Implement int32 TypeRecordingBinaryOp on ARM.
TEST=none BUG=none Patch by Rodolph Perfetta from ARM Ltd. Review URL: http://codereview.chromium.org/6594009 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@7014 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
1703b8a3 · sgjesse@chromium.org · 617ccc1d · 1703b8a3 · 1703b8a3 · 1703b8a3
Commit 1703b8a3 authored Mar 02, 2011 by sgjesse@chromium.org
8 changed files
--- a/src/arm/assembler-arm.h
+++ b/src/arm/assembler-arm.h
@@ -284,6 +284,7 @@ const SwVfpRegister s29 = { 29 };
 const SwVfpRegister s30 = { 30 };
 const SwVfpRegister s31 = { 31 };

+const DwVfpRegister no_dreg = { -1 };
 const DwVfpRegister d0  = {  0 };
 const DwVfpRegister d1  = {  1 };
 const DwVfpRegister d2  = {  2 };

--- a/src/arm/code-stubs-arm.cc
+++ b/src/arm/code-stubs-arm.cc
--- a/src/arm/constants-arm.h
+++ b/src/arm/constants-arm.h
@@ -385,7 +385,10 @@ enum VFPConversionMode {
  kDefaultRoundToZero = 1
 };

+// This mask does not include the "inexact" or "input denormal" cumulative
+// exceptions flags, because we usually don't want to check for it.
 static const uint32_t kVFPExceptionMask = 0xf;
+static const uint32_t kVFPInexactExceptionBit = 1 << 4;
 static const uint32_t kVFPFlushToZeroMask = 1 << 24;
 static const uint32_t kVFPInvalidExceptionBit = 1;

@@ -411,6 +414,11 @@ enum VFPRoundingMode {

 static const uint32_t kVFPRoundingModeMask = 3 << 22;

+enum CheckForInexactConversion {
+  kCheckForInexactConversion,
+  kDontCheckForInexactConversion
+};
+
 // -----------------------------------------------------------------------------
 // Hints.


--- a/src/arm/lithium-codegen-arm.cc
+++ b/src/arm/lithium-codegen-arm.cc
@@ -2583,41 +2583,6 @@ void LCodeGen::DoMathAbs(LUnaryMathOperation* instr) {
 }


-// Truncates a double using a specific rounding mode.
-// Clears the z flag (ne condition) if an overflow occurs.
-void LCodeGen::EmitVFPTruncate(VFPRoundingMode rounding_mode,
-                               SwVfpRegister result,
-                               DwVfpRegister double_input,
-                               Register scratch1,
-                               Register scratch2) {
-  Register prev_fpscr = scratch1;
-  Register scratch = scratch2;
-
-  // Set custom FPCSR:
-  //  - Set rounding mode.
-  //  - Clear vfp cumulative exception flags.
-  //  - Make sure Flush-to-zero mode control bit is unset.
-  __ vmrs(prev_fpscr);
-  __ bic(scratch, prev_fpscr, Operand(kVFPExceptionMask |
-                                      kVFPRoundingModeMask |
-                                      kVFPFlushToZeroMask));
-  __ orr(scratch, scratch, Operand(rounding_mode));
-  __ vmsr(scratch);
-
-  // Convert the argument to an integer.
-  __ vcvt_s32_f64(result,
-                  double_input,
-                  kFPSCRRounding);
-
-  // Retrieve FPSCR.
-  __ vmrs(scratch);
-  // Restore FPSCR.
-  __ vmsr(prev_fpscr);
-  // Check for vfp exceptions.
-  __ tst(scratch, Operand(kVFPExceptionMask));
-}
-
-
 void LCodeGen::DoMathFloor(LUnaryMathOperation* instr) {
  DoubleRegister input = ToDoubleRegister(instr->InputAt(0));
  Register result = ToRegister(instr->result());
@@ -2625,11 +2590,11 @@ void LCodeGen::DoMathFloor(LUnaryMathOperation* instr) {
  Register scratch1 = scratch0();
  Register scratch2 = ToRegister(instr->TempAt(0));

-  EmitVFPTruncate(kRoundToMinusInf,
-                  single_scratch,
-                  input,
-                  scratch1,
-                  scratch2);
+  __ EmitVFPTruncate(kRoundToMinusInf,
+                     single_scratch,
+                     input,
+                     scratch1,
+                     scratch2);
  DeoptimizeIf(ne, instr->environment());

  // Move the result back to general purpose register r0.
@@ -2651,11 +2616,11 @@ void LCodeGen::DoMathRound(LUnaryMathOperation* instr) {
  Register result = ToRegister(instr->result());
  Register scratch1 = scratch0();
  Register scratch2 = result;
-  EmitVFPTruncate(kRoundToNearest,
-                  double_scratch0().low(),
-                  input,
-                  scratch1,
-                  scratch2);
+  __ EmitVFPTruncate(kRoundToNearest,
+                     double_scratch0().low(),
+                     input,
+                     scratch1,
+                     scratch2);
  DeoptimizeIf(ne, instr->environment());
  __ vmov(result, double_scratch0().low());

@@ -3370,11 +3335,12 @@ void LCodeGen::DoDoubleToI(LDoubleToI* instr) {
  Register scratch1 = scratch0();
  Register scratch2 = ToRegister(instr->TempAt(0));

-  EmitVFPTruncate(kRoundToZero,
-                  single_scratch,
-                  double_input,
-                  scratch1,
-                  scratch2);
+  __ EmitVFPTruncate(kRoundToZero,
+                     single_scratch,
+                     double_input,
+                     scratch1,
+                     scratch2);
+
  // Deoptimize if we had a vfp invalid exception.
  DeoptimizeIf(ne, instr->environment());


--- a/src/arm/lithium-codegen-arm.h
+++ b/src/arm/lithium-codegen-arm.h
@@ -206,11 +206,6 @@ class LCodeGen BASE_EMBEDDED {
  // Specific math operations - used from DoUnaryMathOperation.
  void EmitIntegerMathAbs(LUnaryMathOperation* instr);
  void DoMathAbs(LUnaryMathOperation* instr);
-  void EmitVFPTruncate(VFPRoundingMode rounding_mode,
-                       SwVfpRegister result,
-                       DwVfpRegister double_input,
-                       Register scratch1,
-                       Register scratch2);
  void DoMathFloor(LUnaryMathOperation* instr);
  void DoMathRound(LUnaryMathOperation* instr);
  void DoMathSqrt(LUnaryMathOperation* instr);

--- a/src/arm/macro-assembler-arm.cc
+++ b/src/arm/macro-assembler-arm.cc
@@ -271,6 +271,29 @@ void MacroAssembler::Sbfx(Register dst, Register src1, int lsb, int width,
 }


+void MacroAssembler::Bfi(Register dst,
+                         Register src,
+                         Register scratch,
+                         int lsb,
+                         int width,
+                         Condition cond) {
+  ASSERT(0 <= lsb && lsb < 32);
+  ASSERT(0 <= width && width < 32);
+  ASSERT(lsb + width < 32);
+  ASSERT(!scratch.is(dst));
+  if (width == 0) return;
+  if (!CpuFeatures::IsSupported(ARMv7)) {
+    int mask = (1 << (width + lsb)) - 1 - ((1 << lsb) - 1);
+    bic(dst, dst, Operand(mask));
+    and_(scratch, src, Operand((1 << width) - 1));
+    mov(scratch, Operand(scratch, LSL, lsb));
+    orr(dst, dst, scratch);
+  } else {
+    bfi(dst, src, lsb, width, cond);
+  }
+}
+
+
 void MacroAssembler::Bfc(Register dst, int lsb, int width, Condition cond) {
  ASSERT(lsb < 32);
  if (!CpuFeatures::IsSupported(ARMv7)) {
@@ -1818,9 +1841,9 @@ void MacroAssembler::ConvertToInt32(Register source,
    ldr(scratch, FieldMemOperand(source, HeapNumber::kExponentOffset));
    // Get exponent alone in scratch2.
    Ubfx(scratch2,
-            scratch,
-            HeapNumber::kExponentShift,
-            HeapNumber::kExponentBits);
+         scratch,
+         HeapNumber::kExponentShift,
+         HeapNumber::kExponentBits);
    // Load dest with zero.  We use this either for the final shift or
    // for the answer.
    mov(dest, Operand(0, RelocInfo::NONE));
@@ -1883,6 +1906,52 @@ void MacroAssembler::ConvertToInt32(Register source,
 }


+void MacroAssembler::EmitVFPTruncate(VFPRoundingMode rounding_mode,
+                                     SwVfpRegister result,
+                                     DwVfpRegister double_input,
+                                     Register scratch1,
+                                     Register scratch2,
+                                     CheckForInexactConversion check_inexact) {
+  ASSERT(CpuFeatures::IsSupported(VFP3));
+  CpuFeatures::Scope scope(VFP3);
+  Register prev_fpscr = scratch1;
+  Register scratch = scratch2;
+
+  int32_t check_inexact_conversion =
+    (check_inexact == kCheckForInexactConversion) ? kVFPInexactExceptionBit : 0;
+
+  // Set custom FPCSR:
+  //  - Set rounding mode.
+  //  - Clear vfp cumulative exception flags.
+  //  - Make sure Flush-to-zero mode control bit is unset.
+  vmrs(prev_fpscr);
+  bic(scratch,
+      prev_fpscr,
+      Operand(kVFPExceptionMask |
+              check_inexact_conversion |
+              kVFPRoundingModeMask |
+              kVFPFlushToZeroMask));
+  // 'Round To Nearest' is encoded by 0b00 so no bits need to be set.
+  if (rounding_mode != kRoundToNearest) {
+    orr(scratch, scratch, Operand(rounding_mode));
+  }
+  vmsr(scratch);
+
+  // Convert the argument to an integer.
+  vcvt_s32_f64(result,
+               double_input,
+               (rounding_mode == kRoundToZero) ? kDefaultRoundToZero
+                                               : kFPSCRRounding);
+
+  // Retrieve FPSCR.
+  vmrs(scratch);
+  // Restore FPSCR.
+  vmsr(prev_fpscr);
+  // Check for vfp exceptions.
+  tst(scratch, Operand(kVFPExceptionMask | check_inexact_conversion));
+}
+
+
 void MacroAssembler::GetLeastBitsFromSmi(Register dst,
                                         Register src,
                                         int num_least_bits) {

--- a/src/arm/macro-assembler-arm.h
+++ b/src/arm/macro-assembler-arm.h
@@ -121,6 +121,15 @@ class MacroAssembler: public Assembler {
            Condition cond = al);
  void Sbfx(Register dst, Register src, int lsb, int width,
            Condition cond = al);
+  // The scratch register is not used for ARMv7.
+  // scratch can be the same register as src (in which case it is trashed), but
+  // not the same as dst.
+  void Bfi(Register dst,
+           Register src,
+           Register scratch,
+           int lsb,
+           int width,
+           Condition cond = al);
  void Bfc(Register dst, int lsb, int width, Condition cond = al);
  void Usat(Register dst, int satpos, const Operand& src,
            Condition cond = al);
@@ -234,6 +243,17 @@ class MacroAssembler: public Assembler {
    }
  }

+  // Pop two registers. Pops rightmost register first (from lower address).
+  void Pop(Register src1, Register src2, Condition cond = al) {
+    ASSERT(!src1.is(src2));
+    if (src1.code() > src2.code()) {
+      ldm(ia_w, sp, src1.bit() | src2.bit(), cond);
+    } else {
+      ldr(src2, MemOperand(sp, 4, PostIndex), cond);
+      ldr(src1, MemOperand(sp, 4, PostIndex), cond);
+    }
+  }
+
  // Push and pop the registers that can hold pointers, as defined by the
  // RegList constant kSafepointSavedRegisters.
  void PushSafepointRegisters();
@@ -621,6 +641,19 @@ class MacroAssembler: public Assembler {
                      DwVfpRegister double_scratch,
                      Label *not_int32);

+// Truncates a double using a specific rounding mode.
+// Clears the z flag (ne condition) if an overflow occurs.
+// If exact_conversion is true, the z flag is also cleared if the conversion
+// was inexact, ie. if the double value could not be converted exactly
+// to a 32bit integer.
+  void EmitVFPTruncate(VFPRoundingMode rounding_mode,
+                       SwVfpRegister result,
+                       DwVfpRegister double_input,
+                       Register scratch1,
+                       Register scratch2,
+                       CheckForInexactConversion check
+                           = kDontCheckForInexactConversion);
+
  // Count leading zeros in a 32 bit word.  On ARM5 and later it uses the clz
  // instruction.  On pre-ARM5 hardware this routine gives the wrong answer
  // for 0 (31 instead of 32).  Source and scratch can be the same in which case

--- a/src/arm/simulator-arm.cc
+++ b/src/arm/simulator-arm.cc
@@ -2564,6 +2564,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
      double dn_value = get_double_from_d_register(vn);
      double dm_value = get_double_from_d_register(vm);
      double dd_value = dn_value / dm_value;
+      div_zero_vfp_flag_ = (dm_value == 0);
      set_d_register_from_double(vd, dd_value);
    } else {
      UNIMPLEMENTED();  // Not used by V8.
@@ -2798,14 +2799,17 @@ void Simulator::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) {

    inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer);

+    double abs_diff =
+      unsigned_integer ? fabs(val - static_cast<uint32_t>(temp))
+                       : fabs(val - temp);
+
+    inexact_vfp_flag_ = (abs_diff != 0);
+
    if (inv_op_vfp_flag_) {
      temp = VFPConversionSaturate(val, unsigned_integer);
    } else {
      switch (mode) {
        case RN: {
-          double abs_diff =
-            unsigned_integer ? fabs(val - static_cast<uint32_t>(temp))
-                             : fabs(val - temp);
          int val_sign = (val > 0) ? 1 : -1;
          if (abs_diff > 0.5) {
            temp += val_sign;