[turbofan] ARM: Reduce out-of-line NaN code size

Reduce the amount of code generated for OutOfLineLoadFloat* by computing sqrt(-1) rather than moving the NaN as an immediate. Add support for single-precision floating-point immediate moves to enable this. BUG= Review URL: https://codereview.chromium.org/1758003003 Cr-Commit-Position: refs/heads/master@{#34746}

[turbofan] ARM: Reduce out-of-line NaN code size
Reduce the amount of code generated for OutOfLineLoadFloat* by computing sqrt(-1) rather than moving the NaN as an immediate. Add support for single-precision floating-point immediate moves to enable this.

BUG=
Review URL: https://codereview.chromium.org/1758003003
Cr-Commit-Position: refs/heads/master@{#34746}
2cd9877b · martyn.capewell · Commit bot · d3583574 · 2cd9877b · 2cd9877b
Commit 2cd9877b authored Mar 14, 2016 by martyn.capewell Committed by Commit bot Mar 14, 2016
7 changed files
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -2547,12 +2547,6 @@ void  Assembler::vstm(BlockAddrMode am,
 }
-void Assembler::vmov(const SwVfpRegister dst, float imm) {
-  mov(ip, Operand(bit_cast<int32_t>(imm)));
-  vmov(dst, ip);
-}
 static void DoubleAsTwoUInt32(double d, uint32_t* lo, uint32_t* hi) {
  uint64_t i;
  memcpy(&i, &d, 8);
@@ -2564,7 +2558,7 @@ static void DoubleAsTwoUInt32(double d, uint32_t* lo, uint32_t* hi) {
 // Only works for little endian floating point formats.
 // We don't support VFP on the mixed endian floating point platform.
-static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
+static bool FitsVmovFPImmediate(double d, uint32_t* encoding) {
  DCHECK(CpuFeatures::IsSupported(VFP3));
  // VMOV can accept an immediate of the form:
@@ -2593,12 +2587,12 @@ static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
    return false;
  }
-  // Bits 62:55 must be all clear or all set.
+  // Bits 61:54 must be all clear or all set.
  if (((hi & 0x3fc00000) != 0) && ((hi & 0x3fc00000) != 0x3fc00000)) {
    return false;
  }
-  // Bit 63 must be NOT bit 62.
+  // Bit 62 must be NOT bit 61.
  if (((hi ^ (hi << 1)) & (0x40000000)) == 0) {
    return false;
  }
@@ -2613,6 +2607,25 @@ static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
 }
+void Assembler::vmov(const SwVfpRegister dst, float imm) {
+  uint32_t enc;
+  if (CpuFeatures::IsSupported(VFP3) && FitsVmovFPImmediate(imm, &enc)) {
+    // The float can be encoded in the instruction.
+    //
+    // Sd = immediate
+    // Instruction details available in ARM DDI 0406C.b, A8-936.
+    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
+    // Vd(15-12) | 101(11-9) | sz=0(8) | imm4L(3-0)
+    int vd, d;
+    dst.split_code(&vd, &d);
+    emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | enc);
+  } else {
+    mov(ip, Operand(bit_cast<int32_t>(imm)));
+    vmov(dst, ip);
+  }
+}
 void Assembler::vmov(const DwVfpRegister dst,
                     double imm,
                     const Register scratch) {
@@ -2623,7 +2636,7 @@ void Assembler::vmov(const DwVfpRegister dst,
  // pointer (pp) is valid.
  bool can_use_pool =
      !FLAG_enable_embedded_constant_pool || is_constant_pool_available();
-  if (CpuFeatures::IsSupported(VFP3) && FitsVMOVDoubleImmediate(imm, &enc)) {
+  if (CpuFeatures::IsSupported(VFP3) && FitsVmovFPImmediate(imm, &enc)) {
    // The double can be encoded in the instruction.
    //
    // Dd = immediate

--- a/src/arm/constants-arm.h
+++ b/src/arm/constants-arm.h
@@ -654,7 +654,7 @@ class Instruction {
  inline bool HasH()    const { return HValue() == 1; }
  inline bool HasLink() const { return LinkValue() == 1; }
-  // Decoding the double immediate in the vmov instruction.
+  // Decode the floating-point immediate from a vmov instruction as a double.
  double DoubleImmedVmov() const;
  // Instructions are read out of a code stream. The only way to get a

--- a/src/arm/disasm-arm.cc
+++ b/src/arm/disasm-arm.cc
@@ -1430,7 +1430,7 @@ void Decoder::DecodeTypeVFP(Instruction* instr) {
        if (instr->SzValue() == 0x1) {
          Format(instr, "vmov'cond.f64 'Dd, 'd");
        } else {
-          Unknown(instr);  // Not used by V8.
+          Format(instr, "vmov'cond.f32 'Sd, 'd");
        }
      } else if (((instr->Opc2Value() == 0x6)) && instr->Opc3Value() == 0x3) {
        // vrintz - round towards zero (truncate)

--- a/src/arm/simulator-arm.cc
+++ b/src/arm/simulator-arm.cc
@@ -3256,7 +3256,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
        if (instr->SzValue() == 0x1) {
          set_d_register_from_double(vd, instr->DoubleImmedVmov());
        } else {
-          UNREACHABLE();  // Not used by v8.
+          set_s_register_from_float(d, instr->DoubleImmedVmov());
        }
      } else if (((instr->Opc2Value() == 0x6)) && (instr->Opc3Value() == 0x3)) {
        // vrintz - truncate

--- a/src/compiler/arm/code-generator-arm.cc
+++ b/src/compiler/arm/code-generator-arm.cc
@@ -168,7 +168,9 @@ class OutOfLineLoadFloat32 final : public OutOfLineCode {
      : OutOfLineCode(gen), result_(result) {}
  void Generate() final {
-    __ vmov(result_, std::numeric_limits<float>::quiet_NaN());
+    // Compute sqrtf(-1.0f), which results in a quiet single-precision NaN.
+    __ vmov(result_, -1.0f);
+    __ vsqrt(result_, result_);
  }
 private:
@@ -182,7 +184,9 @@ class OutOfLineLoadFloat64 final : public OutOfLineCode {
      : OutOfLineCode(gen), result_(result) {}
  void Generate() final {
-    __ vmov(result_, std::numeric_limits<double>::quiet_NaN(), kScratchReg);
+    // Compute sqrt(-1.0), which results in a quiet double-precision NaN.
+    __ vmov(result_, -1.0);
+    __ vsqrt(result_, result_);
  }
 private:

--- a/test/cctest/test-assembler-arm.cc
+++ b/test/cctest/test-assembler-arm.cc
@@ -232,6 +232,8 @@ TEST(4) {
    double j;
    double m;
    double n;
+    float o;
+    float p;
    float x;
    float y;
  } T;
@@ -314,6 +316,12 @@ TEST(4) {
    __ vneg(d0, d1);
    __ vstr(d0, r4, offsetof(T, n));
+    // Test vmov for single-precision immediates.
+    __ vmov(s0, 0.25f);
+    __ vstr(s0, r4, offsetof(T, o));
+    __ vmov(s0, -16.0f);
+    __ vstr(s0, r4, offsetof(T, p));
    __ ldm(ia_w, sp, r4.bit() | fp.bit() | pc.bit());
    CodeDesc desc;
@@ -341,6 +349,8 @@ TEST(4) {
    t.y = 9.0;
    Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0);
    USE(dummy);
+    CHECK_EQ(-16.0f, t.p);
+    CHECK_EQ(0.25f, t.o);
    CHECK_EQ(-123.456, t.n);
    CHECK_EQ(2718.2818, t.m);
    CHECK_EQ(2, t.i);

--- a/test/cctest/test-disasm-arm.cc
+++ b/test/cctest/test-disasm-arm.cc
@@ -614,6 +614,11 @@ TEST(Vfp) {
    COMPARE(vmov(d2, -13.0),
            "eeba2b0a       vmov.f64 d2, #-13");
+    COMPARE(vmov(s1, -1.0),
+            "eeff0a00       vmov.f32 s1, #-1");
+    COMPARE(vmov(s3, 13.0),
+            "eef21a0a       vmov.f32 s3, #13");
    COMPARE(vmov(d0, VmovIndexLo, r0),
            "ee000b10       vmov.32 d0[0], r0");
    COMPARE(vmov(d0, VmovIndexHi, r0),