[wasm simd] Fix F32x4Min/Max bug with signaling NaNs.

- Fixes a bug where signaling NaNs are converted to Infinities rather than quiet NaNs. Bug: v8:6020,v8:8639 Change-Id: I2601378f06f1987983f2b93e8970f401333073be Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1536911 Commit-Queue: Bill Budge <bbudge@chromium.org> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Cr-Commit-Position: refs/heads/master@{#60480}

[wasm simd] Fix F32x4Min/Max bug with signaling NaNs.
- Fixes a bug where signaling NaNs are converted to Infinities rather than quiet NaNs. Bug: v8:6020,v8:8639 Change-Id: I2601378f06f1987983f2b93e8970f401333073be Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1536911 Commit-Queue: Bill Budge <bbudge@chromium.org> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Cr-Commit-Position: refs/heads/master@{#60480}
1c378d02 · Bill Budge · Commit Bot · b0cfb778 · 1c378d02 · 1c378d02
Commit 1c378d02 authored Mar 26, 2019 by Bill Budge Committed by Commit Bot Mar 27, 2019
3 changed files
--- a/src/compiler/backend/ia32/code-generator-ia32.cc
+++ b/src/compiler/backend/ia32/code-generator-ia32.cc
@@ -1961,10 +1961,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ minps(kScratchDoubleReg, dst);
      __ minps(dst, src1);
      // propagate -0's and NaNs, which may be non-canonical.
-      __ orps(dst, kScratchDoubleReg);
+      __ orps(kScratchDoubleReg, dst);
-      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+      // Canonicalize NaNs by quieting and clearing the payload.
-      __ movaps(kScratchDoubleReg, dst);
+      __ cmpps(dst, kScratchDoubleReg, 3);
-      __ cmpps(dst, dst, 4);
+      __ orps(kScratchDoubleReg, dst);
      __ psrld(dst, 10);
      __ andnps(dst, kScratchDoubleReg);
      break;
@@ -1979,6 +1979,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ vminps(dst, dst, src1);
      __ vorps(dst, dst, kScratchDoubleReg);
      __ vcmpneqps(kScratchDoubleReg, dst, dst);
+      __ vorps(dst, dst, kScratchDoubleReg);
      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 10);
      __ vandnps(dst, kScratchDoubleReg, dst);
      break;
@@ -1993,14 +1994,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ maxps(kScratchDoubleReg, dst);
      __ maxps(dst, src1);
      // Find discrepancies.
-      __ xorps(kScratchDoubleReg, dst);
+      __ xorps(dst, kScratchDoubleReg);
      // Propagate NaNs, which may be non-canonical.
-      __ orps(dst, kScratchDoubleReg);
+      __ orps(kScratchDoubleReg, dst);
-      // Propagate sign discrepancy. NaNs and correct lanes are preserved.
+      // Propagate sign discrepancy and (subtle) quiet NaNs.
-      __ subps(dst, kScratchDoubleReg);
+      __ subps(kScratchDoubleReg, dst);
-      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+      // Canonicalize NaNs by clearing the payload.
-      __ movaps(kScratchDoubleReg, dst);
+      __ cmpps(dst, kScratchDoubleReg, 3);
-      __ cmpps(dst, dst, 4);
      __ psrld(dst, 10);
      __ andnps(dst, kScratchDoubleReg);
      break;
@@ -2013,12 +2013,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ movaps(kScratchDoubleReg, src1);
      __ vmaxps(kScratchDoubleReg, kScratchDoubleReg, dst);
      __ vmaxps(dst, dst, src1);
-      __ vxorps(kScratchDoubleReg, kScratchDoubleReg, dst);
+      __ vxorps(dst, dst, kScratchDoubleReg);
-      __ vorps(dst, dst, kScratchDoubleReg);
+      __ vorps(kScratchDoubleReg, kScratchDoubleReg, dst);
-      __ vsubps(dst, dst, kScratchDoubleReg);
+      __ vsubps(kScratchDoubleReg, kScratchDoubleReg, dst);
-      __ vcmpneqps(kScratchDoubleReg, dst, dst);
+      __ vcmpneqps(dst, kScratchDoubleReg, kScratchDoubleReg);
-      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 10);
+      __ vpsrld(dst, dst, 10);
-      __ vandnps(dst, kScratchDoubleReg, dst);
+      __ vandnps(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSEF32x4Eq: {

--- a/src/compiler/backend/x64/code-generator-x64.cc
+++ b/src/compiler/backend/x64/code-generator-x64.cc
@@ -2356,10 +2356,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ minps(kScratchDoubleReg, dst);
      __ minps(dst, src1);
      // propagate -0's and NaNs, which may be non-canonical.
-      __ orps(dst, kScratchDoubleReg);
+      __ orps(kScratchDoubleReg, dst);
-      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+      // Canonicalize NaNs by quieting and clearing the payload.
-      __ movaps(kScratchDoubleReg, dst);
+      __ cmpps(dst, kScratchDoubleReg, 3);
-      __ cmpps(dst, dst, 4);
+      __ orps(kScratchDoubleReg, dst);
      __ psrld(dst, 10);
      __ andnps(dst, kScratchDoubleReg);
      break;
@@ -2374,14 +2374,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ maxps(kScratchDoubleReg, dst);
      __ maxps(dst, src1);
      // Find discrepancies.
-      __ xorps(kScratchDoubleReg, dst);
+      __ xorps(dst, kScratchDoubleReg);
      // Propagate NaNs, which may be non-canonical.
-      __ orps(dst, kScratchDoubleReg);
+      __ orps(kScratchDoubleReg, dst);
-      // Propagate sign discrepancy. NaNs and correct lanes are preserved.
+      // Propagate sign discrepancy and (subtle) quiet NaNs.
-      __ subps(dst, kScratchDoubleReg);
+      __ subps(kScratchDoubleReg, dst);
      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
-      __ movaps(kScratchDoubleReg, dst);
+      __ cmpps(dst, kScratchDoubleReg, 3);
-      __ cmpps(dst, dst, 4);
      __ psrld(dst, 10);
      __ andnps(dst, kScratchDoubleReg);
      break;

--- a/test/cctest/wasm/test-run-wasm-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-simd.cc
@@ -442,9 +442,70 @@ WASM_SIMD_COMPILED_TEST(F32x4ConvertI32x4) {
  }
 }
+bool IsSameNan(float expected, float actual) {
+  // Sign is non-deterministic.
+  uint32_t expected_bits = bit_cast<uint32_t>(expected) & ~0x80000000;
+  uint32_t actual_bits = bit_cast<uint32_t>(actual) & ~0x80000000;
+  // Some implementations convert signaling NaNs to quiet NaNs.
+  return (expected_bits == actual_bits) ||
+         ((expected_bits | 0x00400000) == actual_bits);
+}
+bool IsCanonical(float actual) {
+  uint32_t actual_bits = bit_cast<uint32_t>(actual);
+  // Canonical NaN has quiet bit and no payload.
+  return (actual_bits & 0xFFC00000) == actual_bits;
+}
+void CheckFloatResult(float x, float y, float expected, float actual,
+                      bool exact = true) {
+  if (std::isnan(expected)) {
+    CHECK(std::isnan(actual));
+    if (std::isnan(x) && IsSameNan(x, actual)) return;
+    if (std::isnan(y) && IsSameNan(y, actual)) return;
+    if (IsSameNan(expected, actual)) return;
+    if (IsCanonical(actual)) return;
+    // This is expected to assert; it's useful for debugging.
+    CHECK_EQ(bit_cast<uint32_t>(expected), bit_cast<uint32_t>(actual));
+  } else {
+    if (exact) {
+      CHECK_EQ(expected, actual);
+      // The sign of 0's must match.
+      CHECK_EQ(std::signbit(expected), std::signbit(actual));
+      return;
+    }
+    // Otherwise, perform an approximate equality test. First check for
+    // equality to handle +/-Infinity where approximate equality doesn't work.
+    if (expected == actual) return;
+    // 1% error allows all platforms to pass easily.
+    constexpr float kApproximationError = 0.01f;
+    float abs_error = std::abs(expected) * kApproximationError,
+          min = expected - abs_error, max = expected + abs_error;
+    CHECK_LE(min, actual);
+    CHECK_GE(max, actual);
+  }
+}
+// Test some values not included in the float inputs from value_helper. These
+// tests are useful for opcodes that are synthesized during code gen, like Min
+// and Max on ia32 and x64.
+static constexpr uint32_t nan_test_array[] = {
+    // Bit patterns of quiet NaNs and signaling NaNs, with or without
+    // additional payload.
+    0x7FC00000, 0xFFC00000, 0x7FFFFFFF, 0x7F800000, 0xFF800000, 0x7F876543,
+    0xFF876543,
+    // Both Infinities.
+    0x7F800000, 0xFF800000,
+    // Some "normal" numbers, 1 and -1.
+    0x3F800000, 0xBF800000};
+#define FOR_FLOAT32_NAN_INPUTS(i) \
+  for (size_t i = 0; i < arraysize(nan_test_array); ++i)
 void RunF32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                      WasmOpcode opcode, FloatUnOp expected_op,
-                      bool approximate = false) {
+                      bool exact = true) {
  WasmRunner<int32_t, float> r(execution_tier, lower_simd);
  // Global to hold output.
  float* g = r.builder().AddGlobal<float>(kWasmS128);
@@ -458,25 +519,26 @@ void RunF32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
  FOR_FLOAT32_INPUTS(x) {
    if (!PlatformCanRepresent(x)) continue;
    // Extreme values have larger errors so skip them for approximation tests.
-    if (approximate && IsExtreme(x)) continue;
+    if (!exact && IsExtreme(x)) continue;
    float expected = expected_op(x);
    if (!PlatformCanRepresent(expected)) continue;
    r.Call(x);
    for (int i = 0; i < 4; i++) {
      float actual = ReadLittleEndianValue<float>(&g[i]);
-      if (std::isnan(expected)) {
+      CheckFloatResult(x, x, expected, actual, exact);
-        CHECK(std::isnan(actual));
+    }
-      } else {
-        // First check for equality, to handle +/-Inf, since min and max would
-        // be NaNs in those cases.
-        if (expected == actual) continue;
-        // 1% error allows all platforms to pass easily.
-        constexpr float kApproximationError = 0.01f;
-        float abs_error = std::abs(expected) * kApproximationError,
-              min = expected - abs_error, max = expected + abs_error;
-        CHECK_LE(min, actual);
-        CHECK_GE(max, actual);
  }
+  FOR_FLOAT32_NAN_INPUTS(x) {
+    if (!PlatformCanRepresent(x)) continue;
+    // Extreme values have larger errors so skip them for approximation tests.
+    if (!exact && IsExtreme(x)) continue;
+    float expected = expected_op(x);
+    if (!PlatformCanRepresent(expected)) continue;
+    r.Call(x);
+    for (int i = 0; i < 4; i++) {
+      float actual = ReadLittleEndianValue<float>(&g[i]);
+      CheckFloatResult(x, x, expected, actual, exact);
    }
  }
 }
@@ -490,12 +552,12 @@ WASM_SIMD_TEST(F32x4Neg) {
 WASM_SIMD_TEST(F32x4RecipApprox) {
  RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4RecipApprox,
-                   base::Recip, true /* approximate */);
+                   base::Recip, false /* !exact */);
 }
 WASM_SIMD_TEST(F32x4RecipSqrtApprox) {
  RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4RecipSqrtApprox,
-                   base::RecipSqrt, true /* approximate */);
+                   base::RecipSqrt, false /* !exact */);
 }
 void RunF32x4BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
@@ -522,23 +584,30 @@ void RunF32x4BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
      r.Call(x, y);
      for (int i = 0; i < 4; i++) {
        float actual = ReadLittleEndianValue<float>(&g[i]);
-        if (std::isnan(expected)) {
+        CheckFloatResult(x, y, expected, actual, true /* exact */);
-          CHECK(std::isnan(actual));
-          // Finally, check that NaN outputs are canonical. The sign bit is
-          // allowed to be non-deterministic.
-          uint32_t expected_bits = bit_cast<uint32_t>(expected) & ~0x80000000;
-          uint32_t actual_bits = bit_cast<uint32_t>(actual) & ~0x80000000;
-          CHECK_EQ(expected_bits, actual_bits);
-        } else {
-          CHECK_EQ(expected, actual);
-          // The sign of 0's must match.
-          CHECK_EQ(std::signbit(expected), std::signbit(actual));
      }
    }
  }
+  FOR_FLOAT32_NAN_INPUTS(i) {
+    float x = bit_cast<float>(nan_test_array[i]);
+    if (!PlatformCanRepresent(x)) continue;
+    FOR_FLOAT32_NAN_INPUTS(j) {
+      float y = bit_cast<float>(nan_test_array[j]);
+      if (!PlatformCanRepresent(y)) continue;
+      float expected = expected_op(x, y);
+      if (!PlatformCanRepresent(expected)) continue;
+      r.Call(x, y);
+      for (int i = 0; i < 4; i++) {
+        float actual = ReadLittleEndianValue<float>(&g[i]);
+        CheckFloatResult(x, y, expected, actual, true /* exact */);
+      }
+    }
  }
 }
+#undef FOR_FLOAT32_NAN_INPUTS
 WASM_SIMD_TEST(F32x4Add) {
  RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Add, Add);
 }