Commit 1c378d02 authored by Bill Budge's avatar Bill Budge Committed by Commit Bot

[wasm simd] Fix F32x4Min/Max bug with signaling NaNs.

- Fixes a bug where signaling NaNs are converted to
  Infinities rather than quiet NaNs.

Bug: v8:6020,v8:8639
Change-Id: I2601378f06f1987983f2b93e8970f401333073be
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1536911
Commit-Queue: Bill Budge <bbudge@chromium.org>
Reviewed-by: 's avatarDeepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#60480}
parent b0cfb778
...@@ -1961,10 +1961,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1961,10 +1961,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ minps(kScratchDoubleReg, dst); __ minps(kScratchDoubleReg, dst);
__ minps(dst, src1); __ minps(dst, src1);
// propagate -0's and NaNs, which may be non-canonical. // propagate -0's and NaNs, which may be non-canonical.
__ orps(dst, kScratchDoubleReg); __ orps(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic. // Canonicalize NaNs by quieting and clearing the payload.
__ movaps(kScratchDoubleReg, dst); __ cmpps(dst, kScratchDoubleReg, 3);
__ cmpps(dst, dst, 4); __ orps(kScratchDoubleReg, dst);
__ psrld(dst, 10); __ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg); __ andnps(dst, kScratchDoubleReg);
break; break;
...@@ -1979,6 +1979,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1979,6 +1979,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vminps(dst, dst, src1); __ vminps(dst, dst, src1);
__ vorps(dst, dst, kScratchDoubleReg); __ vorps(dst, dst, kScratchDoubleReg);
__ vcmpneqps(kScratchDoubleReg, dst, dst); __ vcmpneqps(kScratchDoubleReg, dst, dst);
__ vorps(dst, dst, kScratchDoubleReg);
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 10); __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 10);
__ vandnps(dst, kScratchDoubleReg, dst); __ vandnps(dst, kScratchDoubleReg, dst);
break; break;
...@@ -1993,14 +1994,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1993,14 +1994,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ maxps(kScratchDoubleReg, dst); __ maxps(kScratchDoubleReg, dst);
__ maxps(dst, src1); __ maxps(dst, src1);
// Find discrepancies. // Find discrepancies.
__ xorps(kScratchDoubleReg, dst); __ xorps(dst, kScratchDoubleReg);
// Propagate NaNs, which may be non-canonical. // Propagate NaNs, which may be non-canonical.
__ orps(dst, kScratchDoubleReg); __ orps(kScratchDoubleReg, dst);
// Propagate sign discrepancy. NaNs and correct lanes are preserved. // Propagate sign discrepancy and (subtle) quiet NaNs.
__ subps(dst, kScratchDoubleReg); __ subps(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic. // Canonicalize NaNs by clearing the payload.
__ movaps(kScratchDoubleReg, dst); __ cmpps(dst, kScratchDoubleReg, 3);
__ cmpps(dst, dst, 4);
__ psrld(dst, 10); __ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg); __ andnps(dst, kScratchDoubleReg);
break; break;
...@@ -2013,12 +2013,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2013,12 +2013,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ movaps(kScratchDoubleReg, src1); __ movaps(kScratchDoubleReg, src1);
__ vmaxps(kScratchDoubleReg, kScratchDoubleReg, dst); __ vmaxps(kScratchDoubleReg, kScratchDoubleReg, dst);
__ vmaxps(dst, dst, src1); __ vmaxps(dst, dst, src1);
__ vxorps(kScratchDoubleReg, kScratchDoubleReg, dst); __ vxorps(dst, dst, kScratchDoubleReg);
__ vorps(dst, dst, kScratchDoubleReg); __ vorps(kScratchDoubleReg, kScratchDoubleReg, dst);
__ vsubps(dst, dst, kScratchDoubleReg); __ vsubps(kScratchDoubleReg, kScratchDoubleReg, dst);
__ vcmpneqps(kScratchDoubleReg, dst, dst); __ vcmpneqps(dst, kScratchDoubleReg, kScratchDoubleReg);
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 10); __ vpsrld(dst, dst, 10);
__ vandnps(dst, kScratchDoubleReg, dst); __ vandnps(dst, dst, kScratchDoubleReg);
break; break;
} }
case kSSEF32x4Eq: { case kSSEF32x4Eq: {
......
...@@ -2356,10 +2356,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2356,10 +2356,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ minps(kScratchDoubleReg, dst); __ minps(kScratchDoubleReg, dst);
__ minps(dst, src1); __ minps(dst, src1);
// propagate -0's and NaNs, which may be non-canonical. // propagate -0's and NaNs, which may be non-canonical.
__ orps(dst, kScratchDoubleReg); __ orps(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic. // Canonicalize NaNs by quieting and clearing the payload.
__ movaps(kScratchDoubleReg, dst); __ cmpps(dst, kScratchDoubleReg, 3);
__ cmpps(dst, dst, 4); __ orps(kScratchDoubleReg, dst);
__ psrld(dst, 10); __ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg); __ andnps(dst, kScratchDoubleReg);
break; break;
...@@ -2374,14 +2374,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2374,14 +2374,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ maxps(kScratchDoubleReg, dst); __ maxps(kScratchDoubleReg, dst);
__ maxps(dst, src1); __ maxps(dst, src1);
// Find discrepancies. // Find discrepancies.
__ xorps(kScratchDoubleReg, dst); __ xorps(dst, kScratchDoubleReg);
// Propagate NaNs, which may be non-canonical. // Propagate NaNs, which may be non-canonical.
__ orps(dst, kScratchDoubleReg); __ orps(kScratchDoubleReg, dst);
// Propagate sign discrepancy. NaNs and correct lanes are preserved. // Propagate sign discrepancy and (subtle) quiet NaNs.
__ subps(dst, kScratchDoubleReg); __ subps(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic. // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ movaps(kScratchDoubleReg, dst); __ cmpps(dst, kScratchDoubleReg, 3);
__ cmpps(dst, dst, 4);
__ psrld(dst, 10); __ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg); __ andnps(dst, kScratchDoubleReg);
break; break;
......
...@@ -442,9 +442,70 @@ WASM_SIMD_COMPILED_TEST(F32x4ConvertI32x4) { ...@@ -442,9 +442,70 @@ WASM_SIMD_COMPILED_TEST(F32x4ConvertI32x4) {
} }
} }
bool IsSameNan(float expected, float actual) {
// Sign is non-deterministic.
uint32_t expected_bits = bit_cast<uint32_t>(expected) & ~0x80000000;
uint32_t actual_bits = bit_cast<uint32_t>(actual) & ~0x80000000;
// Some implementations convert signaling NaNs to quiet NaNs.
return (expected_bits == actual_bits) ||
((expected_bits | 0x00400000) == actual_bits);
}
bool IsCanonical(float actual) {
uint32_t actual_bits = bit_cast<uint32_t>(actual);
// Canonical NaN has quiet bit and no payload.
return (actual_bits & 0xFFC00000) == actual_bits;
}
void CheckFloatResult(float x, float y, float expected, float actual,
bool exact = true) {
if (std::isnan(expected)) {
CHECK(std::isnan(actual));
if (std::isnan(x) && IsSameNan(x, actual)) return;
if (std::isnan(y) && IsSameNan(y, actual)) return;
if (IsSameNan(expected, actual)) return;
if (IsCanonical(actual)) return;
// This is expected to assert; it's useful for debugging.
CHECK_EQ(bit_cast<uint32_t>(expected), bit_cast<uint32_t>(actual));
} else {
if (exact) {
CHECK_EQ(expected, actual);
// The sign of 0's must match.
CHECK_EQ(std::signbit(expected), std::signbit(actual));
return;
}
// Otherwise, perform an approximate equality test. First check for
// equality to handle +/-Infinity where approximate equality doesn't work.
if (expected == actual) return;
// 1% error allows all platforms to pass easily.
constexpr float kApproximationError = 0.01f;
float abs_error = std::abs(expected) * kApproximationError,
min = expected - abs_error, max = expected + abs_error;
CHECK_LE(min, actual);
CHECK_GE(max, actual);
}
}
// Test some values not included in the float inputs from value_helper. These
// tests are useful for opcodes that are synthesized during code gen, like Min
// and Max on ia32 and x64.
static constexpr uint32_t nan_test_array[] = {
// Bit patterns of quiet NaNs and signaling NaNs, with or without
// additional payload.
0x7FC00000, 0xFFC00000, 0x7FFFFFFF, 0x7F800000, 0xFF800000, 0x7F876543,
0xFF876543,
// Both Infinities.
0x7F800000, 0xFF800000,
// Some "normal" numbers, 1 and -1.
0x3F800000, 0xBF800000};
#define FOR_FLOAT32_NAN_INPUTS(i) \
for (size_t i = 0; i < arraysize(nan_test_array); ++i)
void RunF32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd, void RunF32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, FloatUnOp expected_op, WasmOpcode opcode, FloatUnOp expected_op,
bool approximate = false) { bool exact = true) {
WasmRunner<int32_t, float> r(execution_tier, lower_simd); WasmRunner<int32_t, float> r(execution_tier, lower_simd);
// Global to hold output. // Global to hold output.
float* g = r.builder().AddGlobal<float>(kWasmS128); float* g = r.builder().AddGlobal<float>(kWasmS128);
...@@ -458,25 +519,26 @@ void RunF32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd, ...@@ -458,25 +519,26 @@ void RunF32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
FOR_FLOAT32_INPUTS(x) { FOR_FLOAT32_INPUTS(x) {
if (!PlatformCanRepresent(x)) continue; if (!PlatformCanRepresent(x)) continue;
// Extreme values have larger errors so skip them for approximation tests. // Extreme values have larger errors so skip them for approximation tests.
if (approximate && IsExtreme(x)) continue; if (!exact && IsExtreme(x)) continue;
float expected = expected_op(x); float expected = expected_op(x);
if (!PlatformCanRepresent(expected)) continue; if (!PlatformCanRepresent(expected)) continue;
r.Call(x); r.Call(x);
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
float actual = ReadLittleEndianValue<float>(&g[i]); float actual = ReadLittleEndianValue<float>(&g[i]);
if (std::isnan(expected)) { CheckFloatResult(x, x, expected, actual, exact);
CHECK(std::isnan(actual)); }
} else {
// First check for equality, to handle +/-Inf, since min and max would
// be NaNs in those cases.
if (expected == actual) continue;
// 1% error allows all platforms to pass easily.
constexpr float kApproximationError = 0.01f;
float abs_error = std::abs(expected) * kApproximationError,
min = expected - abs_error, max = expected + abs_error;
CHECK_LE(min, actual);
CHECK_GE(max, actual);
} }
FOR_FLOAT32_NAN_INPUTS(x) {
if (!PlatformCanRepresent(x)) continue;
// Extreme values have larger errors so skip them for approximation tests.
if (!exact && IsExtreme(x)) continue;
float expected = expected_op(x);
if (!PlatformCanRepresent(expected)) continue;
r.Call(x);
for (int i = 0; i < 4; i++) {
float actual = ReadLittleEndianValue<float>(&g[i]);
CheckFloatResult(x, x, expected, actual, exact);
} }
} }
} }
...@@ -490,12 +552,12 @@ WASM_SIMD_TEST(F32x4Neg) { ...@@ -490,12 +552,12 @@ WASM_SIMD_TEST(F32x4Neg) {
WASM_SIMD_TEST(F32x4RecipApprox) { WASM_SIMD_TEST(F32x4RecipApprox) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4RecipApprox, RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4RecipApprox,
base::Recip, true /* approximate */); base::Recip, false /* !exact */);
} }
WASM_SIMD_TEST(F32x4RecipSqrtApprox) { WASM_SIMD_TEST(F32x4RecipSqrtApprox) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4RecipSqrtApprox, RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4RecipSqrtApprox,
base::RecipSqrt, true /* approximate */); base::RecipSqrt, false /* !exact */);
} }
void RunF32x4BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd, void RunF32x4BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
...@@ -522,23 +584,30 @@ void RunF32x4BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd, ...@@ -522,23 +584,30 @@ void RunF32x4BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
r.Call(x, y); r.Call(x, y);
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
float actual = ReadLittleEndianValue<float>(&g[i]); float actual = ReadLittleEndianValue<float>(&g[i]);
if (std::isnan(expected)) { CheckFloatResult(x, y, expected, actual, true /* exact */);
CHECK(std::isnan(actual));
// Finally, check that NaN outputs are canonical. The sign bit is
// allowed to be non-deterministic.
uint32_t expected_bits = bit_cast<uint32_t>(expected) & ~0x80000000;
uint32_t actual_bits = bit_cast<uint32_t>(actual) & ~0x80000000;
CHECK_EQ(expected_bits, actual_bits);
} else {
CHECK_EQ(expected, actual);
// The sign of 0's must match.
CHECK_EQ(std::signbit(expected), std::signbit(actual));
} }
} }
} }
FOR_FLOAT32_NAN_INPUTS(i) {
float x = bit_cast<float>(nan_test_array[i]);
if (!PlatformCanRepresent(x)) continue;
FOR_FLOAT32_NAN_INPUTS(j) {
float y = bit_cast<float>(nan_test_array[j]);
if (!PlatformCanRepresent(y)) continue;
float expected = expected_op(x, y);
if (!PlatformCanRepresent(expected)) continue;
r.Call(x, y);
for (int i = 0; i < 4; i++) {
float actual = ReadLittleEndianValue<float>(&g[i]);
CheckFloatResult(x, y, expected, actual, true /* exact */);
}
}
} }
} }
#undef FOR_FLOAT32_NAN_INPUTS
WASM_SIMD_TEST(F32x4Add) { WASM_SIMD_TEST(F32x4Add) {
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Add, Add); RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Add, Add);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment