Commit 821bc649 authored by Bill Budge's avatar Bill Budge Committed by Commit Bot

[wasm simd] Fix F32x4 Min and Max

- Fix F32x4 tests to save results in globals, so they can be checked
  in C++ code. Perform correct checks in case of NaNs.
- Fix ia32, x64 implementations of F32x4Min, F32x4Max to correctly
  deal with NaNs.
- Enable tests for all float values on all platforms, except skip
  denormalized results on ARM, and skip extreme values for reciprocal,
  reciprocal square root approximation opcodes.
- Disable Min, Max test for interpreter (see v8:8425) since it doesn't
  handle NaNs correctly.
- Fix vmin, vmax implementations in ARM simulator.

Bug: v8:8639
Change-Id: I87e188e3cb078f09fdacfd9955f426c20a11bf64
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1495897
Commit-Queue: Bill Budge <bbudge@chromium.org>
Reviewed-by: 's avatarDeepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#60021}
parent a3ac513b
...@@ -4180,6 +4180,11 @@ void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge) { ...@@ -4180,6 +4180,11 @@ void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge) {
simulator->set_neon_register<T, SIZE>(Vd, src1); simulator->set_neon_register<T, SIZE>(Vd, src1);
} }
float MinMax(float a, float b, bool is_min) {
if (std::isnan(a) || std::isnan(b)) return NAN;
return is_min ? fmin(a, b) : fmax(a, b);
}
template <typename T> template <typename T>
T MinMax(T a, T b, bool is_min) { T MinMax(T a, T b, bool is_min) {
return is_min ? std::min(a, b) : std::max(a, b); return is_min ? std::min(a, b) : std::max(a, b);
......
...@@ -1953,24 +1953,47 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1953,24 +1953,47 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kSSEF32x4Min: { case kSSEF32x4Min: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ minps(i.OutputSimd128Register(), i.InputOperand(1)); // minps doesn't propagate NaN lanes in the first source. Compare this
// with itself to generate 1's in those lanes (quiet NaNs) and or them
// with the result of minps to simulate NaN propagation.
__ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
__ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
__ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ orps(i.OutputSimd128Register(), kScratchDoubleReg);
break; break;
} }
case kAVXF32x4Min: { case kAVXF32x4Min: {
CpuFeatureScope avx_scope(tasm(), AVX); CpuFeatureScope avx_scope(tasm(), AVX);
// See comment above for minps and NaN propagation.
__ vcmpneqps(kScratchDoubleReg, i.InputSimd128Register(0),
i.InputSimd128Register(0)); // Is NaN?
__ vminps(i.OutputSimd128Register(), i.InputSimd128Register(0), __ vminps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
__ vorps(i.OutputSimd128Register(), i.OutputSimd128Register(),
kScratchDoubleReg); // re-NaN-imate.
break; break;
} }
case kSSEF32x4Max: { case kSSEF32x4Max: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ maxps(i.OutputSimd128Register(), i.InputOperand(1)); // maxps doesn't propagate NaN lanes in the first source. Compare this
// with itself to generate 1's in those lanes (quiet NaNs) and or them
// with the result of maxps to simulate NaN propagation.
__ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
__ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
__ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ orps(i.OutputSimd128Register(), kScratchDoubleReg);
break; break;
} }
case kAVXF32x4Max: { case kAVXF32x4Max: {
CpuFeatureScope avx_scope(tasm(), AVX); CpuFeatureScope avx_scope(tasm(), AVX);
// See comment above for maxps and NaN propagation.
__ vcmpneqps(kScratchDoubleReg, i.InputSimd128Register(0),
i.InputSimd128Register(0));
__ vmaxps(i.OutputSimd128Register(), i.InputSimd128Register(0), __ vmaxps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
__ vorps(i.OutputSimd128Register(), i.OutputSimd128Register(),
kScratchDoubleReg);
break; break;
} }
case kSSEF32x4Eq: { case kSSEF32x4Eq: {
......
...@@ -2302,12 +2302,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2302,12 +2302,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64F32x4Min: { case kX64F32x4Min: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// minps doesn't propagate NaN lanes in the first source. Compare this
// with itself to generate 1's in those lanes (quiet NaNs) and or them
// with the result of minps to simulate NaN propagation.
__ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
__ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
__ minps(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ orps(i.OutputSimd128Register(), kScratchDoubleReg);
break; break;
} }
case kX64F32x4Max: { case kX64F32x4Max: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// maxps doesn't propagate NaN lanes in the first source. Compare this
// with itself to generate 1's in those lanes (quiet NaNs) and or them
// with the result of maxps to simulate NaN propagation.
__ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
__ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
__ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ orps(i.OutputSimd128Register(), kScratchDoubleReg);
break; break;
} }
case kX64F32x4Eq: { case kX64F32x4Eq: {
......
...@@ -895,6 +895,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -895,6 +895,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void maxps(XMMRegister dst, XMMRegister src) { maxps(dst, Operand(src)); } void maxps(XMMRegister dst, XMMRegister src) { maxps(dst, Operand(src)); }
void cmpps(XMMRegister dst, Operand src, uint8_t cmp); void cmpps(XMMRegister dst, Operand src, uint8_t cmp);
void cmpps(XMMRegister dst, XMMRegister src, uint8_t cmp) {
cmpps(dst, Operand(src), cmp);
}
#define SSE_CMP_P(instr, imm8) \ #define SSE_CMP_P(instr, imm8) \
void instr##ps(XMMRegister dst, XMMRegister src) { \ void instr##ps(XMMRegister dst, XMMRegister src) { \
cmpps(dst, Operand(src), imm8); \ cmpps(dst, Operand(src), imm8); \
...@@ -1497,6 +1500,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -1497,6 +1500,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
#define PACKED_OP_LIST(V) \ #define PACKED_OP_LIST(V) \
V(and, 0x54) \ V(and, 0x54) \
V(or, 0x56) \
V(xor, 0x57) \ V(xor, 0x57) \
V(add, 0x58) \ V(add, 0x58) \
V(mul, 0x59) \ V(mul, 0x59) \
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment