Commit e9b00902 authored by Ng Zhi An's avatar Ng Zhi An Committed by Commit Bot

[wasm simd] Implement F64x2Abs on x64

Bug: v8:8460
Change-Id: Ica8329efa9be5944037e205f371d2bc34b882e0d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1703762Reviewed-by: 's avatarDeepti Gandluri <gdeepti@chromium.org>
Reviewed-by: 's avatarMichael Starzinger <mstarzinger@chromium.org>
Reviewed-by: 's avatarBill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62816}
parent 5ef538f7
......@@ -1820,6 +1820,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsFloat64(node), VisitF64x2ExtractLane(node);
case IrOpcode::kF64x2ReplaceLane:
return MarkAsSimd128(node), VisitF64x2ReplaceLane(node);
case IrOpcode::kF64x2Abs:
return MarkAsSimd128(node), VisitF64x2Abs(node);
case IrOpcode::kF64x2Eq:
return MarkAsSimd128(node), VisitF64x2Eq(node);
case IrOpcode::kF64x2Ne:
......@@ -2549,6 +2551,7 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
void InstructionSelector::VisitF64x2Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); }
......
......@@ -2275,6 +2275,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ movq(i.OutputDoubleRegister(), kScratchRegister);
break;
}
case kX64F64x2Abs: {
// TODO(zhin): look at kSSEFloat64Abs instruction selection and codegen to
// avoid having 2 cases here, and potentially share code
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ pcmpeqq(kScratchDoubleReg, kScratchDoubleReg);
__ psrlq(kScratchDoubleReg, 1);
__ andpd(dst, kScratchDoubleReg);
} else {
__ pcmpeqq(dst, dst);
__ psrlq(dst, 1);
__ andpd(dst, src);
}
break;
}
case kX64F64x2Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
......
......@@ -161,6 +161,7 @@ namespace compiler {
V(X64F64x2Splat) \
V(X64F64x2ExtractLane) \
V(X64F64x2ReplaceLane) \
V(X64F64x2Abs) \
V(X64F64x2Eq) \
V(X64F64x2Ne) \
V(X64F64x2Lt) \
......
......@@ -127,6 +127,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2Splat:
case kX64F64x2ExtractLane:
case kX64F64x2ReplaceLane:
case kX64F64x2Abs:
case kX64F64x2Eq:
case kX64F64x2Ne:
case kX64F64x2Lt:
......
......@@ -2660,6 +2660,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(I64x2GeU)
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs) \
V(F32x4SConvertI32x4) \
V(F32x4Abs) \
V(F32x4Neg) \
......
......@@ -246,6 +246,7 @@ MachineType AtomicOpType(Operator const* op) {
V(Word32PairShr, Operator::kNoProperties, 3, 0, 2) \
V(Word32PairSar, Operator::kNoProperties, 3, 0, 2) \
V(F64x2Splat, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Abs, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Eq, Operator::kCommutative, 2, 0, 1) \
V(F64x2Ne, Operator::kCommutative, 2, 0, 1) \
V(F64x2Lt, Operator::kNoProperties, 2, 0, 1) \
......
......@@ -469,6 +469,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
// SIMD operators.
const Operator* F64x2Splat();
const Operator* F64x2Abs();
const Operator* F64x2ExtractLane(int32_t);
const Operator* F64x2ReplaceLane(int32_t);
const Operator* F64x2Eq();
......
......@@ -743,6 +743,7 @@
V(F64x2Splat) \
V(F64x2ExtractLane) \
V(F64x2ReplaceLane) \
V(F64x2Abs) \
V(F64x2Eq) \
V(F64x2Ne) \
V(F64x2Lt) \
......
......@@ -3994,6 +3994,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
switch (opcode) {
case wasm::kExprF64x2Splat:
return graph()->NewNode(mcgraph()->machine()->F64x2Splat(), inputs[0]);
case wasm::kExprF64x2Abs:
return graph()->NewNode(mcgraph()->machine()->F64x2Abs(), inputs[0]);
case wasm::kExprF64x2Eq:
return graph()->NewNode(mcgraph()->machine()->F64x2Eq(), inputs[0],
inputs[1]);
......
......@@ -2298,6 +2298,7 @@ class ThreadImpl {
Push(WasmValue(Simd128(res))); \
return true; \
}
UNOP_CASE(F64x2Abs, f64x2, float2, 2, std::abs(a))
UNOP_CASE(F32x4Abs, f32x4, float4, 4, std::abs(a))
UNOP_CASE(F32x4Neg, f32x4, float4, 4, -a)
UNOP_CASE(F32x4RecipApprox, f32x4, float4, 4, base::Recip(a))
......
......@@ -238,6 +238,7 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_F64x2_OP(Le, "le")
CASE_F64x2_OP(Gt, "gt")
CASE_F64x2_OP(Ge, "ge")
CASE_F64x2_OP(Abs, "abs")
CASE_F32x4_OP(Abs, "abs")
CASE_F32x4_OP(AddHoriz, "add_horizontal")
CASE_F32x4_OP(RecipApprox, "recip_approx")
......
......@@ -385,6 +385,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
V(F32x4Mul, 0xfd9c, s_ss) \
V(F32x4Min, 0xfd9e, s_ss) \
V(F32x4Max, 0xfd9f, s_ss) \
V(F64x2Abs, 0xfda0, s_s) \
V(I32x4SConvertF32x4, 0xfdab, s_s) \
V(I32x4UConvertF32x4, 0xfdac, s_s) \
V(F32x4SConvertI32x4, 0xfdaf, s_s) \
......
......@@ -19,6 +19,7 @@ namespace test_run_wasm_simd {
namespace {
using DoubleUnOp = double (*)(double);
using DoubleCompareOp = int64_t (*)(double, double);
using FloatUnOp = float (*)(float);
using FloatBinOp = float (*)(float, float);
......@@ -842,6 +843,117 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Le) {
RunF64x2CompareOpTest(execution_tier, lower_simd, kExprF64x2Le, LessEqual);
}
bool IsSameNan(double expected, double actual) {
// Sign is non-deterministic.
uint64_t expected_bits = bit_cast<uint64_t>(expected) & ~0x8000000000000000;
uint64_t actual_bits = bit_cast<uint64_t>(actual) & ~0x8000000000000000;
// Some implementations convert signaling NaNs to quiet NaNs.
return (expected_bits == actual_bits) ||
((expected_bits | 0x0008000000000000) == actual_bits);
}
bool IsCanonical(double actual) {
uint64_t actual_bits = bit_cast<uint64_t>(actual);
// Canonical NaN has quiet bit and no payload.
return (actual_bits & 0xFF80000000000000) == actual_bits;
}
void CheckDoubleResult(double x, double y, double expected, double actual,
bool exact = true) {
if (std::isnan(expected)) {
CHECK(std::isnan(actual));
if (std::isnan(x) && IsSameNan(x, actual)) return;
if (std::isnan(y) && IsSameNan(y, actual)) return;
if (IsSameNan(expected, actual)) return;
if (IsCanonical(actual)) return;
// This is expected to assert; it's useful for debugging.
CHECK_EQ(bit_cast<uint64_t>(expected), bit_cast<uint64_t>(actual));
} else {
if (exact) {
CHECK_EQ(expected, actual);
// The sign of 0's must match.
CHECK_EQ(std::signbit(expected), std::signbit(actual));
return;
}
// Otherwise, perform an approximate equality test. First check for
// equality to handle +/-Infinity where approximate equality doesn't work.
if (expected == actual) return;
// 1% error allows all platforms to pass easily.
constexpr double kApproximationError = 0.01f;
double abs_error = std::abs(expected) * kApproximationError,
min = expected - abs_error, max = expected + abs_error;
CHECK_LE(min, actual);
CHECK_GE(max, actual);
}
}
// Test some values not included in the double inputs from value_helper. These
// tests are useful for opcodes that are synthesized during code gen, like Min
// and Max on ia32 and x64.
static constexpr uint64_t double_nan_test_array[] = {
// quiet NaNs, + and -
0x7FF8000000000001, 0xFFF8000000000001,
// with payload
0x7FF8000000000011, 0xFFF8000000000011,
// signaling NaNs, + and -
0x7FF0000000000001, 0xFFF0000000000001,
// with payload
0x7FF0000000000011, 0xFFF0000000000011,
// Both Infinities.
0x7FF0000000000000, 0xFFF0000000000000,
// Some "normal" numbers, 1 and -1.
0x3FF0000000000000, 0xBFF0000000000000};
#define FOR_FLOAT64_NAN_INPUTS(i) \
for (size_t i = 0; i < arraysize(double_nan_test_array); ++i)
void RunF64x2UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleUnOp expected_op,
bool exact = true) {
WasmRunner<int32_t, double> r(execution_tier, lower_simd);
// Global to hold output.
double* g = r.builder().AddGlobal<double>(kWasmS128);
// Build fn to splat test value, perform unop, and write the result.
byte value = 0;
byte temp1 = r.AllocateLocal(kWasmS128);
BUILD(r, WASM_SET_LOCAL(temp1, WASM_SIMD_F64x2_SPLAT(WASM_GET_LOCAL(value))),
WASM_SET_GLOBAL(0, WASM_SIMD_UNOP(opcode, WASM_GET_LOCAL(temp1))),
WASM_ONE);
FOR_FLOAT64_INPUTS(x) {
if (!PlatformCanRepresent(x)) continue;
// Extreme values have larger errors so skip them for approximation tests.
if (!exact && IsExtreme(x)) continue;
double expected = expected_op(x);
if (!PlatformCanRepresent(expected)) continue;
r.Call(x);
for (int i = 0; i < 2; i++) {
double actual = ReadLittleEndianValue<double>(&g[i]);
CheckFloatResult(x, x, expected, actual, exact);
}
}
FOR_FLOAT64_NAN_INPUTS(i) {
double x = bit_cast<double>(double_nan_test_array[i]);
if (!PlatformCanRepresent(x)) continue;
// Extreme values have larger errors so skip them for approximation tests.
if (!exact && IsExtreme(x)) continue;
double expected = expected_op(x);
if (!PlatformCanRepresent(expected)) continue;
r.Call(x);
for (int i = 0; i < 2; i++) {
double actual = ReadLittleEndianValue<double>(&g[i]);
CheckDoubleResult(x, x, expected, actual, exact);
}
}
}
#undef FOR_FLOAT64_NAN_INPUTS
WASM_SIMD_TEST_NO_LOWERING(F64x2Abs) {
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Abs, std::abs);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2Splat) {
WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
// Set up a global to hold output vector.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment