Commit 9a68fa13 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Prototype f32x4 and f64x2 pmin and pmax

This patch implements f32x4.pmin, f32x4.pmax, f64x2.pmin, and f64x2.pmax
for x64 and interpreter.

Pseudo-min and Pseudo-max instructions were proposed in
https://github.com/WebAssembly/simd/pull/122. These instructions
exactly match std::min and std::max in the C++ STL, and thus have different
semantics from the existing min and max.
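
To make that difference concrete, here is a minimal scalar sketch (not V8 code; wasm_min is an illustrative stand-in approximating the existing f32x4.min lane rule): pmin follows std::min, which keeps its first argument on NaN or equal inputs, while the existing min propagates NaNs and treats -0.0 as smaller than +0.0.

#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>

// Illustrative approximation of the existing f32x4.min lane rule:
// NaN-propagating, and -0.0 is considered smaller than +0.0.
float wasm_min(float a, float b) {
  if (std::isnan(a) || std::isnan(b))
    return std::numeric_limits<float>::quiet_NaN();
  if (a == 0.0f && b == 0.0f) return std::signbit(a) ? a : b;
  return a < b ? a : b;
}

// pmin lane rule: (b < a) ? b : a, i.e. exactly std::min(a, b).
float pmin(float a, float b) { return std::min(a, b); }

int main() {
  float nan = std::numeric_limits<float>::quiet_NaN();
  assert(std::isnan(wasm_min(1.0f, nan)));       // NaN propagates
  assert(pmin(1.0f, nan) == 1.0f);               // NaN in b is ignored
  assert(std::signbit(wasm_min(+0.0f, -0.0f)));  // -0.0 wins
  assert(!std::signbit(pmin(+0.0f, -0.0f)));     // first operand (+0.0) wins
  return 0;
}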

The x64 instruction selector switches the operands around, because that lets
us define the dst to be the same as the first instruction operand (which is
really the second input node), enabling better codegen.

For example, b = f32x4.pmin(a, b) directly maps to vminps(b, b, a) or
minps(b, a), as long as we can define dst == b, and switching the
instruction operands around allows us to do that.
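
As a rough SSE-intrinsics sketch (not V8 code; it uses the raw _mm_min_ps intrinsic rather than the Minps macro used in the codegen below): minps keeps its second source operand on NaN or on equal inputs, so computing the result into b with a as the other operand yields (b < a) ? b : a per lane, which is exactly pmin(a, b).

#include <xmmintrin.h>
#include <cmath>
#include <cstdio>

int main() {
  // Lanes are listed low to high via _mm_setr_ps.
  __m128 a = _mm_setr_ps(1.0f, std::nanf(""), -0.0f, 3.0f);
  __m128 b = _mm_setr_ps(std::nanf(""), 5.0f, +0.0f, 2.0f);
  // dst starts as b, matching dst == second input node after the operand swap:
  // minps b, a  ->  per lane (b < a) ? b : a, keeping a on NaN or equal inputs.
  __m128 dst = _mm_min_ps(b, a);
  float out[4];
  _mm_storeu_ps(out, dst);
  // Expected lanes: 1.0 (a kept over NaN b), nan (NaN a kept), -0.0 (a kept), 2.0.
  for (int i = 0; i < 4; ++i) std::printf("lane %d: %f\n", i, out[i]);
  return 0;
}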

Bug: v8:10501
Change-Id: I06f983fc1764caf673e600ac91d9c0ac5166e17e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2186630
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67688}
parent 6adf7e82
@@ -1884,6 +1884,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF64x2Qfma(node);
case IrOpcode::kF64x2Qfms:
return MarkAsSimd128(node), VisitF64x2Qfms(node);
case IrOpcode::kF64x2Pmin:
return MarkAsSimd128(node), VisitF64x2Pmin(node);
case IrOpcode::kF64x2Pmax:
return MarkAsSimd128(node), VisitF64x2Pmax(node);
case IrOpcode::kF32x4Splat:
return MarkAsSimd128(node), VisitF32x4Splat(node);
case IrOpcode::kF32x4ExtractLane:
@@ -1930,6 +1934,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF32x4Qfma(node);
case IrOpcode::kF32x4Qfms:
return MarkAsSimd128(node), VisitF32x4Qfms(node);
case IrOpcode::kF32x4Pmin:
return MarkAsSimd128(node), VisitF32x4Pmin(node);
case IrOpcode::kF32x4Pmax:
return MarkAsSimd128(node), VisitF32x4Pmax(node);
case IrOpcode::kI64x2Splat:
return MarkAsSimd128(node), VisitI64x2Splat(node);
case IrOpcode::kI64x2SplatI32Pair:
@@ -2653,6 +2661,14 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
// && !V8_TARGET_ARCH_X64
// TODO(v8:10501) Prototyping pmin and pmax instructions.
#if !V8_TARGET_ARCH_X64
void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {
@@ -2607,6 +2607,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kX64F32x4Pmin: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ Minps(dst, i.InputSimd128Register(1));
break;
}
case kX64F32x4Pmax: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ Maxps(dst, i.InputSimd128Register(1));
break;
}
case kX64F64x2Pmin: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ Minpd(dst, i.InputSimd128Register(1));
break;
}
case kX64F64x2Pmax: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ Maxpd(dst, i.InputSimd128Register(1));
break;
}
case kX64I64x2Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) {
@@ -172,6 +172,8 @@ namespace compiler {
V(X64F64x2Le) \
V(X64F64x2Qfma) \
V(X64F64x2Qfms) \
V(X64F64x2Pmin) \
V(X64F64x2Pmax) \
V(X64F32x4Splat) \
V(X64F32x4ExtractLane) \
V(X64F32x4ReplaceLane) \
@@ -195,6 +197,8 @@ namespace compiler {
V(X64F32x4Le) \
V(X64F32x4Qfma) \
V(X64F32x4Qfms) \
V(X64F32x4Pmin) \
V(X64F32x4Pmax) \
V(X64I64x2Splat) \
V(X64I64x2ExtractLane) \
V(X64I64x2ReplaceLane) \
@@ -144,6 +144,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2Le:
case kX64F64x2Qfma:
case kX64F64x2Qfms:
case kX64F64x2Pmin:
case kX64F64x2Pmax:
case kX64F32x4Splat:
case kX64F32x4ExtractLane:
case kX64F32x4ReplaceLane:
@@ -167,6 +169,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4Le:
case kX64F32x4Qfma:
case kX64F32x4Qfms:
case kX64F32x4Pmin:
case kX64F32x4Pmax:
case kX64I64x2Splat:
case kX64I64x2ExtractLane:
case kX64I64x2ReplaceLane:
@@ -3380,6 +3380,34 @@ void InstructionSelector::VisitS8x16Swizzle(Node* node) {
arraysize(temps), temps);
}
namespace {
void VisitPminOrPmax(InstructionSelector* selector, Node* node,
ArchOpcode opcode) {
// Due to the way minps/minpd work, we want the dst to be same as the second
// input: b = pmin(a, b) directly maps to minps b a.
X64OperandGenerator g(selector);
selector->Emit(opcode, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(1)),
g.UseRegister(node->InputAt(0)));
}
} // namespace
void InstructionSelector::VisitF32x4Pmin(Node* node) {
VisitPminOrPmax(this, node, kX64F32x4Pmin);
}
void InstructionSelector::VisitF32x4Pmax(Node* node) {
VisitPminOrPmax(this, node, kX64F32x4Pmax);
}
void InstructionSelector::VisitF64x2Pmin(Node* node) {
VisitPminOrPmax(this, node, kX64F64x2Pmin);
}
void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitPminOrPmax(this, node, kX64F64x2Pmax);
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
@@ -337,6 +337,8 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(F64x2Le, Operator::kNoProperties, 2, 0, 1) \
V(F64x2Qfma, Operator::kNoProperties, 3, 0, 1) \
V(F64x2Qfms, Operator::kNoProperties, 3, 0, 1) \
V(F64x2Pmin, Operator::kNoProperties, 2, 0, 1) \
V(F64x2Pmax, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Splat, Operator::kNoProperties, 1, 0, 1) \
V(F32x4SConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
V(F32x4UConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
@@ -358,6 +360,8 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(F32x4Le, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Qfma, Operator::kNoProperties, 3, 0, 1) \
V(F32x4Qfms, Operator::kNoProperties, 3, 0, 1) \
V(F32x4Pmin, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Pmax, Operator::kNoProperties, 2, 0, 1) \
V(I64x2Splat, Operator::kNoProperties, 1, 0, 1) \
V(I64x2SplatI32Pair, Operator::kNoProperties, 2, 0, 1) \
V(I64x2Neg, Operator::kNoProperties, 1, 0, 1) \
@@ -574,6 +574,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F64x2Le();
const Operator* F64x2Qfma();
const Operator* F64x2Qfms();
const Operator* F64x2Pmin();
const Operator* F64x2Pmax();
const Operator* F32x4Splat();
const Operator* F32x4ExtractLane(int32_t);
@@ -598,6 +600,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F32x4Le();
const Operator* F32x4Qfma();
const Operator* F32x4Qfms();
const Operator* F32x4Pmin();
const Operator* F32x4Pmax();
const Operator* I64x2Splat();
const Operator* I64x2SplatI32Pair();
@@ -763,6 +763,8 @@
V(F64x2Le) \
V(F64x2Qfma) \
V(F64x2Qfms) \
V(F64x2Pmin) \
V(F64x2Pmax) \
V(F32x4Splat) \
V(F32x4ExtractLane) \
V(F32x4ReplaceLane) \
@@ -788,6 +790,8 @@
V(F32x4Ge) \
V(F32x4Qfma) \
V(F32x4Qfms) \
V(F32x4Pmin) \
V(F32x4Pmax) \
V(I64x2Splat) \
V(I64x2SplatI32Pair) \
V(I64x2ExtractLane) \
@@ -4128,6 +4128,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprF64x2Qfms:
return graph()->NewNode(mcgraph()->machine()->F64x2Qfms(), inputs[0],
inputs[1], inputs[2]);
case wasm::kExprF64x2Pmin:
return graph()->NewNode(mcgraph()->machine()->F64x2Pmin(), inputs[0],
inputs[1]);
case wasm::kExprF64x2Pmax:
return graph()->NewNode(mcgraph()->machine()->F64x2Pmax(), inputs[0],
inputs[1]);
case wasm::kExprF32x4Splat:
return graph()->NewNode(mcgraph()->machine()->F32x4Splat(), inputs[0]);
case wasm::kExprF32x4SConvertI32x4:
@@ -4193,6 +4199,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprF32x4Qfms:
return graph()->NewNode(mcgraph()->machine()->F32x4Qfms(), inputs[0],
inputs[1], inputs[2]);
case wasm::kExprF32x4Pmin:
return graph()->NewNode(mcgraph()->machine()->F32x4Pmin(), inputs[0],
inputs[1]);
case wasm::kExprF32x4Pmax:
return graph()->NewNode(mcgraph()->machine()->F32x4Pmax(), inputs[0],
inputs[1]);
case wasm::kExprI64x2Splat:
return graph()->NewNode(mcgraph()->machine()->I64x2Splat(), inputs[0]);
case wasm::kExprI64x2Neg:
@@ -2299,12 +2299,16 @@ class ThreadImpl {
BINOP_CASE(F64x2Div, f64x2, float2, 2, base::Divide(a, b))
BINOP_CASE(F64x2Min, f64x2, float2, 2, JSMin(a, b))
BINOP_CASE(F64x2Max, f64x2, float2, 2, JSMax(a, b))
BINOP_CASE(F64x2Pmin, f64x2, float2, 2, std::min(a, b))
BINOP_CASE(F64x2Pmax, f64x2, float2, 2, std::max(a, b))
BINOP_CASE(F32x4Add, f32x4, float4, 4, a + b)
BINOP_CASE(F32x4Sub, f32x4, float4, 4, a - b)
BINOP_CASE(F32x4Mul, f32x4, float4, 4, a * b)
BINOP_CASE(F32x4Div, f32x4, float4, 4, a / b)
BINOP_CASE(F32x4Min, f32x4, float4, 4, JSMin(a, b))
BINOP_CASE(F32x4Max, f32x4, float4, 4, JSMax(a, b))
BINOP_CASE(F32x4Pmin, f32x4, float4, 4, std::min(a, b))
BINOP_CASE(F32x4Pmax, f32x4, float4, 4, std::max(a, b))
BINOP_CASE(I64x2Add, i64x2, int2, 2, base::AddWithWraparound(a, b))
BINOP_CASE(I64x2Sub, i64x2, int2, 2, base::SubWithWraparound(a, b))
BINOP_CASE(I64x2Mul, i64x2, int2, 2, base::MulWithWraparound(a, b))
@@ -321,6 +321,11 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_I16x8_OP(BitMask, "bitmask")
CASE_I32x4_OP(BitMask, "bitmask")
CASE_F32x4_OP(Pmin, "pmin")
CASE_F32x4_OP(Pmax, "pmax")
CASE_F64x2_OP(Pmin, "pmin")
CASE_F64x2_OP(Pmax, "pmax")
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
CASE_INT_OP(AtomicWait, "atomic.wait")
@@ -461,7 +461,11 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(I32x4AddHoriz, 0xfdb0, s_ss) \
V(F32x4AddHoriz, 0xfdb2, s_ss) \
V(F32x4RecipApprox, 0xfdb3, s_s) \
V(F32x4RecipSqrtApprox, 0xfdba, s_s)
V(F32x4RecipSqrtApprox, 0xfdba, s_s) \
V(F32x4Pmin, 0xfdda, s_ss) \
V(F32x4Pmax, 0xfddb, s_ss) \
V(F64x2Pmin, 0xfddc, s_ss) \
V(F64x2Pmax, 0xfddd, s_ss)
#define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \
V(I8x16ExtractLaneS, 0xfd15, _) \
@@ -99,12 +99,18 @@ T Div(T a, T b) {
template <typename T>
T Minimum(T a, T b) {
return a <= b ? a : b;
// Follow one of the possible implementations given in
// https://en.cppreference.com/w/cpp/algorithm/min so that it works the same
// way for floats (when given NaNs/Infs).
return (b < a) ? b : a;
}
template <typename T>
T Maximum(T a, T b) {
return a >= b ? a : b;
// Follow one of the possible implementations given in
// https://en.cppreference.com/w/cpp/algorithm/max so that it works the same
// way for floats (when given NaNs/Infs).
return (a < b) ? b : a;
}
template <typename T>
@@ -750,6 +756,18 @@ WASM_SIMD_TEST(F32x4Max) {
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Max, JSMax);
}
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(F32x4Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmin, Minimum);
}
WASM_SIMD_TEST_NO_LOWERING(F32x4Pmax) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmax, Maximum);
}
#endif // V8_TARGET_ARCH_X64
void RunF32x4CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, FloatCompareOp expected_op) {
WasmRunner<int32_t, float, float> r(execution_tier, lower_simd);
@@ -1340,6 +1358,18 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
}
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(F64x2Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmin, Minimum);
}
WASM_SIMD_TEST_NO_LOWERING(F64x2Pmax) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmax, Maximum);
}
#endif // V8_TARGET_ARCH_X64
void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleCompareOp expected_op) {
WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);