Commit 5d1392b6 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][ia32] Implement f32x4 f64x2 pmin pmax

Same implementation as the one for x64 in https://crrev.com/c/2186630.

Bug: v8:10501
Change-Id: If2b6c0fdc649afba3449d9579452cf7047a55a54
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2188556
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67721}
parent 131fa2c9
...@@ -366,7 +366,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -366,7 +366,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Cmpneqpd, cmpneqpd) AVX_PACKED_OP3(Cmpneqpd, cmpneqpd)
AVX_PACKED_OP3(Cmpltpd, cmpltpd) AVX_PACKED_OP3(Cmpltpd, cmpltpd)
AVX_PACKED_OP3(Cmplepd, cmplepd) AVX_PACKED_OP3(Cmplepd, cmplepd)
AVX_PACKED_OP3(Minps, minps)
AVX_PACKED_OP3(Minpd, minpd) AVX_PACKED_OP3(Minpd, minpd)
AVX_PACKED_OP3(Maxps, maxps)
AVX_PACKED_OP3(Maxpd, maxpd) AVX_PACKED_OP3(Maxpd, maxpd)
AVX_PACKED_OP3(Cmpunordps, cmpunordps) AVX_PACKED_OP3(Cmpunordps, cmpunordps)
AVX_PACKED_OP3(Cmpunordpd, cmpunordpd) AVX_PACKED_OP3(Cmpunordpd, cmpunordpd)
......
...@@ -2020,6 +2020,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2020,6 +2020,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
case kIA32F64x2Pmin: {
  // Wasm f64x2.pmin, lowered to a single minpd. The instruction selector
  // emits the wasm operands in swapped order (see VisitPminOrPmax), so
  // input 0 here is the wasm b operand.
  XMMRegister dst = i.OutputSimd128Register();
  // minpd overwrites its first operand; DefineSameAsFirst in the selector
  // guarantees dst aliases input 0.
  DCHECK_EQ(dst, i.InputSimd128Register(0));
  __ Minpd(dst, dst, i.InputSimd128Register(1));
  break;
}
case kIA32F64x2Pmax: {
  // Wasm f64x2.pmax, lowered to a single maxpd. The instruction selector
  // emits the wasm operands in swapped order (see VisitPminOrPmax), so
  // input 0 here is the wasm b operand.
  XMMRegister dst = i.OutputSimd128Register();
  // maxpd overwrites its first operand; DefineSameAsFirst in the selector
  // guarantees dst aliases input 0.
  DCHECK_EQ(dst, i.InputSimd128Register(0));
  __ Maxpd(dst, dst, i.InputSimd128Register(1));
  break;
}
case kIA32I64x2SplatI32Pair: { case kIA32I64x2SplatI32Pair: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
__ Pinsrd(dst, i.InputRegister(0), 0); __ Pinsrd(dst, i.InputRegister(0), 0);
...@@ -2417,6 +2429,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2417,6 +2429,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
case kIA32F32x4Pmin: {
  // Wasm f32x4.pmin, lowered to a single minps. The instruction selector
  // emits the wasm operands in swapped order (see VisitPminOrPmax), so
  // input 0 here is the wasm b operand.
  XMMRegister dst = i.OutputSimd128Register();
  // minps overwrites its first operand; DefineSameAsFirst in the selector
  // guarantees dst aliases input 0.
  DCHECK_EQ(dst, i.InputSimd128Register(0));
  __ Minps(dst, dst, i.InputSimd128Register(1));
  break;
}
case kIA32F32x4Pmax: {
  // Wasm f32x4.pmax, lowered to a single maxps. The instruction selector
  // emits the wasm operands in swapped order (see VisitPminOrPmax), so
  // input 0 here is the wasm b operand.
  XMMRegister dst = i.OutputSimd128Register();
  // maxps overwrites its first operand; DefineSameAsFirst in the selector
  // guarantees dst aliases input 0.
  DCHECK_EQ(dst, i.InputSimd128Register(0));
  __ Maxps(dst, dst, i.InputSimd128Register(1));
  break;
}
case kIA32I32x4Splat: { case kIA32I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0)); __ Movd(dst, i.InputOperand(0));
......
...@@ -134,6 +134,8 @@ namespace compiler { ...@@ -134,6 +134,8 @@ namespace compiler {
V(IA32F64x2Ne) \ V(IA32F64x2Ne) \
V(IA32F64x2Lt) \ V(IA32F64x2Lt) \
V(IA32F64x2Le) \ V(IA32F64x2Le) \
V(IA32F64x2Pmin) \
V(IA32F64x2Pmax) \
V(IA32I64x2SplatI32Pair) \ V(IA32I64x2SplatI32Pair) \
V(IA32I64x2ReplaceLaneI32Pair) \ V(IA32I64x2ReplaceLaneI32Pair) \
V(IA32I64x2Neg) \ V(IA32I64x2Neg) \
...@@ -182,6 +184,8 @@ namespace compiler { ...@@ -182,6 +184,8 @@ namespace compiler {
V(AVXF32x4Lt) \ V(AVXF32x4Lt) \
V(SSEF32x4Le) \ V(SSEF32x4Le) \
V(AVXF32x4Le) \ V(AVXF32x4Le) \
V(IA32F32x4Pmin) \
V(IA32F32x4Pmax) \
V(IA32I32x4Splat) \ V(IA32I32x4Splat) \
V(IA32I32x4ExtractLane) \ V(IA32I32x4ExtractLane) \
V(SSEI32x4ReplaceLane) \ V(SSEI32x4ReplaceLane) \
......
...@@ -115,6 +115,8 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -115,6 +115,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32F64x2Ne: case kIA32F64x2Ne:
case kIA32F64x2Lt: case kIA32F64x2Lt:
case kIA32F64x2Le: case kIA32F64x2Le:
case kIA32F64x2Pmin:
case kIA32F64x2Pmax:
case kIA32I64x2SplatI32Pair: case kIA32I64x2SplatI32Pair:
case kIA32I64x2ReplaceLaneI32Pair: case kIA32I64x2ReplaceLaneI32Pair:
case kIA32I64x2Neg: case kIA32I64x2Neg:
...@@ -163,6 +165,8 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -163,6 +165,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF32x4Lt: case kAVXF32x4Lt:
case kSSEF32x4Le: case kSSEF32x4Le:
case kAVXF32x4Le: case kAVXF32x4Le:
case kIA32F32x4Pmin:
case kIA32F32x4Pmax:
case kIA32I32x4Splat: case kIA32I32x4Splat:
case kIA32I32x4ExtractLane: case kIA32I32x4ExtractLane:
case kSSEI32x4ReplaceLane: case kSSEI32x4ReplaceLane:
......
...@@ -2810,6 +2810,34 @@ void InstructionSelector::VisitS8x16Swizzle(Node* node) { ...@@ -2810,6 +2810,34 @@ void InstructionSelector::VisitS8x16Swizzle(Node* node) {
arraysize(temps), temps); arraysize(temps), temps);
} }
namespace {
// Shared lowering for the F32x4/F64x2 pmin and pmax operations.
// Due to the way minps/minpd work, we want the dst to be same as the second
// input: b = pmin(a, b) directly maps to minps b a, so the wasm operands are
// deliberately emitted in swapped order.
void VisitPminOrPmax(InstructionSelector* selector, Node* node,
                     ArchOpcode opcode) {
  IA32OperandGenerator gen(selector);
  Node* const lhs = node->InputAt(0);
  Node* const rhs = node->InputAt(1);
  selector->Emit(opcode, gen.DefineSameAsFirst(node), gen.UseRegister(rhs),
                 gen.UseRegister(lhs));
}
}  // namespace
// Lowers wasm f32x4.pmin; operand swapping for minps is handled by
// VisitPminOrPmax.
void InstructionSelector::VisitF32x4Pmin(Node* node) {
  VisitPminOrPmax(this, node, kIA32F32x4Pmin);
}
// Lowers wasm f32x4.pmax; operand swapping for maxps is handled by
// VisitPminOrPmax.
void InstructionSelector::VisitF32x4Pmax(Node* node) {
  VisitPminOrPmax(this, node, kIA32F32x4Pmax);
}
// Lowers wasm f64x2.pmin; operand swapping for minpd is handled by
// VisitPminOrPmax.
void InstructionSelector::VisitF64x2Pmin(Node* node) {
  VisitPminOrPmax(this, node, kIA32F64x2Pmin);
}
// Lowers wasm f64x2.pmax; operand swapping for maxpd is handled by
// VisitPminOrPmax.
void InstructionSelector::VisitF64x2Pmax(Node* node) {
  VisitPminOrPmax(this, node, kIA32F64x2Pmax);
}
// static // static
MachineOperatorBuilder::Flags MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() { InstructionSelector::SupportedMachineOperatorFlags() {
......
...@@ -2662,12 +2662,12 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); } ...@@ -2662,12 +2662,12 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); }
// && !V8_TARGET_ARCH_X64 // && !V8_TARGET_ARCH_X64
// TODO(v8:10501) Prototyping pmin and pmax instructions. // TODO(v8:10501) Prototyping pmin and pmax instructions.
#if !V8_TARGET_ARCH_X64 #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
...@@ -756,7 +756,7 @@ WASM_SIMD_TEST(F32x4Max) { ...@@ -756,7 +756,7 @@ WASM_SIMD_TEST(F32x4Max) {
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Max, JSMax); RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Max, JSMax);
} }
#if V8_TARGET_ARCH_X64 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(F32x4Pmin) { WASM_SIMD_TEST_NO_LOWERING(F32x4Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp); FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmin, Minimum); RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmin, Minimum);
...@@ -766,7 +766,7 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Pmax) { ...@@ -766,7 +766,7 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Pmax) {
FLAG_SCOPE(wasm_simd_post_mvp); FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmax, Maximum); RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmax, Maximum);
} }
#endif // V8_TARGET_ARCH_X64 #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
void RunF32x4CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd, void RunF32x4CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, FloatCompareOp expected_op) { WasmOpcode opcode, FloatCompareOp expected_op) {
...@@ -1358,7 +1358,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) { ...@@ -1358,7 +1358,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div); RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
} }
#if V8_TARGET_ARCH_X64 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(F64x2Pmin) { WASM_SIMD_TEST_NO_LOWERING(F64x2Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp); FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmin, Minimum); RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmin, Minimum);
...@@ -1368,7 +1368,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Pmax) { ...@@ -1368,7 +1368,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Pmax) {
FLAG_SCOPE(wasm_simd_post_mvp); FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmax, Maximum); RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmax, Maximum);
} }
#endif // V8_TARGET_ARCH_X64 #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd, void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleCompareOp expected_op) { WasmOpcode opcode, DoubleCompareOp expected_op) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment