Commit 795246c4 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm64] Implement f32x4 f64x2 pmin pmax

Bug: v8:10501
Change-Id: I6dad0f4da4d7c50d0793d39a5a119defb6b53844
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2191392
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67787}
parent 6a6ec7a1
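
For context: the WebAssembly pseudo-minimum and pseudo-maximum operations are a plain comparison-and-select rather than IEEE min/max, so NaN and -0.0 behavior follows the comparison alone. A minimal scalar sketch of the lane semantics this CL implements (illustrative helper names, not V8 code):

  // pmin(a, b) selects b when b < a, otherwise a; pmax selects b when a < b.
  double pseudo_min(double a, double b) { return b < a ? b : a; }
  double pseudo_max(double a, double b) { return a < b ? b : a; }
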
@@ -1901,6 +1901,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    }
    SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfma, Fmla, 2D);
    SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfms, Fmls, 2D);
case kArm64F64x2Pmin: {
VRegister dst = i.OutputSimd128Register().V2D();
VRegister lhs = i.InputSimd128Register(0).V2D();
VRegister rhs = i.InputSimd128Register(1).V2D();
// f64x2.pmin(lhs, rhs)
// = v128.bitselect(rhs, lhs, f64x2.lt(rhs,lhs))
// = v128.bitselect(rhs, lhs, f64x2.gt(lhs,rhs))
__ Fcmgt(dst, lhs, rhs);
__ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
break;
}
case kArm64F64x2Pmax: {
VRegister dst = i.OutputSimd128Register().V2D();
VRegister lhs = i.InputSimd128Register(0).V2D();
VRegister rhs = i.InputSimd128Register(1).V2D();
// f64x2.pmax(lhs, rhs)
// = v128.bitselect(rhs, lhs, f64x2.gt(rhs, lhs))
__ Fcmgt(dst, rhs, lhs);
__ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
break;
}
    case kArm64F32x4Splat: {
      __ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0);
      break;
@@ -1953,6 +1974,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    }
    SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfma, Fmla, 4S);
    SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfms, Fmls, 4S);
case kArm64F32x4Pmin: {
VRegister dst = i.OutputSimd128Register().V4S();
VRegister lhs = i.InputSimd128Register(0).V4S();
VRegister rhs = i.InputSimd128Register(1).V4S();
// f32x4.pmin(lhs, rhs)
// = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
// = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
__ Fcmgt(dst, lhs, rhs);
__ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
break;
}
case kArm64F32x4Pmax: {
VRegister dst = i.OutputSimd128Register().V4S();
VRegister lhs = i.InputSimd128Register(0).V4S();
VRegister rhs = i.InputSimd128Register(1).V4S();
// f32x4.pmax(lhs, rhs)
// = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
__ Fcmgt(dst, rhs, lhs);
__ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
break;
}
    case kArm64I64x2Splat: {
      __ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0));
      break;
...
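
The lowering above leans on the AArch64 bitwise-select idiom: Fcmgt writes an all-ones lane mask into the destination wherever the first operand compares greater, and Bsl then takes bits from its first source where the destination bit is set and from its second source otherwise. A single-lane sketch of the f64x2.pmin case, assuming that reading of the two instructions (hypothetical helper, not V8 code):

  #include <cstdint>
  #include <cstring>

  // One lane of: Fcmgt(dst, lhs, rhs); Bsl(dst, rhs, lhs);
  double pmin_lane(double lhs, double rhs) {
    uint64_t mask = lhs > rhs ? ~uint64_t{0} : uint64_t{0};  // Fcmgt lane result
    uint64_t l, r;
    std::memcpy(&l, &lhs, sizeof l);
    std::memcpy(&r, &rhs, sizeof r);
    uint64_t bits = (mask & r) | (~mask & l);  // Bsl: rhs where mask set, else lhs
    double result;
    std::memcpy(&result, &bits, sizeof result);
    return result;  // rhs if lhs > rhs, otherwise lhs
  }

The pmax cases only swap the comparison operands (Fcmgt(dst, rhs, lhs)), so rhs is selected exactly where rhs > lhs.
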
@@ -186,6 +186,8 @@ namespace compiler {
V(Arm64F64x2Le) \
V(Arm64F64x2Qfma) \
V(Arm64F64x2Qfms) \
V(Arm64F64x2Pmin) \
V(Arm64F64x2Pmax) \
V(Arm64F32x4Splat) \
V(Arm64F32x4ExtractLane) \
V(Arm64F32x4ReplaceLane) \
@@ -209,6 +211,8 @@ namespace compiler {
V(Arm64F32x4Le) \
V(Arm64F32x4Qfma) \
V(Arm64F32x4Qfms) \
V(Arm64F32x4Pmin) \
V(Arm64F32x4Pmax) \
V(Arm64I64x2Splat) \
V(Arm64I64x2ExtractLane) \
V(Arm64I64x2ReplaceLane) \
...
@@ -156,6 +156,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kArm64F64x2Le:
    case kArm64F64x2Qfma:
    case kArm64F64x2Qfms:
case kArm64F64x2Pmin:
case kArm64F64x2Pmax:
    case kArm64F32x4Splat:
    case kArm64F32x4ExtractLane:
    case kArm64F32x4ReplaceLane:
@@ -179,6 +181,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kArm64F32x4Le:
    case kArm64F32x4Qfma:
    case kArm64F32x4Qfms:
case kArm64F32x4Pmin:
case kArm64F32x4Pmax:
    case kArm64I64x2Splat:
    case kArm64I64x2ExtractLane:
    case kArm64I64x2ReplaceLane:
...
@@ -3613,6 +3613,34 @@ void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) {
  VisitRR(this, kArm64Sxtw, node);
}
namespace {
void VisitPminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
Node* node) {
Arm64OperandGenerator g(selector);
// Need all unique registers because we first compare the two inputs, then we
// need the inputs to remain unchanged for the bitselect later.
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)),
g.UseUniqueRegister(node->InputAt(1)));
}
} // namespace
void InstructionSelector::VisitF32x4Pmin(Node* node) {
VisitPminOrPmax(this, kArm64F32x4Pmin, node);
}
void InstructionSelector::VisitF32x4Pmax(Node* node) {
VisitPminOrPmax(this, kArm64F32x4Pmax, node);
}
void InstructionSelector::VisitF64x2Pmin(Node* node) {
VisitPminOrPmax(this, kArm64F64x2Pmin, node);
}
void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitPminOrPmax(this, kArm64F64x2Pmax, node);
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
...
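
The selector uses DefineAsRegister plus UseUniqueRegister (rather than letting the output alias an input) because the sequence is destructive: Fcmgt first overwrites the destination with the compare mask, and Bsl afterwards still needs both original inputs. A scalar sketch of the hazard being avoided (hypothetical, not V8 code):

  #include <cstdint>

  // If dst aliased the lhs input, the Fcmgt mask would clobber lhs before
  // Bsl reads it, and the select would return mask bits instead of lhs.
  uint64_t pmin_bits_with_aliasing(uint64_t lhs_reg, uint64_t rhs_reg, bool lhs_gt_rhs) {
    uint64_t& dst = lhs_reg;                    // pretend dst is the same register as lhs
    dst = lhs_gt_rhs ? ~uint64_t{0} : 0;        // Fcmgt: the lhs value is now gone
    return (dst & rhs_reg) | (~dst & lhs_reg);  // Bsl selects from a clobbered lhs
  }
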
@@ -2662,12 +2662,12 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); }
// && !V8_TARGET_ARCH_X64
// TODO(v8:10501) Prototyping pmin and pmax instructions.
-#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
+#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
-#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
+#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
...
@@ -756,7 +756,7 @@ WASM_SIMD_TEST(F32x4Max) {
  RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Max, JSMax);
}
-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F32x4Pmin) {
  FLAG_SCOPE(wasm_simd_post_mvp);
  RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmin, Minimum);
@@ -766,7 +766,7 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Pmax) {
  FLAG_SCOPE(wasm_simd_post_mvp);
  RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmax, Maximum);
}
-#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
+#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
void RunF32x4CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                           WasmOpcode opcode, FloatCompareOp expected_op) {
@@ -1358,7 +1358,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
  RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
}
-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F64x2Pmin) {
  FLAG_SCOPE(wasm_simd_post_mvp);
  RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmin, Minimum);
@@ -1368,7 +1368,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Pmax) {
  FLAG_SCOPE(wasm_simd_post_mvp);
  RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmax, Maximum);
}
-#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
+#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                           WasmOpcode opcode, DoubleCompareOp expected_op) {
...