Commit 795246c4, authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm64] Implement f32x4 f64x2 pmin pmax

Bug: v8:10501
Change-Id: I6dad0f4da4d7c50d0793d39a5a119defb6b53844
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2191392
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67787}
parent 6a6ec7a1
......@@ -1901,6 +1901,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfma, Fmla, 2D);
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfms, Fmls, 2D);
case kArm64F64x2Pmin: {
  // f64x2.pmin(a, b)
  //   = v128.bitselect(b, a, f64x2.lt(b, a))
  //   = v128.bitselect(b, a, f64x2.gt(a, b))
  VRegister a = i.InputSimd128Register(0).V2D();
  VRegister b = i.InputSimd128Register(1).V2D();
  VRegister out = i.OutputSimd128Register().V2D();
  // The output register first receives the gt(a, b) comparison result...
  __ Fcmgt(out, a, b);
  // ...and then serves as the selector of the bitselect between b and a,
  // which also leaves the final value in the output register.
  __ Bsl(out.V16B(), b.V16B(), a.V16B());
  break;
}
case kArm64F64x2Pmax: {
  // f64x2.pmax(a, b)
  //   = v128.bitselect(b, a, f64x2.gt(b, a))
  VRegister a = i.InputSimd128Register(0).V2D();
  VRegister b = i.InputSimd128Register(1).V2D();
  VRegister out = i.OutputSimd128Register().V2D();
  // The output register first receives the gt(b, a) comparison result...
  __ Fcmgt(out, b, a);
  // ...and then serves as the selector of the bitselect between b and a,
  // which also leaves the final value in the output register.
  __ Bsl(out.V16B(), b.V16B(), a.V16B());
  break;
}
case kArm64F32x4Splat: {
__ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0);
break;
......@@ -1953,6 +1974,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfma, Fmla, 4S);
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfms, Fmls, 4S);
case kArm64F32x4Pmin: {
  // f32x4.pmin(a, b)
  //   = v128.bitselect(b, a, f32x4.lt(b, a))
  //   = v128.bitselect(b, a, f32x4.gt(a, b))
  VRegister a = i.InputSimd128Register(0).V4S();
  VRegister b = i.InputSimd128Register(1).V4S();
  VRegister out = i.OutputSimd128Register().V4S();
  // The output register first receives the gt(a, b) comparison result...
  __ Fcmgt(out, a, b);
  // ...and then serves as the selector of the bitselect between b and a,
  // which also leaves the final value in the output register.
  __ Bsl(out.V16B(), b.V16B(), a.V16B());
  break;
}
case kArm64F32x4Pmax: {
  // f32x4.pmax(a, b)
  //   = v128.bitselect(b, a, f32x4.gt(b, a))
  VRegister a = i.InputSimd128Register(0).V4S();
  VRegister b = i.InputSimd128Register(1).V4S();
  VRegister out = i.OutputSimd128Register().V4S();
  // The output register first receives the gt(b, a) comparison result...
  __ Fcmgt(out, b, a);
  // ...and then serves as the selector of the bitselect between b and a,
  // which also leaves the final value in the output register.
  __ Bsl(out.V16B(), b.V16B(), a.V16B());
  break;
}
case kArm64I64x2Splat: {
__ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0));
break;
......
......@@ -186,6 +186,8 @@ namespace compiler {
V(Arm64F64x2Le) \
V(Arm64F64x2Qfma) \
V(Arm64F64x2Qfms) \
V(Arm64F64x2Pmin) \
V(Arm64F64x2Pmax) \
V(Arm64F32x4Splat) \
V(Arm64F32x4ExtractLane) \
V(Arm64F32x4ReplaceLane) \
......@@ -209,6 +211,8 @@ namespace compiler {
V(Arm64F32x4Le) \
V(Arm64F32x4Qfma) \
V(Arm64F32x4Qfms) \
V(Arm64F32x4Pmin) \
V(Arm64F32x4Pmax) \
V(Arm64I64x2Splat) \
V(Arm64I64x2ExtractLane) \
V(Arm64I64x2ReplaceLane) \
......
......@@ -156,6 +156,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F64x2Le:
case kArm64F64x2Qfma:
case kArm64F64x2Qfms:
case kArm64F64x2Pmin:
case kArm64F64x2Pmax:
case kArm64F32x4Splat:
case kArm64F32x4ExtractLane:
case kArm64F32x4ReplaceLane:
......@@ -179,6 +181,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F32x4Le:
case kArm64F32x4Qfma:
case kArm64F32x4Qfms:
case kArm64F32x4Pmin:
case kArm64F32x4Pmax:
case kArm64I64x2Splat:
case kArm64I64x2ExtractLane:
case kArm64I64x2ReplaceLane:
......
......@@ -3613,6 +3613,34 @@ void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) {
VisitRR(this, kArm64Sxtw, node);
}
namespace {

// Shared instruction-selection routine for the f32x4/f64x2 pmin and pmax
// visitors. Emits |opcode| with |node|'s two inputs as operands and |node|
// itself defined as the output register.
void VisitPminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
                     Node* node) {
  Arm64OperandGenerator operand_gen(selector);
  // Both inputs are requested as unique registers: the two inputs are compared
  // first, and they must remain unchanged for the bitselect that follows.
  auto lhs = operand_gen.UseUniqueRegister(node->InputAt(0));
  auto rhs = operand_gen.UseUniqueRegister(node->InputAt(1));
  selector->Emit(opcode, operand_gen.DefineAsRegister(node), lhs, rhs);
}

}  // namespace
// Selects kArm64F32x4Pmin for an F32x4Pmin node via VisitPminOrPmax.
void InstructionSelector::VisitF32x4Pmin(Node* node) {
VisitPminOrPmax(this, kArm64F32x4Pmin, node);
}
// Selects kArm64F32x4Pmax for an F32x4Pmax node via VisitPminOrPmax.
void InstructionSelector::VisitF32x4Pmax(Node* node) {
VisitPminOrPmax(this, kArm64F32x4Pmax, node);
}
// Selects kArm64F64x2Pmin for an F64x2Pmin node via VisitPminOrPmax.
void InstructionSelector::VisitF64x2Pmin(Node* node) {
VisitPminOrPmax(this, kArm64F64x2Pmin, node);
}
// Selects kArm64F64x2Pmax for an F64x2Pmax node via VisitPminOrPmax.
void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitPminOrPmax(this, kArm64F64x2Pmax, node);
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
......
......@@ -2662,12 +2662,12 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); }
// && !V8_TARGET_ARCH_X64
// TODO(v8:10501) Prototyping pmin and pmax instructions.
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
// Placeholder visitors for the pmin/pmax nodes; each one calls
// UNIMPLEMENTED(). They are compiled only for targets not excluded by the
// enclosing architecture #if.
void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
// A FinishRegion node is handled by forwarding it to EmitIdentity.
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
......@@ -756,7 +756,7 @@ WASM_SIMD_TEST(F32x4Max) {
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Max, JSMax);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F32x4Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmin, Minimum);
......@@ -766,7 +766,7 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Pmax) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmax, Maximum);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
void RunF32x4CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, FloatCompareOp expected_op) {
......@@ -1358,7 +1358,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F64x2Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmin, Minimum);
......@@ -1368,7 +1368,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Pmax) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmax, Maximum);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleCompareOp expected_op) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment