Commit 507f85c0 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm] Implement f32x4 f64x2 pmin pmax

Bug: v8:10501
Change-Id: Ib61f7957e1fd7cfa498bce28171b5f9e4b2f93c3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2191393
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67913}
parent fde3691b
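For reference, the pseudo-minimum and pseudo-maximum operations lowered in this change are defined purely in terms of an ordered comparison rather than IEEE min/max. A minimal scalar sketch of the per-lane semantics, assuming the proposal's b < a ? b : a and a < b ? b : a formulation (illustrative only, not part of this change):

// Scalar model of the wasm pmin/pmax lane semantics (illustrative sketch).
#include <cstdio>

template <typename T>
T PseudoMin(T a, T b) {
  // Take b only when b is strictly less than a; if either operand is NaN the
  // comparison is false and a is returned unchanged.
  return b < a ? b : a;
}

template <typename T>
T PseudoMax(T a, T b) {
  // Take b only when a is strictly less than b.
  return a < b ? b : a;
}

int main() {
  std::printf("%f %f\n", PseudoMin(1.0, 2.0), PseudoMax(1.0, 2.0));  // 1.0 2.0
  return 0;
}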
@@ -1997,6 +1997,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vmov(i.OutputSimd128Register().high(), scratch, scratch);
break;
}
case kArmF64x2Pmin: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
DCHECK_EQ(dst, lhs);
// Move rhs only when rhs is strictly less than lhs (mi).
__ VFPCompareAndSetFlags(rhs.low(), lhs.low());
__ vmov(dst.low(), rhs.low(), mi);
__ VFPCompareAndSetFlags(rhs.high(), lhs.high());
__ vmov(dst.high(), rhs.high(), mi);
break;
}
case kArmF64x2Pmax: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
DCHECK_EQ(dst, lhs);
// Move rhs only when rhs is strictly greater than lhs (gt).
__ VFPCompareAndSetFlags(rhs.low(), lhs.low());
__ vmov(dst.low(), rhs.low(), gt);
__ VFPCompareAndSetFlags(rhs.high(), lhs.high());
__ vmov(dst.high(), rhs.high(), gt);
break;
}
case kArmI64x2SplatI32Pair: {
Simd128Register dst = i.OutputSimd128Register();
__ vdup(Neon32, dst, i.InputRegister(0));
@@ -2220,6 +2246,33 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(0));
break;
}
case kArmF32x4Pmin: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
DCHECK_NE(dst, lhs);
DCHECK_NE(dst, rhs);
// f32x4.pmin(lhs, rhs)
// = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
// = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
__ vcgt(dst, lhs, rhs);
__ vbsl(dst, rhs, lhs);
break;
}
case kArmF32x4Pmax: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
DCHECK_NE(dst, lhs);
DCHECK_NE(dst, rhs);
// f32x4.pmax(lhs, rhs)
// = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
__ vcgt(dst, rhs, lhs);
__ vbsl(dst, rhs, lhs);
break;
}
case kArmI32x4Splat: {
__ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
break;
......
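The two code-generator hunks above use different strategies: the f64x2 cases compare each VFP double lane and conditionally move the matching rhs lane into dst, while the f32x4 cases build a lane mask with vcgt and bit-select with vbsl. A scalar sketch of what each sequence computes per lane (illustrative only; the helper names below are mine, not from this CL):

// Scalar models of the two lowering strategies used in the cases above.
#include <cstdint>
#include <cstdio>
#include <cstring>

// f64x2 path: VFP compare, then a vmov of rhs into dst predicated on mi/gt.
// dst aliases lhs (DCHECK_EQ(dst, lhs)), which is safe because dst lanes are
// only ever overwritten with rhs.
double PminLaneF64(double lhs, double rhs) {
  double dst = lhs;
  if (rhs < lhs) dst = rhs;  // vmov(dst, rhs, mi)
  return dst;
}

// f32x4 path: vcgt produces an all-ones/all-zeros lane mask in dst, then
// vbsl selects rhs where the mask is set and lhs elsewhere.
float PminLaneF32(float lhs, float rhs) {
  uint32_t l, r;
  std::memcpy(&l, &lhs, sizeof l);
  std::memcpy(&r, &rhs, sizeof r);
  uint32_t mask = (lhs > rhs) ? 0xFFFFFFFFu : 0u;  // vcgt(dst, lhs, rhs)
  uint32_t bits = (mask & r) | (~mask & l);        // vbsl(dst, rhs, lhs)
  float out;
  std::memcpy(&out, &bits, sizeof out);
  return out;
}

int main() {
  std::printf("%f %f\n", PminLaneF64(2.0, 1.0), PminLaneF32(2.0f, 1.0f));
  return 0;
}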
@@ -144,6 +144,8 @@ namespace compiler {
V(ArmF64x2Ne) \
V(ArmF64x2Lt) \
V(ArmF64x2Le) \
V(ArmF64x2Pmin) \
V(ArmF64x2Pmax) \
V(ArmF32x4Splat) \
V(ArmF32x4ExtractLane) \
V(ArmF32x4ReplaceLane) \
@@ -165,6 +167,8 @@ namespace compiler {
V(ArmF32x4Ne) \
V(ArmF32x4Lt) \
V(ArmF32x4Le) \
V(ArmF32x4Pmin) \
V(ArmF32x4Pmax) \
V(ArmI64x2SplatI32Pair) \
V(ArmI64x2ReplaceLaneI32Pair) \
V(ArmI64x2Neg) \
......
@@ -124,6 +124,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF64x2Ne:
case kArmF64x2Lt:
case kArmF64x2Le:
case kArmF64x2Pmin:
case kArmF64x2Pmax:
case kArmF32x4Splat:
case kArmF32x4ExtractLane:
case kArmF32x4ReplaceLane:
@@ -145,6 +147,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF32x4Ne:
case kArmF32x4Lt:
case kArmF32x4Le:
case kArmF32x4Pmin:
case kArmF32x4Pmax:
case kArmI64x2SplatI32Pair:
case kArmI64x2ReplaceLaneI32Pair:
case kArmI64x2Neg:
......
@@ -2941,6 +2941,42 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) {
VisitBitMask<kArmI32x4BitMask>(this, node);
}
namespace {
void VisitF32x4PminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
Node* node) {
ArmOperandGenerator g(selector);
// Need all unique registers because we first compare the two inputs, then we
// need the inputs to remain unchanged for the bitselect later.
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)),
g.UseUniqueRegister(node->InputAt(1)));
}
void VisitF64x2PminOrPMax(InstructionSelector* selector, ArchOpcode opcode,
Node* node) {
ArmOperandGenerator g(selector);
selector->Emit(opcode, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)));
}
} // namespace
void InstructionSelector::VisitF32x4Pmin(Node* node) {
VisitF32x4PminOrPmax(this, kArmF32x4Pmin, node);
}
void InstructionSelector::VisitF32x4Pmax(Node* node) {
VisitF32x4PminOrPmax(this, kArmF32x4Pmax, node);
}
void InstructionSelector::VisitF64x2Pmin(Node* node) {
VisitF64x2PminOrPMax(this, kArmF64x2Pmin, node);
}
void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitF64x2PminOrPMax(this, kArmF64x2Pmax, node);
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
......
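The two selector helpers above impose different register constraints for a reason that follows from the code generation: the f32x4 sequence writes the compare mask into dst before vbsl reads both inputs, so dst must not alias either input, whereas the f64x2 sequence only ever replaces dst lanes with rhs, so dst can be defined same-as-first. A small scalar simulation of the aliasing hazard the unique-register constraint avoids (hypothetical illustration, not V8 code):

// Scalar simulation of why kArmF32x4Pmin/Pmax need dst distinct from inputs.
#include <cassert>
#include <cstdint>

// Stand-in for vbsl: result = (mask & a) | (~mask & b).
uint32_t Bitselect(uint32_t mask, uint32_t a, uint32_t b) {
  return (mask & a) | (~mask & b);
}

int main() {
  const uint32_t lhs = 0x3F800000u;  // bit pattern of 1.0f
  const uint32_t rhs = 0x40000000u;  // bit pattern of 2.0f

  // With a separate dst register: vcgt(lhs > rhs) is false, the mask is zero,
  // so the bit-select correctly keeps lhs (the pseudo-minimum, 1.0f).
  uint32_t mask = 0u;
  assert(Bitselect(mask, rhs, lhs) == lhs);

  // If dst aliased lhs, writing the mask would clobber lhs before vbsl could
  // read it, and the lane would collapse to the mask value instead of 1.0f.
  uint32_t aliased_lhs = 0u;  // lhs's register now holds the mask
  assert(Bitselect(aliased_lhs, rhs, aliased_lhs) != lhs);
  return 0;
}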
@@ -2651,23 +2651,20 @@ void InstructionSelector::VisitI64x2MinU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MaxU(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X
// TODO(v8:10308) Bitmask operations are in prototype now, we can remove these
// guards when they go into the proposal.
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32 && \
!V8_TARGET_ARCH_X64
// TODO(v8:10308) Bitmask operations are in prototype now, we can remove these
// guards when they go into the proposal.
void InstructionSelector::VisitI8x16BitMask(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8BitMask(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
// && !V8_TARGET_ARCH_X64
// TODO(v8:10501) Prototyping pmin and pmax instructions.
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
// && !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
@@ -757,7 +757,9 @@ WASM_SIMD_TEST(F32x4Max) {
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Max, JSMax);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
// TODO(v8:10501) Prototyping pmin and pmax instructions.
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
V8_TARGET_ARCH_ARM
WASM_SIMD_TEST_NO_LOWERING(F32x4Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmin, Minimum);
@@ -767,7 +769,8 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Pmax) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmax, Maximum);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 ||
// V8_TARGET_ARCH_ARM
void RunF32x4CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, FloatCompareOp expected_op) {
@@ -1359,7 +1362,9 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
// TODO(v8:10501) Prototyping pmin and pmax instructions.
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
V8_TARGET_ARCH_ARM
WASM_SIMD_TEST_NO_LOWERING(F64x2Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmin, Minimum);
@@ -1369,7 +1374,8 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Pmax) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmax, Maximum);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 ||
// V8_TARGET_ARCH_ARM
void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleCompareOp expected_op) {
......