Commit 9d9e8b41 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][arm64] Prototype i64x2.bitmask

Drive-by cleanup: switch the other bitmask instructions to
UseScratchRegisterScope instead of allocating temporary registers in the
instruction selector.

Bug: v8:10997
Change-Id: Id46d249fd20ceaeab8e867babec8b34d7995c17f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2548081
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71351}
parent 11910df3
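For context (not part of the commit): i64x2.bitmask packs the sign bit of each 64-bit lane into the low bits of a 32-bit result, which is what the new kArm64I64x2BitMask sequence below computes. Ushr isolates the sign bits, the two Movs extract them from lanes 0 and 1, and the shifted Add packs them. A minimal scalar sketch of the same semantics, using a hypothetical helper name:

#include <cstdint>

// Illustrative scalar equivalent of i64x2.bitmask (not part of this commit):
// bit i of the result is the sign bit of lane i, so for two 64-bit lanes the
// result is always in the range [0, 3].
int32_t ScalarI64x2BitMask(int64_t lane0, int64_t lane1) {
  int32_t bit0 = static_cast<int32_t>(static_cast<uint64_t>(lane0) >> 63);
  int32_t bit1 = static_cast<int32_t>(static_cast<uint64_t>(lane1) >> 63);
  // Mirrors Add(dst.W(), dst.W(), Operand(tmp2.W(), LSL, 1)) in the codegen.
  return bit0 | (bit1 << 1);
}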
@@ -2208,6 +2208,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 6, V2D, Ushl, X);
break;
}
case kArm64I64x2BitMask: {
UseScratchRegisterScope scope(tasm());
Register dst = i.OutputRegister32();
VRegister src = i.InputSimd128Register(0);
VRegister tmp1 = scope.AcquireV(kFormat2D);
Register tmp2 = scope.AcquireX();
__ Ushr(tmp1.V2D(), src.V2D(), 63);
__ Mov(dst.X(), tmp1.D(), 0);
__ Mov(tmp2.X(), tmp1.D(), 1);
__ Add(dst.W(), dst.W(), Operand(tmp2.W(), LSL, 1));
break;
}
case kArm64I32x4Splat: {
__ Dup(i.OutputSimd128Register().V4S(), i.InputRegister32(0));
break;
@@ -2265,10 +2277,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_BINOP_CASE(kArm64I32x4GeU, Cmhs, 4S);
SIMD_UNOP_CASE(kArm64I32x4Abs, Abs, 4S);
case kArm64I32x4BitMask: {
UseScratchRegisterScope scope(tasm());
Register dst = i.OutputRegister32();
VRegister src = i.InputSimd128Register(0);
VRegister tmp = i.TempSimd128Register(0);
VRegister mask = i.TempSimd128Register(1);
VRegister tmp = scope.AcquireQ();
VRegister mask = scope.AcquireQ();
__ Sshr(tmp.V4S(), src.V4S(), 31);
// Set i-th bit of each lane i. When AND with tmp, the lanes that
@@ -2384,10 +2397,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_BINOP_CASE(kArm64I16x8Q15MulRSatS, Sqrdmulh, 8H);
SIMD_UNOP_CASE(kArm64I16x8Abs, Abs, 8H);
case kArm64I16x8BitMask: {
UseScratchRegisterScope scope(tasm());
Register dst = i.OutputRegister32();
VRegister src = i.InputSimd128Register(0);
VRegister tmp = i.TempSimd128Register(0);
VRegister mask = i.TempSimd128Register(1);
VRegister tmp = scope.AcquireQ();
VRegister mask = scope.AcquireQ();
__ Sshr(tmp.V8H(), src.V8H(), 15);
// Set i-th bit of each lane i. When AND with tmp, the lanes that
@@ -2490,10 +2504,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_BINOP_CASE(kArm64I8x16RoundingAverageU, Urhadd, 16B);
SIMD_UNOP_CASE(kArm64I8x16Abs, Abs, 16B);
case kArm64I8x16BitMask: {
UseScratchRegisterScope scope(tasm());
Register dst = i.OutputRegister32();
VRegister src = i.InputSimd128Register(0);
VRegister tmp = i.TempSimd128Register(0);
VRegister mask = i.TempSimd128Register(1);
VRegister tmp = scope.AcquireQ();
VRegister mask = scope.AcquireQ();
// Set i-th bit of each lane i. When AND with tmp, the lanes that
// are signed will have i-th bit set, unsigned will be 0.
......
@@ -233,6 +233,7 @@ namespace compiler {
V(Arm64I64x2Mul) \
V(Arm64I64x2Eq) \
V(Arm64I64x2ShrU) \
V(Arm64I64x2BitMask) \
V(Arm64I32x4Splat) \
V(Arm64I32x4ExtractLane) \
V(Arm64I32x4ReplaceLane) \
......
@@ -199,6 +199,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I64x2Mul:
case kArm64I64x2Eq:
case kArm64I64x2ShrU:
case kArm64I64x2BitMask:
case kArm64I32x4Splat:
case kArm64I32x4ExtractLane:
case kArm64I32x4ReplaceLane:
......
@@ -3305,32 +3305,36 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8) \
V(I8x16)
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs, kArm64F64x2Abs) \
V(F64x2Neg, kArm64F64x2Neg) \
V(F64x2Sqrt, kArm64F64x2Sqrt) \
V(F32x4SConvertI32x4, kArm64F32x4SConvertI32x4) \
V(F32x4UConvertI32x4, kArm64F32x4UConvertI32x4) \
V(F32x4Abs, kArm64F32x4Abs) \
V(F32x4Neg, kArm64F32x4Neg) \
V(F32x4Sqrt, kArm64F32x4Sqrt) \
V(F32x4RecipApprox, kArm64F32x4RecipApprox) \
V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox) \
V(I64x2Neg, kArm64I64x2Neg) \
V(I32x4SConvertF32x4, kArm64I32x4SConvertF32x4) \
V(I32x4Neg, kArm64I32x4Neg) \
V(I32x4UConvertF32x4, kArm64I32x4UConvertF32x4) \
V(I32x4Abs, kArm64I32x4Abs) \
V(I16x8Neg, kArm64I16x8Neg) \
V(I16x8Abs, kArm64I16x8Abs) \
V(I8x16Neg, kArm64I8x16Neg) \
V(I8x16Abs, kArm64I8x16Abs) \
V(S128Not, kArm64S128Not) \
V(V32x4AnyTrue, kArm64V128AnyTrue) \
V(V32x4AllTrue, kArm64V32x4AllTrue) \
V(V16x8AnyTrue, kArm64V128AnyTrue) \
V(V16x8AllTrue, kArm64V16x8AllTrue) \
V(V8x16AnyTrue, kArm64V128AnyTrue) \
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs, kArm64F64x2Abs) \
V(F64x2Neg, kArm64F64x2Neg) \
V(F64x2Sqrt, kArm64F64x2Sqrt) \
V(F32x4SConvertI32x4, kArm64F32x4SConvertI32x4) \
V(F32x4UConvertI32x4, kArm64F32x4UConvertI32x4) \
V(F32x4Abs, kArm64F32x4Abs) \
V(F32x4Neg, kArm64F32x4Neg) \
V(F32x4Sqrt, kArm64F32x4Sqrt) \
V(F32x4RecipApprox, kArm64F32x4RecipApprox) \
V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox) \
V(I64x2Neg, kArm64I64x2Neg) \
V(I64x2BitMask, kArm64I64x2BitMask) \
V(I32x4SConvertF32x4, kArm64I32x4SConvertF32x4) \
V(I32x4Neg, kArm64I32x4Neg) \
V(I32x4UConvertF32x4, kArm64I32x4UConvertF32x4) \
V(I32x4Abs, kArm64I32x4Abs) \
V(I32x4BitMask, kArm64I32x4BitMask) \
V(I16x8Neg, kArm64I16x8Neg) \
V(I16x8Abs, kArm64I16x8Abs) \
V(I16x8BitMask, kArm64I16x8BitMask) \
V(I8x16Neg, kArm64I8x16Neg) \
V(I8x16Abs, kArm64I8x16Abs) \
V(I8x16BitMask, kArm64I8x16BitMask) \
V(S128Not, kArm64S128Not) \
V(V32x4AnyTrue, kArm64V128AnyTrue) \
V(V32x4AllTrue, kArm64V32x4AllTrue) \
V(V16x8AnyTrue, kArm64V128AnyTrue) \
V(V16x8AllTrue, kArm64V16x8AllTrue) \
V(V8x16AnyTrue, kArm64V128AnyTrue) \
V(V8x16AllTrue, kArm64V8x16AllTrue)
#define SIMD_SHIFT_OP_LIST(V) \
@@ -3578,29 +3582,6 @@ VISIT_SIMD_QFMOP(F32x4Qfma)
VISIT_SIMD_QFMOP(F32x4Qfms)
#undef VISIT_SIMD_QFMOP
namespace {
template <ArchOpcode opcode>
void VisitBitMask(InstructionSelector* selector, Node* node) {
Arm64OperandGenerator g(selector);
InstructionOperand temps[] = {g.TempSimd128Register(),
g.TempSimd128Register()};
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
}
} // namespace
void InstructionSelector::VisitI8x16BitMask(Node* node) {
VisitBitMask<kArm64I8x16BitMask>(this, node);
}
void InstructionSelector::VisitI16x8BitMask(Node* node) {
VisitBitMask<kArm64I16x8BitMask>(this, node);
}
void InstructionSelector::VisitI32x4BitMask(Node* node) {
VisitBitMask<kArm64I32x4BitMask>(this, node);
}
namespace {
struct ShuffleEntry {
......
@@ -2802,9 +2802,6 @@ void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) {
void InstructionSelector::VisitLoadLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitStoreLane(Node* node) { UNIMPLEMENTED(); }
// TODO(v8:10997) Prototype i64x2.bitmask.
void InstructionSelector::VisitI64x2BitMask(Node* node) { UNIMPLEMENTED(); }
// TODO(v8:10983) Prototyping sign select.
void InstructionSelector::VisitI8x16SignSelect(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8SignSelect(Node* node) { UNIMPLEMENTED(); }
@@ -2812,6 +2809,11 @@ void InstructionSelector::VisitI32x4SignSelect(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2SignSelect(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
// TODO(v8:10997) Prototype i64x2.bitmask.
void InstructionSelector::VisitI64x2BitMask(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {
......
@@ -1630,7 +1630,7 @@ WASM_SIMD_TEST(I32x4BitMask) {
}
// TODO(v8:10997) Prototyping i64x2.bitmask.
#if V8_TARGET_ARCH_X64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(I64x2BitMask) {
FLAG_SCOPE(wasm_simd_post_mvp);
WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
@@ -1648,7 +1648,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2BitMask) {
CHECK_EQ(actual, expected);
}
}
#endif // V8_TARGET_ARCH_X64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST(I8x16Splat) {
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
......