Commit 910d92e2 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64][arm64] Add i64x2 ne and alltrue

This is a partial revert of https://crrev.com/c/2457669 to add back
i64x2.ne and i64x2.all_true, which were accepted into the proposal
(https://github.com/WebAssembly/simd/issues/419).

This implements them only for x64 and arm64 in TurboFan; other architectures
and Liftoff will come later.

Bug: v8:11347,v8:11348
Change-Id: I86934478760e3d501ecdb3ce6c9b702764cc0838
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2665005
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72489}
parent 0df3de18
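
For reference, the two reinstated operations have straightforward lane semantics: i64x2.ne compares the two 64-bit lanes pairwise, setting each result lane to all-ones where the lanes differ, and i64x2.all_true reduces a vector to 1 if every 64-bit lane is non-zero. A minimal scalar sketch of these semantics (illustrative helper names, not V8 API):

#include <cstdint>

// i64x2.ne: each output lane is all-ones if the input lanes differ, else 0.
void I64x2Ne(const int64_t a[2], const int64_t b[2], int64_t out[2]) {
  for (int lane = 0; lane < 2; ++lane) {
    out[lane] = (a[lane] != b[lane]) ? int64_t{-1} : int64_t{0};
  }
}

// i64x2.all_true: 1 iff every lane is non-zero, 0 otherwise.
int32_t I64x2AllTrue(const int64_t a[2]) {
  return (a[0] != 0 && a[1] != 0) ? 1 : 0;
}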
@@ -2304,6 +2304,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
SIMD_BINOP_CASE(kArm64I64x2Eq, Cmeq, 2D);
case kArm64I64x2Ne: {
VRegister dst = i.OutputSimd128Register().V2D();
__ Cmeq(dst, i.InputSimd128Register(0).V2D(),
i.InputSimd128Register(1).V2D());
__ Mvn(dst, dst);
break;
}
case kArm64I64x2ShrU: {
ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 6, V2D, Ushl, X);
break;
@@ -2805,6 +2812,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
break;
}
case kArm64V64x2AllTrue: {
UseScratchRegisterScope scope(tasm());
VRegister tmp = scope.AcquireV(kFormat2D);
__ Cmeq(tmp.V2D(), i.InputSimd128Register(0).V2D(), 0);
__ Addp(tmp.D(), tmp);
__ Fcmp(tmp.D(), tmp.D());
__ Cset(i.OutputRegister32(), eq);
break;
}
#define SIMD_REDUCE_OP_CASE(Op, Instr, format, FORMAT) \
case Op: { \
UseScratchRegisterScope scope(tasm()); \
......
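
Two details of the arm64 lowering above are worth noting. For i64x2.ne there is no negated vector compare, so the code emits Cmeq and inverts the mask with Mvn. For all_true, Cmeq against zero yields an all-ones lane exactly where the input lane is zero, Addp folds the two lanes into one 64-bit value, and Fcmp of that value against itself works because the only possible non-zero sums (0xFF...FF and 0xFF...FE) are NaN bit patterns, making the comparison unordered; Cset eq therefore produces 1 only when every lane was non-zero. A C++ model of the trick (a sketch, not V8 code):

#include <cstdint>
#include <cstring>

int32_t V64x2AllTrueModel(const uint64_t lane[2]) {
  // Cmeq #0: all-ones where the lane is zero, zero otherwise.
  uint64_t m0 = (lane[0] == 0) ? ~uint64_t{0} : uint64_t{0};
  uint64_t m1 = (lane[1] == 0) ? ~uint64_t{0} : uint64_t{0};
  // Addp: pairwise add of the two lanes; 0 iff both inputs were non-zero.
  uint64_t sum = m0 + m1;
  // Fcmp sum, sum: reinterpret the bits as a double. +0.0 equals itself,
  // while 0xFF..FF and 0xFF..FE both decode as NaNs (unordered compare).
  double d;
  std::memcpy(&d, &sum, sizeof d);
  return (d == d) ? 1 : 0;  // Cset eq.
}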
@@ -237,6 +237,7 @@ namespace compiler {
V(Arm64I64x2Sub) \
V(Arm64I64x2Mul) \
V(Arm64I64x2Eq) \
V(Arm64I64x2Ne) \
V(Arm64I64x2ShrU) \
V(Arm64I64x2BitMask) \
V(Arm64I32x4Splat) \
@@ -374,6 +375,7 @@ namespace compiler {
V(Arm64S8x4Reverse) \
V(Arm64S8x2Reverse) \
V(Arm64V128AnyTrue) \
V(Arm64V64x2AllTrue) \
V(Arm64V32x4AllTrue) \
V(Arm64V16x8AllTrue) \
V(Arm64V8x16AllTrue) \
......
@@ -202,6 +202,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I64x2Sub:
case kArm64I64x2Mul:
case kArm64I64x2Eq:
case kArm64I64x2Ne:
case kArm64I64x2ShrU:
case kArm64I64x2BitMask:
case kArm64I32x4Splat:
@@ -343,6 +344,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64S8x4Reverse:
case kArm64S8x2Reverse:
case kArm64V128AnyTrue:
case kArm64V64x2AllTrue:
case kArm64V32x4AllTrue:
case kArm64V16x8AllTrue:
case kArm64V8x16AllTrue:
......
@@ -3441,6 +3441,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I8x16BitMask, kArm64I8x16BitMask) \
V(S128Not, kArm64S128Not) \
V(V128AnyTrue, kArm64V128AnyTrue) \
V(V64x2AllTrue, kArm64V64x2AllTrue) \
V(V32x4AllTrue, kArm64V32x4AllTrue) \
V(V16x8AllTrue, kArm64V16x8AllTrue) \
V(V8x16AllTrue, kArm64V8x16AllTrue)
@@ -3484,6 +3485,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I64x2Add, kArm64I64x2Add) \
V(I64x2Sub, kArm64I64x2Sub) \
V(I64x2Eq, kArm64I64x2Eq) \
V(I64x2Ne, kArm64I64x2Ne) \
V(I32x4AddHoriz, kArm64I32x4AddHoriz) \
V(I32x4Mul, kArm64I32x4Mul) \
V(I32x4MinS, kArm64I32x4MinS) \
......
@@ -2040,6 +2040,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI64x2Mul(node);
case IrOpcode::kI64x2Eq:
return MarkAsSimd128(node), VisitI64x2Eq(node);
case IrOpcode::kI64x2Ne:
return MarkAsSimd128(node), VisitI64x2Ne(node);
case IrOpcode::kI64x2ShrU:
return MarkAsSimd128(node), VisitI64x2ShrU(node);
case IrOpcode::kI64x2ExtMulLowI32x4S:
@@ -2304,12 +2306,14 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI8x16Swizzle(node);
case IrOpcode::kI8x16Shuffle:
return MarkAsSimd128(node), VisitI8x16Shuffle(node);
case IrOpcode::kV128AnyTrue:
return MarkAsWord32(node), VisitV128AnyTrue(node);
case IrOpcode::kV64x2AllTrue:
return MarkAsWord32(node), VisitV64x2AllTrue(node);
case IrOpcode::kV32x4AllTrue:
return MarkAsWord32(node), VisitV32x4AllTrue(node);
case IrOpcode::kV16x8AllTrue:
return MarkAsWord32(node), VisitV16x8AllTrue(node);
case IrOpcode::kV128AnyTrue:
return MarkAsWord32(node), VisitV128AnyTrue(node);
case IrOpcode::kV8x16AllTrue:
return MarkAsWord32(node), VisitV8x16AllTrue(node);
default:
@@ -2816,6 +2820,11 @@ void InstructionSelector::VisitI32x4WidenI8x16S(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4WidenI8x16U(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitI64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitV64x2AllTrue(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {
......
@@ -2913,6 +2913,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_SIMD_BINOP(pcmpeqq);
break;
}
case kX64I64x2Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
__ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ Pcmpeqq(tmp, tmp);
__ Pxor(i.OutputSimd128Register(), tmp);
break;
}
case kX64I64x2ShrU: {
// Take shift value modulo 2^6.
ASSEMBLE_SIMD_SHIFT(psrlq, 6);
@@ -4305,6 +4313,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns
// 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1
// respectively.
case kX64V64x2AllTrue: {
ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
break;
}
case kX64V32x4AllTrue: {
ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
break;
......
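
On x64, SSE has no 64x2 not-equal instruction, so the lowering above computes the equality mask with Pcmpeqq and inverts it: Pcmpeqq of a register with itself produces all-ones, and Pxor against that is a bitwise NOT (this is also why the selector gives the instruction a temp register; see SIMD_BINOP_ONE_TEMP_LIST below). All_true reuses the existing ASSEMBLE_SIMD_ALL_TRUE macro instantiated with Pcmpeqq since, as the comment above notes, the comparison width must match the lane width. The ne sequence expressed with intrinsics (a sketch assuming SSE4.1, not the V8 code itself):

#include <smmintrin.h>  // SSE4.1, for _mm_cmpeq_epi64

__m128i I64x2Ne(__m128i a, __m128i b) {
  __m128i eq = _mm_cmpeq_epi64(a, b);      // pcmpeqq: all-ones where equal
  __m128i ones = _mm_cmpeq_epi64(eq, eq);  // pcmpeqq tmp,tmp: all-ones
  return _mm_xor_si128(eq, ones);          // pxor: invert the mask
}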
@@ -214,6 +214,7 @@ namespace compiler {
V(X64I64x2Sub) \
V(X64I64x2Mul) \
V(X64I64x2Eq) \
V(X64I64x2Ne) \
V(X64I64x2ShrU) \
V(X64I64x2SignSelect) \
V(X64I64x2ExtMulLowI32x4S) \
@@ -391,9 +392,10 @@ namespace compiler {
V(X64S8x8Reverse) \
V(X64S8x4Reverse) \
V(X64S8x2Reverse) \
V(X64V128AnyTrue) \
V(X64V64x2AllTrue) \
V(X64V32x4AllTrue) \
V(X64V16x8AllTrue) \
V(X64V128AnyTrue) \
V(X64V8x16AllTrue) \
V(X64Prefetch) \
V(X64PrefetchNta) \
......
@@ -190,6 +190,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I64x2Sub:
case kX64I64x2Mul:
case kX64I64x2Eq:
case kX64I64x2Ne:
case kX64I64x2ShrU:
case kX64I64x2SignSelect:
case kX64I64x2ExtMulLowI32x4S:
@@ -319,6 +320,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64S128Zero:
case kX64S128AllOnes:
case kX64S128AndNot:
case kX64V64x2AllTrue:
case kX64V32x4AllTrue:
case kX64V16x8AllTrue:
case kX64I8x16Swizzle:
......
@@ -2920,6 +2920,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(I8x16GeU)
#define SIMD_BINOP_ONE_TEMP_LIST(V) \
V(I64x2Ne) \
V(I32x4Ne) \
V(I32x4GtU) \
V(I16x8Ne) \
@@ -2978,6 +2979,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(I8x16ShrU)
#define SIMD_ALLTRUE_LIST(V) \
V(V64x2AllTrue) \
V(V32x4AllTrue) \
V(V16x8AllTrue) \
V(V8x16AllTrue)
......
@@ -438,6 +438,7 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
V(I64x2Sub, Operator::kNoProperties, 2, 0, 1) \
V(I64x2Mul, Operator::kCommutative, 2, 0, 1) \
V(I64x2Eq, Operator::kCommutative, 2, 0, 1) \
V(I64x2Ne, Operator::kCommutative, 2, 0, 1) \
V(I64x2ShrU, Operator::kNoProperties, 2, 0, 1) \
V(I64x2ExtMulLowI32x4S, Operator::kCommutative, 2, 0, 1) \
V(I64x2ExtMulHighI32x4S, Operator::kCommutative, 2, 0, 1) \
@@ -559,9 +560,10 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
V(S128Not, Operator::kNoProperties, 1, 0, 1) \
V(S128Select, Operator::kNoProperties, 3, 0, 1) \
V(S128AndNot, Operator::kNoProperties, 2, 0, 1) \
V(V128AnyTrue, Operator::kNoProperties, 1, 0, 1) \
V(V64x2AllTrue, Operator::kNoProperties, 1, 0, 1) \
V(V32x4AllTrue, Operator::kNoProperties, 1, 0, 1) \
V(V16x8AllTrue, Operator::kNoProperties, 1, 0, 1) \
V(V128AnyTrue, Operator::kNoProperties, 1, 0, 1) \
V(V8x16AllTrue, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Swizzle, Operator::kNoProperties, 2, 0, 1)
......
@@ -678,6 +678,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I64x2Sub();
const Operator* I64x2Mul();
const Operator* I64x2Eq();
const Operator* I64x2Ne();
const Operator* I64x2ShrU();
const Operator* I64x2ExtMulLowI32x4S();
const Operator* I64x2ExtMulHighI32x4S();
@@ -822,9 +823,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I8x16Swizzle();
const Operator* I8x16Shuffle(const uint8_t shuffle[16]);
const Operator* V128AnyTrue();
const Operator* V64x2AllTrue();
const Operator* V32x4AllTrue();
const Operator* V16x8AllTrue();
const Operator* V128AnyTrue();
const Operator* V8x16AllTrue();
// load [base + index]
......
@@ -834,6 +834,7 @@
V(I64x2Sub) \
V(I64x2Mul) \
V(I64x2Eq) \
V(I64x2Ne) \
V(I64x2ShrU) \
V(I64x2ExtMulLowI32x4S) \
V(I64x2ExtMulHighI32x4S) \
@@ -980,9 +981,10 @@
V(S128AndNot) \
V(I8x16Swizzle) \
V(I8x16Shuffle) \
V(V128AnyTrue) \
V(V64x2AllTrue) \
V(V32x4AllTrue) \
V(V16x8AllTrue) \
V(V128AnyTrue) \
V(V8x16AllTrue) \
V(LoadTransform) \
V(PrefetchTemporal) \
......
@@ -4837,6 +4837,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI64x2Eq:
return graph()->NewNode(mcgraph()->machine()->I64x2Eq(), inputs[0],
inputs[1]);
case wasm::kExprI64x2Ne:
return graph()->NewNode(mcgraph()->machine()->I64x2Ne(), inputs[0],
inputs[1]);
case wasm::kExprI64x2ShrU:
return graph()->NewNode(mcgraph()->machine()->I64x2ShrU(), inputs[0],
inputs[1]);
@@ -5213,6 +5216,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprS128AndNot:
return graph()->NewNode(mcgraph()->machine()->S128AndNot(), inputs[0],
inputs[1]);
case wasm::kExprV64x2AllTrue:
return graph()->NewNode(mcgraph()->machine()->V64x2AllTrue(), inputs[0]);
case wasm::kExprV32x4AllTrue:
return graph()->NewNode(mcgraph()->machine()->V32x4AllTrue(), inputs[0]);
case wasm::kExprV16x8AllTrue:
......
@@ -36,6 +36,7 @@ namespace wasm {
#define CASE_S64x2_OP(name, str) CASE_OP(S64x2##name, "s64x2." str)
#define CASE_S32x4_OP(name, str) CASE_OP(S32x4##name, "s32x4." str)
#define CASE_S16x8_OP(name, str) CASE_OP(S16x8##name, "s16x8." str)
#define CASE_V64x2_OP(name, str) CASE_OP(V64x2##name, "v64x2." str)
#define CASE_V32x4_OP(name, str) CASE_OP(V32x4##name, "v32x4." str)
#define CASE_V16x8_OP(name, str) CASE_OP(V16x8##name, "v16x8." str)
#define CASE_V8x16_OP(name, str) CASE_OP(V8x16##name, "v8x16." str)
@@ -237,8 +238,7 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIMD_OP(Neg, "neg")
CASE_SIMDF_OP(Sqrt, "sqrt")
CASE_SIMD_OP(Eq, "eq")
CASE_SIMDF_OP(Ne, "ne")
CASE_SIMDI_OP(Ne, "ne")
CASE_SIMD_OP(Ne, "ne")
CASE_SIMD_OP(Add, "add")
CASE_SIMD_OP(Sub, "sub")
CASE_SIMD_OP(Mul, "mul")
@@ -297,6 +297,7 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_I8x16_OP(Shuffle, "shuffle")
CASE_V128_OP(AnyTrue, "any_true")
CASE_SIMDV_OP(AllTrue, "all_true")
CASE_V64x2_OP(AllTrue, "all_true")
CASE_SIMDF_OP(Qfma, "qfma")
CASE_SIMDF_OP(Qfms, "qfms")
......
@@ -348,6 +348,7 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(I32x4GeS, 0xfd3f, s_ss) \
V(I32x4GeU, 0xfd40, s_ss) \
V(I64x2Eq, 0xfdc0, s_ss) \
V(I64x2Ne, 0xfdd0, s_ss) \
V(F32x4Eq, 0xfd41, s_ss) \
V(F32x4Ne, 0xfd42, s_ss) \
V(F32x4Lt, 0xfd43, s_ss) \
@@ -441,6 +442,7 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(I32x4ExtMulLowI16x8U, 0xfdbe, s_ss) \
V(I32x4ExtMulHighI16x8U, 0xfdbf, s_ss) \
V(I64x2Neg, 0xfdc1, s_s) \
V(V64x2AllTrue, 0xfdcf, i_s) \
V(I64x2BitMask, 0xfdc4, i_s) \
V(I64x2Shl, 0xfdcb, s_si) \
V(I64x2ShrS, 0xfdcc, s_si) \
......
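
The opcode values above follow the proposal's prefixed encoding: each SIMD instruction is the 0xfd prefix byte followed by the opcode index as an unsigned LEB128, so i64x2.ne (0xfdd0) serializes as the bytes 0xfd 0xd0 0x01 and v64x2.all_true (0xfdcf) as 0xfd 0xcf 0x01. A small sketch of that encoding (illustrative helper, not V8 code):

#include <cstdint>
#include <vector>

// Encode a SIMD opcode index as the 0xfd prefix plus unsigned LEB128.
std::vector<uint8_t> EncodeSimdOpcode(uint32_t index) {
  std::vector<uint8_t> bytes{0xfd};
  do {
    uint8_t b = index & 0x7f;
    index >>= 7;
    if (index != 0) b |= 0x80;  // continuation bit
    bytes.push_back(b);
  } while (index != 0);
  return bytes;
}
// EncodeSimdOpcode(0xd0) yields {0xfd, 0xd0, 0x01}, i.e. i64x2.ne.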
@@ -1038,6 +1038,12 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Eq) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Eq, Equal);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(I64x2Ne) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Ne, NotEqual);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST(F64x2Splat) {
WasmRunner<int32_t, double> r(execution_tier, lower_simd);
// Set up a global to hold output vector.
@@ -3513,6 +3519,9 @@ WASM_SIMD_TEST(S8x16MultiShuffleFuzz) {
CHECK_EQ(1, r.Call()); \
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_BOOL_REDUCTION_TEST(64x2, 2, WASM_I64V)
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_BOOL_REDUCTION_TEST(32x4, 4, WASM_I32V)
WASM_SIMD_BOOL_REDUCTION_TEST(16x8, 8, WASM_I32V)
WASM_SIMD_BOOL_REDUCTION_TEST(8x16, 16, WASM_I32V)
@@ -4410,6 +4419,9 @@ WASM_SIMD_TEST(V128AnytrueWithNegativeZero) {
CHECK_EQ(1, r.Call(0x1)); \
CHECK_EQ(0, r.Call(0)); \
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_ALLTRUE_TEST(64x2, 2, 0xffffffffffffffff, int64_t)
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_ALLTRUE_TEST(32x4, 4, 0xffffffff, int32_t)
WASM_SIMD_ALLTRUE_TEST(16x8, 8, 0xffff, int32_t)
WASM_SIMD_ALLTRUE_TEST(8x16, 16, 0xff, int32_t)
......
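
The new tests reuse the existing macros with 64x2 parameters, guarded to x64/arm64 until the other backends catch up. Conceptually, the alltrue test splats its scalar argument into both lanes and reduces with all_true, so any non-zero input must yield 1 and zero must yield 0. A model of the expected behavior (not the macro's actual expansion):

#include <cstdint>

int32_t AllTrueAfterSplat(int64_t x) {
  int64_t lanes[2] = {x, x};  // splat the scalar into both lanes
  return (lanes[0] != 0 && lanes[1] != 0) ? 1 : 0;  // all_true
}
// AllTrueAfterSplat(-1) == 1, AllTrueAfterSplat(1) == 1,
// AllTrueAfterSplat(0) == 0, matching the CHECK_EQ lines above.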
@@ -2309,6 +2309,7 @@ class WasmInterpreterInternals {
CMPOP_CASE(F32x4Lt, f32x4, float4, int4, 4, a < b)
CMPOP_CASE(F32x4Le, f32x4, float4, int4, 4, a <= b)
CMPOP_CASE(I64x2Eq, i64x2, int2, int2, 2, a == b)
CMPOP_CASE(I64x2Ne, i64x2, int2, int2, 2, a != b)
CMPOP_CASE(I32x4Eq, i32x4, int4, int4, 4, a == b)
CMPOP_CASE(I32x4Ne, i32x4, int4, int4, 4, a != b)
CMPOP_CASE(I32x4GtS, i32x4, int4, int4, 4, a > b)
@@ -2639,6 +2640,7 @@ class WasmInterpreterInternals {
Push(WasmValue(res)); \
return true; \
}
REDUCTION_CASE(V64x2AllTrue, i64x2, int2, 2, &)
REDUCTION_CASE(V32x4AllTrue, i32x4, int4, 4, &)
REDUCTION_CASE(V16x8AllTrue, i16x8, int8, 8, &)
REDUCTION_CASE(V8x16AllTrue, i8x16, int16, 16, &)
......
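
In the interpreter, all_true is a plain fold: REDUCTION_CASE reduces "lane != 0" across the lanes with the given operator, & in this case. A scalar model (illustrative, not the macro expansion):

#include <cstdint>

// Model of REDUCTION_CASE(V64x2AllTrue, i64x2, int2, 2, &): AND together
// "is this lane non-zero" over both 64-bit lanes.
int32_t V64x2AllTrueInterp(const int64_t lanes[2]) {
  int32_t res = 1;
  for (int i = 0; i < 2; ++i) res &= (lanes[i] != 0) ? 1 : 0;
  return res;
}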