Commit f65701b0 authored by Ng Zhi An's avatar Ng Zhi An Committed by Commit Bot

[wasm-simd][ia32] Implement i64x2.ne and i64x2 all_true

Bug: v8:11347,v8:11348
Change-Id: I47ba950b80197d1d769d93aa68266131be9bf31d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2666146
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72518}
parent 3c746bed
......@@ -1810,8 +1810,22 @@ void TurboAssembler::Haddps(XMMRegister dst, XMMRegister src1, Operand src2) {
}
}
// Compare packed 64-bit integers in dst and src for equality.
// Emits the VEX-encoded vpcmpeqq when AVX is available; otherwise falls
// back to the destructive SSE4.1 pcmpeqq form (dst acts as both the first
// source and the destination).
void TurboAssembler::Pcmpeqq(XMMRegister dst, Operand src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope sse_scope(this, SSE4_1);
    pcmpeqq(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpcmpeqq(dst, dst, src);
}
// Register-register form of the three-operand Pcmpeqq: wraps src2 in an
// Operand and forwards to the Operand overload, which selects between AVX
// (vpcmpeqq) and SSE4.1 (pcmpeqq) based on CPU feature support.
void TurboAssembler::Pcmpeqq(XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
Pcmpeqq(dst, src1, Operand(src2));
}
void TurboAssembler::Pcmpeqq(XMMRegister dst, XMMRegister src1, Operand src2) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpcmpeqq(dst, src1, src2);
......
......@@ -545,6 +545,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
#undef AVX_OP3_WITH_TYPE_SCOPE
void Haddps(XMMRegister dst, XMMRegister src1, Operand src2);
void Pcmpeqq(XMMRegister dst, Operand src2);
void Pcmpeqq(XMMRegister dst, XMMRegister src1, Operand src2);
void Pcmpeqq(XMMRegister dst, XMMRegister src1, XMMRegister src2);
void Pshufb(XMMRegister dst, XMMRegister src) { Pshufb(dst, dst, src); }
void Pshufb(XMMRegister dst, Operand src) { Pshufb(dst, dst, src); }
......
......@@ -2231,6 +2231,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kIA32I64x2Ne: {
__ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
__ Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
break;
}
case kIA32I64x2SConvertI32x4Low: {
__ Pmovsxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
......@@ -4274,6 +4281,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns
// 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1
// respectively.
case kIA32V64x2AllTrue:
ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
break;
case kIA32V32x4AllTrue:
ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
break;
......
......@@ -149,6 +149,7 @@ namespace compiler {
V(IA32I64x2ShrU) \
V(IA32I64x2BitMask) \
V(IA32I64x2Eq) \
V(IA32I64x2Ne) \
V(IA32I64x2SignSelect) \
V(IA32I64x2ExtMulLowI32x4S) \
V(IA32I64x2ExtMulHighI32x4S) \
......@@ -403,6 +404,7 @@ namespace compiler {
V(SSES8x2Reverse) \
V(AVXS8x2Reverse) \
V(IA32S128AnyTrue) \
V(IA32V64x2AllTrue) \
V(IA32V32x4AllTrue) \
V(IA32V16x8AllTrue) \
V(IA32V8x16AllTrue) \
......
......@@ -131,6 +131,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I64x2ShrU:
case kIA32I64x2BitMask:
case kIA32I64x2Eq:
case kIA32I64x2Ne:
case kIA32I64x2SignSelect:
case kIA32I64x2ExtMulLowI32x4S:
case kIA32I64x2ExtMulHighI32x4S:
......@@ -375,6 +376,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSES8x2Reverse:
case kAVXS8x2Reverse:
case kIA32S128AnyTrue:
case kIA32V64x2AllTrue:
case kIA32V32x4AllTrue:
case kIA32V16x8AllTrue:
case kIA32V8x16AllTrue:
......
......@@ -2273,6 +2273,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I64x2Add) \
V(I64x2Sub) \
V(I64x2Eq) \
V(I64x2Ne) \
V(I32x4DotI16x8S) \
V(I16x8RoundingAverageU) \
V(I8x16Add) \
......@@ -2337,6 +2338,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(S128Not)
#define SIMD_ALLTRUE_LIST(V) \
V(V64x2AllTrue) \
V(V32x4AllTrue) \
V(V16x8AllTrue) \
V(V8x16AllTrue)
......
......@@ -2815,10 +2815,10 @@ void InstructionSelector::VisitI32x4WidenI8x16S(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4WidenI8x16U(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitV64x2AllTrue(Node* node) { UNIMPLEMENTED(); }
#endif //! V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
#endif //! V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
......@@ -114,6 +114,8 @@ void SimdScalarLowering::LowerGraph() {
V(I64x2Splat) \
V(I64x2ExtractLane) \
V(I64x2ReplaceLane) \
V(I64x2Eq) \
V(I64x2Ne) \
V(I64x2Neg) \
V(I64x2Shl) \
V(I64x2ShrS) \
......@@ -166,6 +168,7 @@ void SimdScalarLowering::LowerGraph() {
V(S128Not) \
V(S128AndNot) \
V(S128Select) \
V(V64x2AllTrue) \
V(V32x4AllTrue) \
V(V16x8AllTrue) \
V(V128AnyTrue) \
......@@ -1186,7 +1189,7 @@ Node* SimdScalarLowering::ConstructPhiForComparison(Diamond d,
int false_value) {
// Close the given Diamond d using a Phi node, taking care of constructing the
// right kind of constants (Int32 or Int64) based on rep_type.
if (rep_type == SimdType::kFloat64x2) {
if (rep_type == SimdType::kFloat64x2 || rep_type == SimdType::kInt64x2) {
MachineRepresentation rep = MachineRepresentation::kWord64;
return d.Phi(rep, mcgraph_->Int64Constant(true_value),
mcgraph_->Int64Constant(false_value));
......@@ -1259,15 +1262,33 @@ void SimdScalarLowering::LowerAllTrueOp(Node* node, SimdType rep_type) {
int num_lanes = NumLanes(rep_type);
DCHECK_EQ(1, node->InputCount());
Node** rep = GetReplacementsWithType(node->InputAt(0), rep_type);
Node* zero;
Node* tmp_result;
MachineRepresentation result_rep = MachineRepresentation::kWord32;
const Operator* equals;
if (SimdType::kInt64x2 == rep_type) {
zero = mcgraph_->Int64Constant(0);
tmp_result = mcgraph_->Int64Constant(1);
result_rep = MachineRepresentation::kWord64;
equals = machine()->Word64Equal();
} else {
zero = mcgraph_->Int32Constant(0);
tmp_result = mcgraph_->Int32Constant(1);
equals = machine()->Word32Equal();
}
Node** rep_node = zone()->NewArray<Node*>(num_lanes);
Node* zero = mcgraph_->Int32Constant(0);
Node* tmp_result = mcgraph_->Int32Constant(1);
for (int i = 0; i < num_lanes; ++i) {
Diamond d(graph(), common(),
graph()->NewNode(machine()->Word32Equal(), rep[i], zero));
tmp_result = d.Phi(MachineRepresentation::kWord32, zero, tmp_result);
Diamond d(graph(), common(), graph()->NewNode(equals, rep[i], zero));
tmp_result = d.Phi(result_rep, zero, tmp_result);
}
if (SimdType::kInt64x2 == rep_type) {
tmp_result =
graph()->NewNode(machine()->TruncateInt64ToInt32(), tmp_result);
}
rep_node[0] = tmp_result;
ReplaceNode(node, rep_node, 1);
}
......@@ -2100,6 +2121,7 @@ void SimdScalarLowering::LowerNode(Node* node) {
COMPARISON_CASE(Float32x4, kF32x4Le, Float32LessThanOrEqual, false)
COMPARISON_CASE(Float32x4, kF32x4Gt, Float32LessThan, true)
COMPARISON_CASE(Float32x4, kF32x4Ge, Float32LessThanOrEqual, true)
COMPARISON_CASE(Int64x2, kI64x2Eq, Word64Equal, false)
COMPARISON_CASE(Int32x4, kI32x4Eq, Word32Equal, false)
COMPARISON_CASE(Int32x4, kI32x4LtS, Int32LessThan, false)
COMPARISON_CASE(Int32x4, kI32x4LeS, Int32LessThanOrEqual, false)
......@@ -2136,6 +2158,10 @@ void SimdScalarLowering::LowerNode(Node* node) {
LowerNotEqual(node, SimdType::kFloat32x4, machine()->Float32Equal());
break;
}
case IrOpcode::kI64x2Ne: {
LowerNotEqual(node, SimdType::kInt64x2, machine()->Word64Equal());
break;
}
case IrOpcode::kI32x4Ne: {
LowerNotEqual(node, SimdType::kInt32x4, machine()->Word32Equal());
break;
......@@ -2238,6 +2264,10 @@ void SimdScalarLowering::LowerNode(Node* node) {
ReplaceNode(node, rep_node, 1);
break;
}
case IrOpcode::kV64x2AllTrue: {
LowerAllTrueOp(node, SimdType::kInt64x2);
break;
}
case IrOpcode::kV32x4AllTrue: {
LowerAllTrueOp(node, SimdType::kInt32x4);
break;
......
......@@ -1038,11 +1038,11 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Eq) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Eq, Equal);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(I64x2Ne) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Ne, NotEqual);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F64x2Splat) {
WasmRunner<int32_t, double> r(execution_tier, lower_simd);
......@@ -3516,9 +3516,9 @@ WASM_SIMD_TEST(S8x16MultiShuffleFuzz) {
CHECK_EQ(1, r.Call()); \
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_BOOL_REDUCTION_TEST(64x2, 2, WASM_I64V)
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_BOOL_REDUCTION_TEST(32x4, 4, WASM_I32V)
WASM_SIMD_BOOL_REDUCTION_TEST(16x8, 8, WASM_I32V)
WASM_SIMD_BOOL_REDUCTION_TEST(8x16, 16, WASM_I32V)
......@@ -4416,9 +4416,9 @@ WASM_SIMD_TEST(V128AnytrueWithNegativeZero) {
CHECK_EQ(1, r.Call(0x1)); \
CHECK_EQ(0, r.Call(0)); \
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_ALLTRUE_TEST(64x2, 2, 0xffffffffffffffff, int64_t)
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_ALLTRUE_TEST(32x4, 4, 0xffffffff, int32_t)
WASM_SIMD_ALLTRUE_TEST(16x8, 8, 0xffff, int32_t)
WASM_SIMD_ALLTRUE_TEST(8x16, 16, 0xff, int32_t)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment