Commit 0f514da6 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64][arm64] Implement i64x2 signed compares

This is a partial revert of https://crrev.com/c/2457669/.

This change is slightly longer (in code-generator-x64.cc) because we
also implement a fallback for when SSE4_2 is not supported (the
reverted change assumed SSE4_2, which is not always available). The
fallback code sequence is from https://github.com/WebAssembly/simd/pull/412.
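
As a scalar sketch (not part of this commit; the helper name is
illustrative), the per-lane semantics that the SSE2 fallback implements
can be written as:

  #include <cstdint>

  // One i64x2.gt_s lane: the signed high dwords decide unless they are
  // equal, in which case the unsigned low dwords (recovered in the
  // vector sequence from the sign of the psubq result) decide.
  bool I64GtS(int64_t a, int64_t b) {
    int32_t a_hi = static_cast<int32_t>(a >> 32);
    int32_t b_hi = static_cast<int32_t>(b >> 32);
    uint32_t a_lo = static_cast<uint32_t>(a);
    uint32_t b_lo = static_cast<uint32_t>(b);
    return a_hi > b_hi || (a_hi == b_hi && a_lo > b_lo);
  }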

Bug: v8:11415
Change-Id: I3eef415667b4142887cf1c449d27d19ba5bbd208
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2683219
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72611}
parent 0279d82c
@@ -2311,6 +2311,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Mvn(dst, dst);
break;
}
SIMD_BINOP_CASE(kArm64I64x2GtS, Cmgt, 2D);
SIMD_BINOP_CASE(kArm64I64x2GeS, Cmge, 2D);
case kArm64I64x2ShrU: {
ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 6, V2D, Ushl, X);
break;
......
@@ -238,6 +238,8 @@ namespace compiler {
V(Arm64I64x2Mul) \
V(Arm64I64x2Eq) \
V(Arm64I64x2Ne) \
V(Arm64I64x2GtS) \
V(Arm64I64x2GeS) \
V(Arm64I64x2ShrU) \
V(Arm64I64x2BitMask) \
V(Arm64I32x4Splat) \
......
@@ -203,6 +203,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I64x2Mul:
case kArm64I64x2Eq:
case kArm64I64x2Ne:
case kArm64I64x2GtS:
case kArm64I64x2GeS:
case kArm64I64x2ShrU:
case kArm64I64x2BitMask:
case kArm64I32x4Splat:
......
@@ -3486,6 +3486,8 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I64x2Sub, kArm64I64x2Sub) \
V(I64x2Eq, kArm64I64x2Eq) \
V(I64x2Ne, kArm64I64x2Ne) \
V(I64x2GtS, kArm64I64x2GtS) \
V(I64x2GeS, kArm64I64x2GeS) \
V(I32x4AddHoriz, kArm64I32x4AddHoriz) \
V(I32x4Mul, kArm64I32x4Mul) \
V(I32x4MinS, kArm64I32x4MinS) \
......
@@ -2042,6 +2042,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI64x2Eq(node);
case IrOpcode::kI64x2Ne:
return MarkAsSimd128(node), VisitI64x2Ne(node);
case IrOpcode::kI64x2GtS:
return MarkAsSimd128(node), VisitI64x2GtS(node);
case IrOpcode::kI64x2GeS:
return MarkAsSimd128(node), VisitI64x2GeS(node);
case IrOpcode::kI64x2ShrU:
return MarkAsSimd128(node), VisitI64x2ShrU(node);
case IrOpcode::kI64x2ExtMulLowI32x4S:
@@ -2795,6 +2799,11 @@ void InstructionSelector::VisitI32x4WidenI8x16S(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4WidenI8x16U(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitI64x2GtS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GeS(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {
......
@@ -2921,6 +2921,69 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pxor(i.OutputSimd128Register(), tmp);
break;
}
case kX64I64x2GtS: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src0 = i.InputSimd128Register(0);
XMMRegister src1 = i.InputSimd128Register(1);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpgtq(dst, src0, src1);
} else if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope(tasm(), SSE4_2);
DCHECK_EQ(dst, src0);
__ pcmpgtq(dst, src1);
} else {
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
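// SSE2 fallback (https://github.com/WebAssembly/simd/pull/412): build
// src0 > src1 per 64-bit lane from 32-bit operations. The signed high
// dwords are compared with pcmpgtd; where they are equal, the sign of
// (src1 - src0) from psubq decides. pshufd 0xF5 then copies each lane's
// high dword into both halves of that lane.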
__ movdqa(dst, src1);
__ movdqa(kScratchDoubleReg, src0);
__ psubq(dst, src0);
__ pcmpeqd(kScratchDoubleReg, src1);
__ pand(dst, kScratchDoubleReg);
__ movdqa(kScratchDoubleReg, src0);
__ pcmpgtd(kScratchDoubleReg, src1);
__ por(dst, kScratchDoubleReg);
__ pshufd(dst, dst, 0xF5);
}
break;
}
case kX64I64x2GeS: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src0 = i.InputSimd128Register(0);
XMMRegister src1 = i.InputSimd128Register(1);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
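// src0 >= src1 is computed as !(src1 > src0): vpcmpeqd builds an
// all-ones mask that vpxor uses to negate the comparison result.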
__ vpcmpgtq(dst, src1, src0);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpxor(dst, dst, kScratchDoubleReg);
} else if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope(tasm(), SSE4_2);
DCHECK_NE(dst, src0);
if (dst != src1) {
__ movdqa(dst, src1);
}
__ pcmpgtq(dst, src0);
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pxor(dst, kScratchDoubleReg);
} else {
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
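// SSE2 fallback: compute src1 > src0 with the same 32-bit sequence as
// kX64I64x2GtS (operands swapped), then invert the result via pxor
// with an all-ones mask to get src0 >= src1.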
__ movdqa(dst, src0);
__ movdqa(kScratchDoubleReg, src1);
__ psubq(dst, src1);
__ pcmpeqd(kScratchDoubleReg, src0);
__ pand(dst, kScratchDoubleReg);
__ movdqa(kScratchDoubleReg, src1);
__ pcmpgtd(kScratchDoubleReg, src0);
__ por(dst, kScratchDoubleReg);
__ pshufd(dst, dst, 0xF5);
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pxor(dst, kScratchDoubleReg);
}
break;
}
case kX64I64x2ShrU: {
// Take shift value modulo 2^6.
ASSEMBLE_SIMD_SHIFT(psrlq, 6);
......
@@ -214,6 +214,8 @@ namespace compiler {
V(X64I64x2Sub) \
V(X64I64x2Mul) \
V(X64I64x2Eq) \
V(X64I64x2GtS) \
V(X64I64x2GeS) \
V(X64I64x2Ne) \
V(X64I64x2ShrU) \
V(X64I64x2SignSelect) \
......
@@ -190,6 +190,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I64x2Sub:
case kX64I64x2Mul:
case kX64I64x2Eq:
case kX64I64x2GtS:
case kX64I64x2GeS:
case kX64I64x2Ne:
case kX64I64x2ShrU:
case kX64I64x2SignSelect:
......
@@ -3774,6 +3774,37 @@ void InstructionSelector::VisitI32x4WidenI8x16U(Node* node) {
VisitWiden(this, node, kX64I32x4WidenI8x16U);
}
void InstructionSelector::VisitI64x2GtS(Node* node) {
X64OperandGenerator g(this);
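// Register constraints mirror the code generator: AVX is
// three-operand; SSE4.2 pcmpgtq overwrites its first operand, so dst
// must be the same register as src0; the SSE2 fallback writes dst
// while both inputs are still live, so both must be in registers
// distinct from dst.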
if (CpuFeatures::IsSupported(AVX)) {
Emit(kX64I64x2GtS, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
} else if (CpuFeatures::IsSupported(SSE4_2)) {
Emit(kX64I64x2GtS, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
} else {
Emit(kX64I64x2GtS, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)),
g.UseUniqueRegister(node->InputAt(1)));
}
}
void InstructionSelector::VisitI64x2GeS(Node* node) {
X64OperandGenerator g(this);
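// With AVX, ge_s needs no special constraints. Under SSE4.2, dst is
// loaded from src1 and then compared against src0, so src0 must not
// alias dst (hence UseUniqueRegister). Under plain SSE2 both inputs
// stay live while dst is written, so both must be distinct from dst.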
if (CpuFeatures::IsSupported(AVX)) {
Emit(kX64I64x2GeS, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
} else if (CpuFeatures::IsSupported(SSE4_2)) {
Emit(kX64I64x2GeS, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)));
} else {
Emit(kX64I64x2GeS, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)),
g.UseUniqueRegister(node->InputAt(1)));
}
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
......
@@ -439,6 +439,8 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
V(I64x2Mul, Operator::kCommutative, 2, 0, 1) \
V(I64x2Eq, Operator::kCommutative, 2, 0, 1) \
V(I64x2Ne, Operator::kCommutative, 2, 0, 1) \
V(I64x2GtS, Operator::kNoProperties, 2, 0, 1) \
V(I64x2GeS, Operator::kNoProperties, 2, 0, 1) \
V(I64x2ShrU, Operator::kNoProperties, 2, 0, 1) \
V(I64x2ExtMulLowI32x4S, Operator::kCommutative, 2, 0, 1) \
V(I64x2ExtMulHighI32x4S, Operator::kCommutative, 2, 0, 1) \
......
@@ -679,6 +679,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I64x2Mul();
const Operator* I64x2Eq();
const Operator* I64x2Ne();
const Operator* I64x2GtS();
const Operator* I64x2GeS();
const Operator* I64x2ShrU();
const Operator* I64x2ExtMulLowI32x4S();
const Operator* I64x2ExtMulHighI32x4S();
......
@@ -836,6 +836,8 @@
V(I64x2Mul) \
V(I64x2Eq) \
V(I64x2Ne) \
V(I64x2GtS) \
V(I64x2GeS) \
V(I64x2ShrU) \
V(I64x2ExtMulLowI32x4S) \
V(I64x2ExtMulHighI32x4S) \
......
@@ -4860,6 +4860,18 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI64x2Ne:
return graph()->NewNode(mcgraph()->machine()->I64x2Ne(), inputs[0],
inputs[1]);
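// lt_s and le_s are lowered by swapping the operands of the gt_s and
// ge_s machine operators; no separate Lt/Le operators are needed.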
case wasm::kExprI64x2LtS:
return graph()->NewNode(mcgraph()->machine()->I64x2GtS(), inputs[1],
inputs[0]);
case wasm::kExprI64x2LeS:
return graph()->NewNode(mcgraph()->machine()->I64x2GeS(), inputs[1],
inputs[0]);
case wasm::kExprI64x2GtS:
return graph()->NewNode(mcgraph()->machine()->I64x2GtS(), inputs[0],
inputs[1]);
case wasm::kExprI64x2GeS:
return graph()->NewNode(mcgraph()->machine()->I64x2GeS(), inputs[0],
inputs[1]);
case wasm::kExprI64x2ShrU:
return graph()->NewNode(mcgraph()->machine()->I64x2ShrU(), inputs[0],
inputs[1]);
......
@@ -272,6 +272,10 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIGN_OP(SIMDI, Min, "min")
CASE_SIGN_OP(SIMDI, Max, "max")
CASE_SIGN_OP(SIMDI, Lt, "lt")
CASE_I64x2_OP(LtS, "lt_s")
CASE_I64x2_OP(GtS, "gt_s")
CASE_I64x2_OP(LeS, "le_s")
CASE_I64x2_OP(GeS, "ge_s")
CASE_SIGN_OP(SIMDI, Le, "le")
CASE_SIGN_OP(SIMDI, Gt, "gt")
CASE_SIGN_OP(SIMDI, Ge, "ge")
......
@@ -348,6 +348,10 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(I32x4GeS, 0xfd3f, s_ss) \
V(I32x4GeU, 0xfd40, s_ss) \
V(I64x2Eq, 0xfdc0, s_ss) \
V(I64x2LtS, 0xfd74, s_ss) \
V(I64x2GtS, 0xfd7a, s_ss) \
V(I64x2LeS, 0xfdee, s_ss) \
V(I64x2GeS, 0xfde2, s_ss) \
V(I64x2Ne, 0xfdd0, s_ss) \
V(F32x4Eq, 0xfd41, s_ss) \
V(F32x4Ne, 0xfd42, s_ss) \
......
@@ -1042,6 +1042,24 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Ne) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Ne, NotEqual);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(I64x2LtS) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2LtS, Less);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2LeS) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2LeS, LessEqual);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2GtS) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2GtS, Greater);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2GeS) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2GeS, GreaterEqual);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST(F64x2Splat) {
WasmRunner<int32_t, double> r(execution_tier, lower_simd);
// Set up a global to hold output vector.
......
@@ -2310,6 +2310,10 @@ class WasmInterpreterInternals {
CMPOP_CASE(F32x4Le, f32x4, float4, int4, 4, a <= b)
CMPOP_CASE(I64x2Eq, i64x2, int2, int2, 2, a == b)
CMPOP_CASE(I64x2Ne, i64x2, int2, int2, 2, a != b)
CMPOP_CASE(I64x2LtS, i64x2, int2, int2, 2, a < b)
CMPOP_CASE(I64x2GtS, i64x2, int2, int2, 2, a > b)
CMPOP_CASE(I64x2LeS, i64x2, int2, int2, 2, a <= b)
CMPOP_CASE(I64x2GeS, i64x2, int2, int2, 2, a >= b)
CMPOP_CASE(I32x4Eq, i32x4, int4, int4, 4, a == b)
CMPOP_CASE(I32x4Ne, i32x4, int4, int4, 4, a != b)
CMPOP_CASE(I32x4GtS, i32x4, int4, int4, 4, a > b)
......