Commit 2e54e9af authored by Ng Zhi An, committed by Commit Bot

[wasm simd] Implement I64x2 LtS, LtU, GtS, GtU, LeS, LeU, GeS, GeU

Bug: v8:8460
Change-Id: I837b7dc3f575d69ccbf2cd76332c0bf775724675
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1684362
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62585}
parent 85bc4ef6
......@@ -14,6 +14,7 @@ namespace internal {
// CPU feature flags.
enum CpuFeature {
// x86
SSE4_2,
SSE4_1,
SSSE3,
SSE3,
......
......@@ -78,6 +78,7 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
// Only use statically determined features for cross compile (snapshot).
if (cross_compile) return;
if (cpu.has_sse42() && FLAG_enable_sse4_2) supported_ |= 1u << SSE4_2;
if (cpu.has_sse41() && FLAG_enable_sse4_1) {
supported_ |= 1u << SSE4_1;
supported_ |= 1u << SSSE3;
......@@ -4704,6 +4705,30 @@ void Assembler::sse4_instr(XMMRegister dst, Operand src, byte prefix,
emit_sse_operand(dst, src);
}
// Emits a generic SSE4.2 instruction with two XMM register operands.
// The bytes are emitted in x64 encoding order: mandatory prefix (e.g. 0x66),
// optional REX prefix (needed when dst/src are extended registers), the two
// escape bytes (e.g. 0x0F 0x38), the opcode, then the ModRM operand byte.
void Assembler::sse4_2_instr(XMMRegister dst, XMMRegister src, byte prefix,
byte escape1, byte escape2, byte opcode) {
// Callers must have verified SSE4.2 support (e.g. via CpuFeatureScope).
DCHECK(IsEnabled(SSE4_2));
EnsureSpace ensure_space(this);
emit(prefix);
emit_optional_rex_32(dst, src);
emit(escape1);
emit(escape2);
emit(opcode);
emit_sse_operand(dst, src);
}
// Register-memory overload of the SSE4.2 emitter: identical byte layout to
// the register-register form above, but the ModRM/SIB encoding produced by
// emit_sse_operand addresses memory via `src`.
void Assembler::sse4_2_instr(XMMRegister dst, Operand src, byte prefix,
byte escape1, byte escape2, byte opcode) {
// Callers must have verified SSE4.2 support (e.g. via CpuFeatureScope).
DCHECK(IsEnabled(SSE4_2));
EnsureSpace ensure_space(this);
emit(prefix);
emit_optional_rex_32(dst, src);
emit(escape1);
emit(escape2);
emit(opcode);
emit_sse_operand(dst, src);
}
void Assembler::lddqu(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(SSE3));
EnsureSpace ensure_space(this);
......
......@@ -952,6 +952,23 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
#undef DECLARE_SSE4_INSTRUCTION
// SSE4.2
// Low-level emitters for SSE4.2-encoded instructions; the definitions in the
// .cc file lay out prefix / escape1 / escape2 / opcode / ModRM in order.
void sse4_2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
byte escape2, byte opcode);
void sse4_2_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
byte escape2, byte opcode);
// Declares one reg-reg and one reg-mem convenience wrapper per entry of
// SSE4_2_INSTRUCTION_LIST (currently only pcmpgtq, 66 0F 38 37). The 0x##
// token-pasting turns the list's bare hex digits into byte literals.
#define DECLARE_SSE4_2_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
void instruction(XMMRegister dst, XMMRegister src) { \
sse4_2_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
} \
void instruction(XMMRegister dst, Operand src) { \
sse4_2_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
}
SSE4_2_INSTRUCTION_LIST(DECLARE_SSE4_2_INSTRUCTION)
#undef DECLARE_SSE4_2_INSTRUCTION
#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
......
......@@ -85,4 +85,6 @@
V(pmaxud, 66, 0F, 38, 3F) \
V(pmulld, 66, 0F, 38, 40)
#define SSE4_2_INSTRUCTION_LIST(V) V(pcmpgtq, 66, 0F, 38, 37)
#endif // V8_CODEGEN_X64_SSE_INSTR_H_
......@@ -1871,8 +1871,16 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI64x2Eq(node);
case IrOpcode::kI64x2Ne:
return MarkAsSimd128(node), VisitI64x2Ne(node);
case IrOpcode::kI64x2GtS:
return MarkAsSimd128(node), VisitI64x2GtS(node);
case IrOpcode::kI64x2GeS:
return MarkAsSimd128(node), VisitI64x2GeS(node);
case IrOpcode::kI64x2ShrU:
return MarkAsSimd128(node), VisitI64x2ShrU(node);
case IrOpcode::kI64x2GtU:
return MarkAsSimd128(node), VisitI64x2GtU(node);
case IrOpcode::kI64x2GeU:
return MarkAsSimd128(node), VisitI64x2GeU(node);
case IrOpcode::kI32x4Splat:
return MarkAsSimd128(node), VisitI32x4Splat(node);
case IrOpcode::kI32x4ExtractLane:
......@@ -2528,7 +2536,11 @@ void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Ne(Node* node) { UNIMPLEMENTED(); }
// Default stubs for the new I64x2 comparison / unsigned-shift selectors.
// These bodies are only compiled for targets without a real implementation
// (guarded by !V8_TARGET_ARCH_X64); reaching one at runtime is a fatal error.
void InstructionSelector::VisitI64x2GtS(Node* node) {
  UNIMPLEMENTED();
}

void InstructionSelector::VisitI64x2GeS(Node* node) {
  UNIMPLEMENTED();
}

void InstructionSelector::VisitI64x2ShrU(Node* node) {
  UNIMPLEMENTED();
}

void InstructionSelector::VisitI64x2GtU(Node* node) {
  UNIMPLEMENTED();
}

void InstructionSelector::VisitI64x2GeU(Node* node) {
  UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
......@@ -2492,10 +2492,67 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
break;
}
case kX64I64x2GtS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_2);
__ pcmpgtq(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I64x2GeS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_2);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
__ movaps(tmp, src);
__ pcmpgtq(tmp, dst);
__ pcmpeqd(dst, dst);
__ pxor(dst, tmp);
break;
}
case kX64I64x2ShrU: {
__ psrlq(i.OutputSimd128Register(), i.InputInt8(1));
break;
}
case kX64I64x2GtU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_2);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
// Sets up a mask to clear the sign bit of the 2 quadwords.
__ Set(kScratchRegister, 0x8000000000000000);
__ movq(kScratchDoubleReg, kScratchRegister);
__ pshufd(kScratchDoubleReg, kScratchDoubleReg, 0x44);
__ movaps(tmp, src);
__ pxor(tmp, kScratchDoubleReg);
__ pxor(dst, kScratchDoubleReg);
__ pcmpgtq(dst, tmp);
break;
}
case kX64I64x2GeU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_2);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
// Sets up a mask to clear the sign bit of the 2 quadwords.
__ Set(kScratchRegister, 0x8000000000000000);
__ movq(kScratchDoubleReg, kScratchRegister);
__ pshufd(kScratchDoubleReg, kScratchDoubleReg, 0x44);
__ movaps(tmp, src);
__ pxor(dst, kScratchDoubleReg);
__ pxor(tmp, kScratchDoubleReg);
__ pcmpgtq(tmp, dst);
__ pcmpeqd(dst, dst);
__ pxor(dst, tmp);
break;
}
case kX64I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsRegister()) {
......
......@@ -188,7 +188,11 @@ namespace compiler {
V(X64I64x2Sub) \
V(X64I64x2Eq) \
V(X64I64x2Ne) \
V(X64I64x2GtS) \
V(X64I64x2GeS) \
V(X64I64x2ShrU) \
V(X64I64x2GtU) \
V(X64I64x2GeU) \
V(X64I32x4Splat) \
V(X64I32x4ExtractLane) \
V(X64I32x4ReplaceLane) \
......
......@@ -154,7 +154,11 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I64x2Sub:
case kX64I64x2Eq:
case kX64I64x2Ne:
case kX64I64x2GtS:
case kX64I64x2GeS:
case kX64I64x2ShrU:
case kX64I64x2GtU:
case kX64I64x2GeU:
case kX64I32x4Splat:
case kX64I32x4ExtractLane:
case kX64I32x4ReplaceLane:
......
......@@ -2567,6 +2567,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(I64x2Sub) \
V(I64x2Eq) \
V(I64x2Ne) \
V(I64x2GtS) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
......@@ -2621,6 +2622,11 @@ VISIT_ATOMIC_BINOP(Xor)
V(S128Or) \
V(S128Xor)
#define SIMD_BINOP_ONE_TEMP_LIST(V) \
V(I64x2GeS) \
V(I64x2GtU) \
V(I64x2GeU)
#define SIMD_UNOP_LIST(V) \
V(F32x4SConvertI32x4) \
V(F32x4Abs) \
......@@ -2732,6 +2738,18 @@ SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
#undef VISIT_SIMD_BINOP
#undef SIMD_BINOP_LIST
// Like the plain SIMD binop visitor, but additionally reserves one temporary
// SIMD128 register for the code generator. Used by the ops listed in
// SIMD_BINOP_ONE_TEMP_LIST (I64x2GeS, I64x2GtU, I64x2GeU), whose x64
// lowering needs a scratch XMM register in addition to dst == input(0).
#define VISIT_SIMD_BINOP_ONE_TEMP(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
X64OperandGenerator g(this); \
InstructionOperand temps[] = {g.TempSimd128Register()}; \
Emit(kX64##Opcode, g.DefineSameAsFirst(node), \
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), \
arraysize(temps), temps); \
}
SIMD_BINOP_ONE_TEMP_LIST(VISIT_SIMD_BINOP_ONE_TEMP)
#undef VISIT_SIMD_BINOP_ONE_TEMP
#undef SIMD_BINOP_ONE_TEMP_LIST
#define VISIT_SIMD_ANYTRUE(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
X64OperandGenerator g(this); \
......
......@@ -269,6 +269,10 @@ MachineType AtomicOpType(Operator const* op) {
V(I64x2Sub, Operator::kNoProperties, 2, 0, 1) \
V(I64x2Eq, Operator::kCommutative, 2, 0, 1) \
V(I64x2Ne, Operator::kCommutative, 2, 0, 1) \
V(I64x2GtS, Operator::kNoProperties, 2, 0, 1) \
V(I64x2GeS, Operator::kNoProperties, 2, 0, 1) \
V(I64x2GtU, Operator::kNoProperties, 2, 0, 1) \
V(I64x2GeU, Operator::kNoProperties, 2, 0, 1) \
V(I32x4Splat, Operator::kNoProperties, 1, 0, 1) \
V(I32x4SConvertF32x4, Operator::kNoProperties, 1, 0, 1) \
V(I32x4SConvertI16x8Low, Operator::kNoProperties, 1, 0, 1) \
......
......@@ -500,7 +500,11 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I64x2Sub();
const Operator* I64x2Eq();
const Operator* I64x2Ne();
const Operator* I64x2GtS();
const Operator* I64x2GeS();
const Operator* I64x2ShrU(int32_t);
const Operator* I64x2GtU();
const Operator* I64x2GeU();
const Operator* I32x4Splat();
const Operator* I32x4ExtractLane(int32_t);
......
......@@ -764,7 +764,11 @@
V(I64x2Sub) \
V(I64x2Eq) \
V(I64x2Ne) \
V(I64x2GtS) \
V(I64x2GeS) \
V(I64x2ShrU) \
V(I64x2GtU) \
V(I64x2GeU) \
V(I32x4Splat) \
V(I32x4ExtractLane) \
V(I32x4ReplaceLane) \
......
......@@ -4064,6 +4064,30 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI64x2Ne:
return graph()->NewNode(mcgraph()->machine()->I64x2Ne(), inputs[0],
inputs[1]);
case wasm::kExprI64x2LtS:
return graph()->NewNode(mcgraph()->machine()->I64x2GtS(), inputs[1],
inputs[0]);
case wasm::kExprI64x2LeS:
return graph()->NewNode(mcgraph()->machine()->I64x2GeS(), inputs[1],
inputs[0]);
case wasm::kExprI64x2GtS:
return graph()->NewNode(mcgraph()->machine()->I64x2GtS(), inputs[0],
inputs[1]);
case wasm::kExprI64x2GeS:
return graph()->NewNode(mcgraph()->machine()->I64x2GeS(), inputs[0],
inputs[1]);
case wasm::kExprI64x2LtU:
return graph()->NewNode(mcgraph()->machine()->I64x2GtU(), inputs[1],
inputs[0]);
case wasm::kExprI64x2LeU:
return graph()->NewNode(mcgraph()->machine()->I64x2GeU(), inputs[1],
inputs[0]);
case wasm::kExprI64x2GtU:
return graph()->NewNode(mcgraph()->machine()->I64x2GtU(), inputs[0],
inputs[1]);
case wasm::kExprI64x2GeU:
return graph()->NewNode(mcgraph()->machine()->I64x2GeU(), inputs[0],
inputs[1]);
case wasm::kExprI32x4Splat:
return graph()->NewNode(mcgraph()->machine()->I32x4Splat(), inputs[0]);
case wasm::kExprI32x4SConvertF32x4:
......
......@@ -932,6 +932,8 @@ DEFINE_BOOL(enable_sse3, true, "enable use of SSE3 instructions if available")
DEFINE_BOOL(enable_ssse3, true, "enable use of SSSE3 instructions if available")
DEFINE_BOOL(enable_sse4_1, true,
"enable use of SSE4.1 instructions if available")
// Gates runtime use of SSE4.2 (probed in CpuFeatures::ProbeImpl); needed for
// pcmpgtq, which backs the x64 I64x2 comparison lowerings.
DEFINE_BOOL(enable_sse4_2, true,
"enable use of SSE4.2 instructions if available")
DEFINE_BOOL(enable_sahf, true,
"enable use of SAHF instruction if available (X64 only)")
DEFINE_BOOL(enable_avx, true, "enable use of AVX instructions if available")
......
......@@ -2270,6 +2270,18 @@ class ThreadImpl {
CMPOP_CASE(F32x4Le, f32x4, float4, int4, 4, a <= b)
CMPOP_CASE(I64x2Eq, i64x2, int2, int2, 2, a == b)
CMPOP_CASE(I64x2Ne, i64x2, int2, int2, 2, a != b)
CMPOP_CASE(I64x2GtS, i64x2, int2, int2, 2, a > b)
CMPOP_CASE(I64x2GeS, i64x2, int2, int2, 2, a >= b)
CMPOP_CASE(I64x2LtS, i64x2, int2, int2, 2, a < b)
CMPOP_CASE(I64x2LeS, i64x2, int2, int2, 2, a <= b)
CMPOP_CASE(I64x2GtU, i64x2, int2, int2, 2,
static_cast<uint64_t>(a) > static_cast<uint64_t>(b))
CMPOP_CASE(I64x2GeU, i64x2, int2, int2, 2,
static_cast<uint64_t>(a) >= static_cast<uint64_t>(b))
CMPOP_CASE(I64x2LtU, i64x2, int2, int2, 2,
static_cast<uint64_t>(a) < static_cast<uint64_t>(b))
CMPOP_CASE(I64x2LeU, i64x2, int2, int2, 2,
static_cast<uint64_t>(a) <= static_cast<uint64_t>(b))
CMPOP_CASE(I32x4Eq, i32x4, int4, int4, 4, a == b)
CMPOP_CASE(I32x4Ne, i32x4, int4, int4, 4, a != b)
CMPOP_CASE(I32x4GtS, i32x4, int4, int4, 4, a > b)
......
......@@ -257,9 +257,13 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIGN_OP(SIMDI, Min, "min")
CASE_SIGN_OP(SIMDI, Max, "max")
CASE_SIGN_OP(SIMDI, Lt, "lt")
CASE_SIGN_OP(I64x2, Lt, "lt")
CASE_SIGN_OP(SIMDI, Le, "le")
CASE_SIGN_OP(I64x2, Le, "le")
CASE_SIGN_OP(SIMDI, Gt, "gt")
CASE_SIGN_OP(I64x2, Gt, "gt")
CASE_SIGN_OP(SIMDI, Ge, "ge")
CASE_SIGN_OP(I64x2, Ge, "ge")
CASE_SIGN_OP(SIMDI, Shr, "shr")
CASE_SIGN_OP(I64x2, Shr, "shr")
CASE_SIMDI_OP(Shl, "shl")
......
......@@ -307,6 +307,14 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
V(I32x4GeU, 0xfd35, s_ss) \
V(I64x2Eq, 0xfd36, s_ss) \
V(I64x2Ne, 0xfd37, s_ss) \
V(I64x2LtS, 0xfd38, s_ss) \
V(I64x2LtU, 0xfd39, s_ss) \
V(I64x2GtS, 0xfd3a, s_ss) \
V(I64x2GtU, 0xfd3b, s_ss) \
V(I64x2LeS, 0xfd3c, s_ss) \
V(I64x2LeU, 0xfd3d, s_ss) \
V(I64x2GeS, 0xfd3e, s_ss) \
V(I64x2GeU, 0xfd3f, s_ss) \
V(F32x4Eq, 0xfd40, s_ss) \
V(F32x4Ne, 0xfd41, s_ss) \
V(F32x4Lt, 0xfd42, s_ss) \
......
......@@ -832,6 +832,40 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Ne) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Ne, NotEqual);
}
// Exercises each of the eight new I64x2 comparison opcodes by running the
// shared binop harness against the matching scalar reference predicate
// (signed Less/LessEqual/Greater/GreaterEqual and their Unsigned variants).
WASM_SIMD_TEST_NO_LOWERING(I64x2LtS) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2LtS, Less);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2LeS) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2LeS, LessEqual);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2GtS) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2GtS, Greater);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2GeS) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2GeS, GreaterEqual);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2LtU) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2LtU, UnsignedLess);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2LeU) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2LeU,
UnsignedLessEqual);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2GtU) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2GtU, UnsignedGreater);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2GeU) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2GeU,
UnsignedGreaterEqual);
}
void RunI64x2ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int64ShiftOp expected_op) {
for (int shift = 1; shift < 64; shift++) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment