Commit 8c58e051, authored by Ng Zhi An, committed by Commit Bot

[wasm simd] Implement I64x2 Min and Max on x64

Bug: v8:8460
Change-Id: I913406a4079c766432a56d059a6cb9861fd469bd
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1703993
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62924}
parent 84a641fa
......@@ -428,6 +428,9 @@ Assembler::Assembler(const AssemblerOptions& options,
std::unique_ptr<AssemblerBuffer> buffer)
: AssemblerBase(options, std::move(buffer)), constpool_(this) {
reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
if (CpuFeatures::IsSupported(SSE4_2)) {
EnableCpuFeature(SSE4_1);
}
if (CpuFeatures::IsSupported(SSE4_1)) {
EnableCpuFeature(SSSE3);
}
......
......@@ -71,6 +71,7 @@
V(psignd, 66, 0F, 38, 0A)
#define SSE4_INSTRUCTION_LIST(V) \
V(blendvpd, 66, 0F, 38, 15) \
V(pcmpeqq, 66, 0F, 38, 29) \
V(ptest, 66, 0F, 38, 17) \
V(pmovsxbw, 66, 0F, 38, 20) \
......
......@@ -1897,6 +1897,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI64x2Sub(node);
case IrOpcode::kI64x2Mul:
return MarkAsSimd128(node), VisitI64x2Mul(node);
case IrOpcode::kI64x2MinS:
return MarkAsSimd128(node), VisitI64x2MinS(node);
case IrOpcode::kI64x2MaxS:
return MarkAsSimd128(node), VisitI64x2MaxS(node);
case IrOpcode::kI64x2Eq:
return MarkAsSimd128(node), VisitI64x2Eq(node);
case IrOpcode::kI64x2Ne:
......@@ -1907,6 +1911,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI64x2GeS(node);
case IrOpcode::kI64x2ShrU:
return MarkAsSimd128(node), VisitI64x2ShrU(node);
case IrOpcode::kI64x2MinU:
return MarkAsSimd128(node), VisitI64x2MinU(node);
case IrOpcode::kI64x2MaxU:
return MarkAsSimd128(node), VisitI64x2MaxU(node);
case IrOpcode::kI64x2GtU:
return MarkAsSimd128(node), VisitI64x2GtU(node);
case IrOpcode::kI64x2GeU:
......@@ -2582,11 +2590,15 @@ void InstructionSelector::VisitI64x2ShrS(Node* node) { UNIMPLEMENTED(); }
// Placeholder visitors for the I64x2 SIMD operations. This run is closed by
// `#endif  // !V8_TARGET_ARCH_X64`, i.e. these definitions are only compiled
// for targets other than x64; each one simply hits UNIMPLEMENTED().
void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); }
// Signed lane-wise min/max.
void InstructionSelector::VisitI64x2MinS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MaxS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GtS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GeS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ShrU(Node* node) { UNIMPLEMENTED(); }
// Unsigned lane-wise min/max.
void InstructionSelector::VisitI64x2MinU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MaxU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GtU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GeU(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
......
......@@ -2575,6 +2575,33 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ paddq(left, tmp2); // left == dst
break;
}
case kX64I64x2MinS: {
// Lane-wise signed 64-bit minimum of input 0 (dst) and input 1 (src),
// implemented as a signed compare (pcmpgtq) followed by a masked blend
// (blendvpd). The result is left in dst.
CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
DCHECK_EQ(dst, i.InputSimd128Register(0));
// src has to be xmm0 because blendvpd takes its mask from xmm0 implicitly.
DCHECK_EQ(src, xmm0);
// Save src: the compare below overwrites src with the comparison mask.
__ movaps(tmp, src);
// src (xmm0) := per-lane mask, set where src > dst (signed).
__ pcmpgtq(src, dst);
// tmp := mask ? dst : tmp, i.e. take dst where src > dst, else keep src.
__ blendvpd(tmp, dst); // implicit use of xmm0 as mask
// tmp now holds the minimum; copy it to the output register.
__ movaps(dst, tmp);
break;
}
case kX64I64x2MaxS: {
// Lane-wise signed 64-bit maximum of input 0 (dst) and input 1 (src),
// implemented as a signed compare (pcmpgtq) followed by a masked blend
// (blendvpd) directly into dst.
CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
DCHECK_EQ(dst, i.InputSimd128Register(0));
// src has to be xmm0 because blendvpd takes its mask from xmm0 implicitly.
DCHECK_EQ(src, xmm0);
// Save src: the compare below overwrites src with the comparison mask.
__ movaps(tmp, src);
// src (xmm0) := per-lane mask, set where src > dst (signed).
__ pcmpgtq(src, dst);
// dst := mask ? tmp : dst, i.e. take the saved src where src > dst.
// The blend writes dst itself, so no trailing move is needed.
__ blendvpd(dst, tmp); // implicit use of xmm0 as mask
break;
}
case kX64I64x2Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
......@@ -2612,6 +2639,53 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ psrlq(i.OutputSimd128Register(), i.InputInt8(1));
break;
}
case kX64I64x2MinU: {
// Lane-wise unsigned 64-bit minimum of input 0 (dst) and input 1 (src).
// pcmpgtq is a signed compare, so both operands first get their sign bit
// flipped; the flipped copies are compared and the resulting mask is used
// to blend the original, unmodified values.
CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister src_tmp = i.ToSimd128Register(instr->TempAt(0));
XMMRegister dst_tmp = i.ToSimd128Register(instr->TempAt(1));
DCHECK_EQ(dst, i.InputSimd128Register(0));
// src has to be xmm0 because blendvpd takes its mask from xmm0 implicitly.
DCHECK_EQ(src, xmm0);
// Keep untouched copies of both operands; src is reused as scratch below.
__ movaps(src_tmp, src);
__ movaps(dst_tmp, dst);
// src := all ones, then shifted so only bit 63 of each 64-bit lane is set.
__ pcmpeqd(src, src);
__ psllq(src, 63);
// Flip the sign bit of both operands (dst_tmp and src).
__ pxor(dst_tmp, src);
__ pxor(src, src_tmp);
// src (xmm0) := per-lane mask, set where src > dst as unsigned values.
__ pcmpgtq(src, dst_tmp);
// src_tmp := mask ? dst : src_tmp, i.e. the smaller original value.
__ blendvpd(src_tmp, dst); // implicit use of xmm0 as mask
// Copy the minimum to the output register.
__ movaps(dst, src_tmp);
break;
}
case kX64I64x2MaxU: {
// Lane-wise unsigned 64-bit maximum of input 0 (dst) and input 1 (src).
// pcmpgtq is a signed compare, so both operands first get their sign bit
// flipped; the flipped copies are compared and the resulting mask is used
// to blend the original, unmodified values directly into dst.
CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1);
XMMRegister src_tmp = i.ToSimd128Register(instr->TempAt(0));
XMMRegister dst_tmp = i.ToSimd128Register(instr->TempAt(1));
DCHECK_EQ(dst, i.InputSimd128Register(0));
// src has to be xmm0 because blendvpd takes its mask from xmm0 implicitly.
DCHECK_EQ(src, xmm0);
// Keep untouched copies of both operands; src is reused as scratch below.
__ movaps(src_tmp, src);
__ movaps(dst_tmp, dst);
// src := all ones, then shifted so only bit 63 of each 64-bit lane is set.
__ pcmpeqd(src, src);
__ psllq(src, 63);
// Flip the sign bit of both operands (dst_tmp and src).
__ pxor(dst_tmp, src);
__ pxor(src, src_tmp);
// src (xmm0) := per-lane mask, set where src > dst as unsigned values.
__ pcmpgtq(src, dst_tmp);
// dst := mask ? src_tmp : dst, i.e. the larger original value.
// The blend writes dst itself, so no trailing move is needed.
__ blendvpd(dst, src_tmp); // implicit use of xmm0 as mask
break;
}
case kX64I64x2GtU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_2);
......
......@@ -199,11 +199,15 @@ namespace compiler {
V(X64I64x2Add) \
V(X64I64x2Sub) \
V(X64I64x2Mul) \
V(X64I64x2MinS) \
V(X64I64x2MaxS) \
V(X64I64x2Eq) \
V(X64I64x2Ne) \
V(X64I64x2GtS) \
V(X64I64x2GeS) \
V(X64I64x2ShrU) \
V(X64I64x2MinU) \
V(X64I64x2MaxU) \
V(X64I64x2GtU) \
V(X64I64x2GeU) \
V(X64I32x4Splat) \
......
......@@ -164,11 +164,15 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I64x2Add:
case kX64I64x2Sub:
case kX64I64x2Mul:
case kX64I64x2MinS:
case kX64I64x2MaxS:
case kX64I64x2Eq:
case kX64I64x2Ne:
case kX64I64x2GtS:
case kX64I64x2GeS:
case kX64I64x2ShrU:
case kX64I64x2MinU:
case kX64I64x2MaxU:
case kX64I64x2GtU:
case kX64I64x2GeU:
case kX64I32x4Splat:
......
......@@ -2848,6 +2848,40 @@ void InstructionSelector::VisitI64x2Mul(Node* node) {
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
}
// Emits kX64I64x2MinS for an I64x2MinS node.
//
// Operand constraints:
//  - the result shares its register with the first input,
//  - the second input is pinned to xmm0 (the code generator's blendvpd reads
//    its mask from xmm0 implicitly),
//  - one SIMD scratch register is reserved.
void InstructionSelector::VisitI64x2MinS(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand scratch[] = {g.TempSimd128Register()};
  const InstructionOperand result = g.DefineSameAsFirst(node);
  const InstructionOperand lhs = g.UseRegister(node->InputAt(0));
  const InstructionOperand rhs = g.UseFixed(node->InputAt(1), xmm0);
  Emit(kX64I64x2MinS, result, lhs, rhs, arraysize(scratch), scratch);
}
// Emits kX64I64x2MaxS for an I64x2MaxS node.
//
// Operand constraints:
//  - the result shares its register with the first input,
//  - the second input is pinned to xmm0 (the code generator's blendvpd reads
//    its mask from xmm0 implicitly),
//  - one SIMD scratch register is reserved.
void InstructionSelector::VisitI64x2MaxS(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand scratch[] = {g.TempSimd128Register()};
  const InstructionOperand result = g.DefineSameAsFirst(node);
  const InstructionOperand lhs = g.UseRegister(node->InputAt(0));
  const InstructionOperand rhs = g.UseFixed(node->InputAt(1), xmm0);
  Emit(kX64I64x2MaxS, result, lhs, rhs, arraysize(scratch), scratch);
}
// Emits kX64I64x2MinU for an I64x2MinU node.
//
// Operand constraints:
//  - the result shares its register with the first input,
//  - the second input is pinned to xmm0 (the code generator's blendvpd reads
//    its mask from xmm0 implicitly),
//  - two SIMD scratch registers are reserved (the code generator keeps a copy
//    of each operand in them).
void InstructionSelector::VisitI64x2MinU(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand scratch[] = {g.TempSimd128Register(),
                                  g.TempSimd128Register()};
  const InstructionOperand result = g.DefineSameAsFirst(node);
  const InstructionOperand lhs = g.UseRegister(node->InputAt(0));
  const InstructionOperand rhs = g.UseFixed(node->InputAt(1), xmm0);
  Emit(kX64I64x2MinU, result, lhs, rhs, arraysize(scratch), scratch);
}
// Emits kX64I64x2MaxU for an I64x2MaxU node.
//
// Operand constraints:
//  - the result shares its register with the first input,
//  - the second input is pinned to xmm0 (the code generator's blendvpd reads
//    its mask from xmm0 implicitly),
//  - two SIMD scratch registers are reserved (the code generator keeps a copy
//    of each operand in them).
void InstructionSelector::VisitI64x2MaxU(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand scratch[] = {g.TempSimd128Register(),
                                  g.TempSimd128Register()};
  const InstructionOperand result = g.DefineSameAsFirst(node);
  const InstructionOperand lhs = g.UseRegister(node->InputAt(0));
  const InstructionOperand rhs = g.UseFixed(node->InputAt(1), xmm0);
  Emit(kX64I64x2MaxU, result, lhs, rhs, arraysize(scratch), scratch);
}
void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
X64OperandGenerator g(this);
Emit(kX64I32x4SConvertF32x4, g.DefineSameAsFirst(node),
......
......@@ -277,10 +277,14 @@ MachineType AtomicOpType(Operator const* op) {
V(I64x2Add, Operator::kCommutative, 2, 0, 1) \
V(I64x2Sub, Operator::kNoProperties, 2, 0, 1) \
V(I64x2Mul, Operator::kCommutative, 2, 0, 1) \
V(I64x2MinS, Operator::kCommutative, 2, 0, 1) \
V(I64x2MaxS, Operator::kCommutative, 2, 0, 1) \
V(I64x2Eq, Operator::kCommutative, 2, 0, 1) \
V(I64x2Ne, Operator::kCommutative, 2, 0, 1) \
V(I64x2GtS, Operator::kNoProperties, 2, 0, 1) \
V(I64x2GeS, Operator::kNoProperties, 2, 0, 1) \
V(I64x2MinU, Operator::kCommutative, 2, 0, 1) \
V(I64x2MaxU, Operator::kCommutative, 2, 0, 1) \
V(I64x2GtU, Operator::kNoProperties, 2, 0, 1) \
V(I64x2GeU, Operator::kNoProperties, 2, 0, 1) \
V(I32x4Splat, Operator::kNoProperties, 1, 0, 1) \
......
......@@ -511,11 +511,15 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I64x2Add();
const Operator* I64x2Sub();
const Operator* I64x2Mul();
const Operator* I64x2MinS();
const Operator* I64x2MaxS();
const Operator* I64x2Eq();
const Operator* I64x2Ne();
const Operator* I64x2GtS();
const Operator* I64x2GeS();
const Operator* I64x2ShrU(int32_t);
const Operator* I64x2MinU();
const Operator* I64x2MaxU();
const Operator* I64x2GtU();
const Operator* I64x2GeU();
......
......@@ -784,11 +784,15 @@
V(I64x2Add) \
V(I64x2Sub) \
V(I64x2Mul) \
V(I64x2MinS) \
V(I64x2MaxS) \
V(I64x2Eq) \
V(I64x2Ne) \
V(I64x2GtS) \
V(I64x2GeS) \
V(I64x2ShrU) \
V(I64x2MinU) \
V(I64x2MaxU) \
V(I64x2GtU) \
V(I64x2GeU) \
V(I32x4Splat) \
......
......@@ -4089,6 +4089,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI64x2Mul:
return graph()->NewNode(mcgraph()->machine()->I64x2Mul(), inputs[0],
inputs[1]);
case wasm::kExprI64x2MinS:
return graph()->NewNode(mcgraph()->machine()->I64x2MinS(), inputs[0],
inputs[1]);
case wasm::kExprI64x2MaxS:
return graph()->NewNode(mcgraph()->machine()->I64x2MaxS(), inputs[0],
inputs[1]);
case wasm::kExprI64x2Eq:
return graph()->NewNode(mcgraph()->machine()->I64x2Eq(), inputs[0],
inputs[1]);
......@@ -4107,6 +4113,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI64x2GeS:
return graph()->NewNode(mcgraph()->machine()->I64x2GeS(), inputs[0],
inputs[1]);
case wasm::kExprI64x2MinU:
return graph()->NewNode(mcgraph()->machine()->I64x2MinU(), inputs[0],
inputs[1]);
case wasm::kExprI64x2MaxU:
return graph()->NewNode(mcgraph()->machine()->I64x2MaxU(), inputs[0],
inputs[1]);
case wasm::kExprI64x2LtU:
return graph()->NewNode(mcgraph()->machine()->I64x2GtU(), inputs[1],
inputs[0]);
......
......@@ -2252,6 +2252,12 @@ class ThreadImpl {
BINOP_CASE(I64x2Add, i64x2, int2, 2, base::AddWithWraparound(a, b))
BINOP_CASE(I64x2Sub, i64x2, int2, 2, base::SubWithWraparound(a, b))
BINOP_CASE(I64x2Mul, i64x2, int2, 2, base::MulWithWraparound(a, b))
BINOP_CASE(I64x2MinS, i64x2, int2, 2, a < b ? a : b)
BINOP_CASE(I64x2MinU, i64x2, int2, 2,
static_cast<uint64_t>(a) < static_cast<uint64_t>(b) ? a : b)
BINOP_CASE(I64x2MaxS, i64x2, int2, 2, a > b ? a : b)
BINOP_CASE(I64x2MaxU, i64x2, int2, 2,
static_cast<uint64_t>(a) > static_cast<uint64_t>(b) ? a : b)
BINOP_CASE(I32x4Add, i32x4, int4, 4, base::AddWithWraparound(a, b))
BINOP_CASE(I32x4Sub, i32x4, int4, 4, base::SubWithWraparound(a, b))
BINOP_CASE(I32x4Mul, i32x4, int4, 4, base::MulWithWraparound(a, b))
......
......@@ -270,7 +270,9 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIMDI_OP(ExtractLane, "extract_lane")
CASE_SIMDI_OP(ReplaceLane, "replace_lane")
CASE_SIGN_OP(SIMDI, Min, "min")
CASE_SIGN_OP(I64x2, Min, "min")
CASE_SIGN_OP(SIMDI, Max, "max")
CASE_SIGN_OP(I64x2, Max, "max")
CASE_SIGN_OP(SIMDI, Lt, "lt")
CASE_SIGN_OP(I64x2, Lt, "lt")
CASE_SIGN_OP(SIMDI, Le, "le")
......
......@@ -376,6 +376,10 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
V(I64x2Add, 0xfd8a, s_ss) \
V(I64x2Sub, 0xfd8d, s_ss) \
V(I64x2Mul, 0xfd8c, s_ss) \
V(I64x2MinS, 0xfd8e, s_ss) \
V(I64x2MinU, 0xfd8f, s_ss) \
V(I64x2MaxS, 0xfd90, s_ss) \
V(I64x2MaxU, 0xfd91, s_ss) \
V(F32x4Abs, 0xfd95, s_s) \
V(F32x4Neg, 0xfd96, s_s) \
V(F32x4RecipApprox, 0xfd98, s_s) \
......
......@@ -1131,6 +1131,24 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Mul) {
base::MulWithWraparound);
}
// Runs the shared I64x2 binary-op test driver for kExprI64x2MinS, using
// Minimum as the expected-result function.
WASM_SIMD_TEST_NO_LOWERING(I64x2MinS) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2MinS, Minimum);
}
// Runs the shared I64x2 binary-op test driver for kExprI64x2MaxS, using
// Maximum as the expected-result function.
WASM_SIMD_TEST_NO_LOWERING(I64x2MaxS) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2MaxS, Maximum);
}
// Runs the shared I64x2 binary-op test driver for kExprI64x2MinU, using
// UnsignedMinimum as the expected-result function.
WASM_SIMD_TEST_NO_LOWERING(I64x2MinU) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2MinU,
UnsignedMinimum);
}
// Runs the shared I64x2 binary-op test driver for kExprI64x2MaxU, using
// UnsignedMaximum as the expected-result function.
WASM_SIMD_TEST_NO_LOWERING(I64x2MaxU) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2MaxU,
UnsignedMaximum);
}
// Runs the shared I64x2 binary-op test driver for kExprI64x2Eq, using Equal
// as the expected-result function.
WASM_SIMD_TEST_NO_LOWERING(I64x2Eq) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Eq, Equal);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment