Commit 92a5b399 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement i64x2 add sub mul for ia32

Bug: v8:9728
Change-Id: I6d8f096adc42a6d417f876d5805302b3bea3308b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1867381
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64787}
parent 91862a70
......@@ -264,6 +264,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP2_WITH_TYPE(Movd, movd, Operand, XMMRegister)
AVX_OP2_WITH_TYPE(Cvtdq2ps, cvtdq2ps, XMMRegister, Operand)
AVX_OP2_WITH_TYPE(Sqrtpd, sqrtpd, XMMRegister, const Operand&)
AVX_OP2_WITH_TYPE(Movaps, movaps, XMMRegister, XMMRegister)
AVX_OP2_WITH_TYPE(Movapd, movapd, XMMRegister, XMMRegister)
AVX_OP2_WITH_TYPE(Movapd, movapd, XMMRegister, const Operand&)
......@@ -338,6 +339,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Cmpunordpd, cmpunordpd)
AVX_PACKED_OP3(Psllq, psllq)
AVX_PACKED_OP3(Psrlq, psrlq)
AVX_PACKED_OP3(Paddq, paddq)
AVX_PACKED_OP3(Psubq, psubq)
AVX_PACKED_OP3(Pmuludq, pmuludq)
#undef AVX_PACKED_OP3
AVX_PACKED_OP3_WITH_TYPE(Psllq, psllq, XMMRegister, uint8_t)
......
......@@ -12,6 +12,7 @@
V(paddb, 66, 0F, FC) \
V(paddw, 66, 0F, FD) \
V(paddd, 66, 0F, FE) \
V(paddq, 66, 0F, D4) \
V(paddsb, 66, 0F, EC) \
V(paddsw, 66, 0F, ED) \
V(paddusb, 66, 0F, DC) \
......@@ -32,6 +33,7 @@
V(psllw, 66, 0F, F1) \
V(pslld, 66, 0F, F2) \
V(psllq, 66, 0F, F3) \
V(pmuludq, 66, 0F, F4) \
V(psraw, 66, 0F, E1) \
V(psrad, 66, 0F, E2) \
V(psrlw, 66, 0F, D1) \
......
......@@ -2041,6 +2041,41 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Psubq(dst, tmp2);
break;
}
// i64x2.add: packed 64-bit lane-wise integer add (PADDQ).
case kIA32I64x2Add: {
__ Paddq(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
// i64x2.sub: packed 64-bit lane-wise integer subtract (PSUBQ).
case kIA32I64x2Sub: {
__ Psubq(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
// i64x2.mul: IA32/SSE has no 64x64->64 vector multiply, so synthesize it
// per qword lane from 32x32->64 PMULUDQ partial products:
//   left * right = left_lo * right_lo
//                  + ((left_hi * right_lo + left_lo * right_hi) << 32)
// The hi*hi partial product only contributes to bits >= 64 and is dropped.
case kIA32I64x2Mul: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister left = i.InputSimd128Register(0);
XMMRegister right = i.InputSimd128Register(1);
XMMRegister tmp1 = i.TempSimd128Register(0);
XMMRegister tmp2 = i.TempSimd128Register(1);
// Copy the inputs into scratch registers so the originals stay intact
// for the final low-dword multiply below.
__ Movaps(tmp1, left);
__ Movaps(tmp2, right);
// Multiply high dword of each qword of left with right.
__ Psrlq(tmp1, 32);
__ Pmuludq(tmp1, tmp1, right);
// Multiply high dword of each qword of right with left.
__ Psrlq(tmp2, 32);
__ Pmuludq(tmp2, tmp2, left);
// Sum the two cross terms and shift them into the high dword of each
// qword, where they belong in the final product.
__ Paddq(tmp2, tmp2, tmp1);
__ Psllq(tmp2, tmp2, 32);
// lo*lo partial product, then add the shifted cross terms.
__ Pmuludq(dst, left, right);
__ Paddq(dst, dst, tmp2);
break;
}
case kIA32I64x2ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
......
......@@ -138,6 +138,9 @@ namespace compiler {
V(IA32I64x2Neg) \
V(IA32I64x2Shl) \
V(IA32I64x2ShrS) \
V(IA32I64x2Add) \
V(IA32I64x2Sub) \
V(IA32I64x2Mul) \
V(IA32I64x2ShrU) \
V(SSEF32x4Splat) \
V(AVXF32x4Splat) \
......
......@@ -119,6 +119,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I64x2Neg:
case kIA32I64x2Shl:
case kIA32I64x2ShrS:
case kIA32I64x2Add:
case kIA32I64x2Sub:
case kIA32I64x2Mul:
case kIA32I64x2ShrU:
case kSSEF32x4Splat:
case kAVXF32x4Splat:
......
......@@ -2027,6 +2027,10 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(S128Or) \
V(S128Xor)
#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
V(I64x2Add) \
V(I64x2Sub)
#define SIMD_UNOP_LIST(V) \
V(F32x4SConvertI32x4) \
V(F32x4RecipApprox) \
......@@ -2150,6 +2154,21 @@ void InstructionSelector::VisitI64x2ShrS(Node* node) {
}
}
// Selects the IA32 i64x2.mul instruction. The code generator needs two
// scratch XMM registers to hold the partial products of the synthesized
// 64x64->64 multiply sequence.
void InstructionSelector::VisitI64x2Mul(Node* node) {
  IA32OperandGenerator g(this);
  InstructionOperand temps[] = {g.TempSimd128Register(),
                                g.TempSimd128Register()};
  // With AVX the destination may be any register; without it the
  // destination is constrained to alias the first input.
  InstructionOperand dst = IsSupported(AVX) ? g.DefineAsRegister(node)
                                            : g.DefineSameAsFirst(node);
  Emit(kIA32I64x2Mul, dst, g.UseUniqueRegister(node->InputAt(0)),
       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
}
void InstructionSelector::VisitF32x4Splat(Node* node) {
VisitRRSimd(this, node, kAVXF32x4Splat, kSSEF32x4Splat);
}
......@@ -2336,6 +2355,14 @@ SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
#undef VISIT_SIMD_BINOP
#undef SIMD_BINOP_LIST
// For binops in SIMD_BINOP_UNIFIED_SSE_AVX_LIST a single kIA32* opcode
// serves both encodings, so the same opcode is passed for the AVX and
// SSE slots of VisitRROFloat.
#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROFloat(this, node, kIA32##Opcode, kIA32##Opcode); \
}
SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX)
#undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX
#undef SIMD_BINOP_UNIFIED_SSE_AVX_LIST
void VisitPack(InstructionSelector* selector, Node* node, ArchOpcode avx_opcode,
ArchOpcode sse_opcode) {
IA32OperandGenerator g(selector);
......
......@@ -2679,14 +2679,14 @@ void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
// Portable UNIMPLEMENTED stubs for I64x2 operations that now have an
// IA32 implementation; this group is compiled only for the remaining
// architectures (guarded by !V8_TARGET_ARCH_IA32, opened above).
void InstructionSelector::VisitI64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Shl(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ShrU(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_IA32
// Stubs below remain unimplemented on every architecture at this point
// in the file.
void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GtS(Node* node) { UNIMPLEMENTED(); }
......
......@@ -1009,7 +1009,6 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2ShrU) {
LogicalShiftRight);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
void RunI64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int64BinOp expected_op) {
WasmRunner<int32_t, int64_t, int64_t> r(execution_tier, lower_simd);
......@@ -1046,6 +1045,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Sub) {
base::SubWithWraparound);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(I64x2Eq) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Eq, Equal);
}
......@@ -1431,12 +1431,10 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Max) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Max, JSMax);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(I64x2Mul) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Mul,
base::MulWithWraparound);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(I64x2MinS) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment