Commit 4a604f2f authored by gdeepti, committed by Commit bot

[wasm] Implement Generic S128 Load/Store, logical ops and Horizontal add

 - Ops: S128Load, S128Store, S128And, S128Or, S128Xor, S128Not, I32x4AddHoriz, I16x8AddHoriz (horizontal-add semantics sketched below)
 - Add x64 assembler support for phaddd, phaddw, pand, por
 - Enable the Globals tests and other tests applicable to x64, in addition to tests for the newly implemented ops

BUG=v8:6020

R=bbudge@chromium.org, bmeurer@chromium.org, zvi.rackover@intel.com

Review-Url: https://codereview.chromium.org/2849463003
Cr-Commit-Position: refs/heads/master@{#45005}
parent cda2e2dd
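For reference, the horizontal adds lower to SSSE3 phaddd/phaddw: pairwise sums of the first operand fill the low half of the result, pairwise sums of the second operand fill the high half. A minimal scalar sketch of the I32x4AddHoriz semantics (editorial illustration, not code from this commit; wasm adds wrap modulo 2^32, hence the unsigned arithmetic):

#include <array>
#include <cstdint>

// Reference model of I32x4AddHoriz / phaddd.
std::array<int32_t, 4> I32x4AddHorizRef(const std::array<int32_t, 4>& a,
                                        const std::array<int32_t, 4>& b) {
  // Wrapping add via uint32_t avoids signed-overflow UB in the model.
  auto add = [](int32_t x, int32_t y) {
    return static_cast<int32_t>(static_cast<uint32_t>(x) +
                                static_cast<uint32_t>(y));
  };
  return {add(a[0], a[1]), add(a[2], a[3]), add(b[0], b[1]), add(b[2], b[3])};
}
// Example: a = {0, 1, 2, 3}, b = {4, 5, 6, 7}  ->  {1, 5, 9, 13}.

The first hunks update the architecture-independent instruction selector (likely src/compiler/instruction-selector.cc):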
@@ -2219,9 +2219,9 @@ void InstructionSelector::VisitI32x4ShrU(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS &&
         // !V8_TARGET_ARCH_MIPS64
-#if !V8_TARGET_ARCH_ARM
+#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
 void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_ARM
+#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
 #if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
 void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
@@ -2277,13 +2277,9 @@ void InstructionSelector::VisitI16x8Add(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI16x8AddSaturateS(Node* node) {
   UNIMPLEMENTED();
 }
-#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
-#if !V8_TARGET_ARCH_ARM
 void InstructionSelector::VisitI16x8AddHoriz(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_ARM
-#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
 void InstructionSelector::VisitI16x8Sub(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI16x8SubSaturateS(Node* node) {
@@ -2427,7 +2423,9 @@ void InstructionSelector::VisitI8x16MaxU(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI8x16LtU(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI8x16LeU(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_ARM
+#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
 void InstructionSelector::VisitS128And(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitS128Or(Node* node) { UNIMPLEMENTED(); }
@@ -2435,7 +2433,7 @@ void InstructionSelector::VisitS128Or(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitS128Xor(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitS128Not(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_ARM
+#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
 #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && \
     !V8_TARGET_ARCH_MIPS64
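In the generic selector, every SIMD visitor has an UNIMPLEMENTED() stub compiled only on architectures that do not yet implement the op, so a port that gains an implementation must add itself to the guard, or its own definition would clash with the stub. The pattern, with a hypothetical op name Foo (sketch, not commit code):

// Stub exists only where no port-specific VisitFoo is defined; adding
// "&& !V8_TARGET_ARCH_X64" hands Foo over to the x64 back end.
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFoo(Node* node) { UNIMPLEMENTED(); }
#endif

Next, the x64 code generator (likely src/compiler/x64/code-generator-x64.cc):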
@@ -2006,6 +2006,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
         __ Movsd(operand, i.InputDoubleRegister(index));
       }
       break;
+    case kX64Movdqu: {
+      CpuFeatureScope sse_scope(masm(), SSSE3);
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr->InputCount(), i,
+                          __ pc_offset());
+      if (instr->HasOutput()) {
+        __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
+      } else {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        __ movdqu(operand, i.InputSimd128Register(index));
+      }
+      break;
+    }
     case kX64BitcastFI:
       if (instr->InputAt(0)->IsFPStackSlot()) {
         __ movl(i.OutputRegister(), i.InputOperand(0));
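The single kX64Movdqu opcode covers both directions: an instruction with an output is a load into an XMM register, an output-less one is a store. movdqu is the unaligned 128-bit move; wasm linear memory guarantees no 16-byte alignment, so the generic S128 load/store cannot use movdqa. A scalar model of the unaligned contract (illustration only, not V8 code):

#include <cstdint>
#include <cstring>

struct Simd128 { uint8_t bytes[16]; };

// movdqu, unlike movdqa, accepts any address; memcpy models exactly that.
Simd128 LoadS128(const void* mem) {
  Simd128 v;
  std::memcpy(&v, mem, sizeof v);  // unaligned 128-bit load
  return v;
}

void StoreS128(void* mem, const Simd128& v) {
  std::memcpy(mem, &v, sizeof v);  // unaligned 128-bit store
}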
@@ -2187,6 +2200,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
+    case kX64I32x4AddHoriz: {
+      CpuFeatureScope sse_scope(masm(), SSSE3);
+      __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
     case kX64I32x4Sub: {
       __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
@@ -2276,6 +2294,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
+    case kX64I16x8AddHoriz: {
+      CpuFeatureScope sse_scope(masm(), SSSE3);
+      __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
     case kX64I16x8Sub: {
       __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
@@ -2410,6 +2433,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
+    case kX64S128And: {
+      __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64S128Or: {
+      __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64S128Xor: {
+      __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64S128Not: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ pcmpeqd(dst, dst);
+      __ pxor(dst, i.InputSimd128Register(1));
+      break;
+    }
     case kX64S128Select: {
       // Mask used here is stored in dst.
       XMMRegister dst = i.OutputSimd128Register();
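SSE has no bitwise-NOT instruction, so kX64S128Not is synthesized: pcmpeqd dst, dst compares the register with itself, which sets every lane to all ones, and the following pxor flips every bit of the input. Per-lane scalar model (illustration only):

#include <cstdint>

// pcmpeqd dst, dst  ->  each 32-bit lane becomes 0xFFFFFFFF (x == x).
// pxor dst, src     ->  dst = 0xFFFFFFFF ^ src, i.e. ~src.
uint32_t S128NotLane(uint32_t src) {
  uint32_t all_ones = 0xFFFFFFFFu;
  return all_ones ^ src;  // equals ~src
}

The new opcodes are declared in the x64 opcode list (likely src/compiler/x64/instruction-codes-x64.h):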
@@ -132,6 +132,7 @@ namespace compiler {
   V(X64Movq)                \
   V(X64Movsd)               \
   V(X64Movss)               \
+  V(X64Movdqu)              \
   V(X64BitcastFI)           \
   V(X64BitcastDL)           \
   V(X64BitcastIF)           \
@@ -149,6 +150,7 @@ namespace compiler {
   V(X64I32x4Shl)            \
   V(X64I32x4ShrS)           \
   V(X64I32x4Add)            \
+  V(X64I32x4AddHoriz)       \
   V(X64I32x4Sub)            \
   V(X64I32x4Mul)            \
   V(X64I32x4MinS)           \
@@ -165,6 +167,7 @@ namespace compiler {
   V(X64I16x8ShrS)           \
   V(X64I16x8Add)            \
   V(X64I16x8AddSaturateS)   \
+  V(X64I16x8AddHoriz)       \
   V(X64I16x8Sub)            \
   V(X64I16x8SubSaturateS)   \
   V(X64I16x8Mul)            \
@@ -192,6 +195,10 @@ namespace compiler {
   V(X64I8x16SubSaturateU)   \
   V(X64I8x16MinU)           \
   V(X64I8x16MaxU)           \
+  V(X64S128And)             \
+  V(X64S128Or)              \
+  V(X64S128Xor)             \
+  V(X64S128Not)             \
   V(X64S128Select)          \
   V(X64S128Zero)
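The opcode list is an X-macro: declaring an opcode is one V(...) line, and every consumer expands the same list, so the additions above automatically reach the enum, the mnemonic table, and so on. A generic sketch of the technique (illustrative names, not V8's actual macros):

#define DEMO_OPCODE_LIST(V) \
  V(X64Movdqu)              \
  V(X64I32x4AddHoriz)       \
  V(X64S128And)

// Consumer 1: an enum of opcode constants.
enum DemoOpcode {
#define DECLARE_OPCODE(Name) k##Name,
  DEMO_OPCODE_LIST(DECLARE_OPCODE)
#undef DECLARE_OPCODE
};

// Consumer 2: printable names from the same single source of truth.
inline const char* DemoOpcodeName(DemoOpcode op) {
  switch (op) {
#define OPCODE_CASE(Name) \
  case k##Name:           \
    return #Name;
    DEMO_OPCODE_LIST(OPCODE_CASE)
#undef OPCODE_CASE
  }
  return "unknown";
}

The instruction scheduler (likely src/compiler/x64/instruction-scheduler-x64.cc) is one such consumer and must classify each new opcode: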
@@ -129,6 +129,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I32x4Shl:
     case kX64I32x4ShrS:
     case kX64I32x4Add:
+    case kX64I32x4AddHoriz:
     case kX64I32x4Sub:
     case kX64I32x4Mul:
     case kX64I32x4MinS:
@@ -145,6 +146,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I16x8ShrS:
     case kX64I16x8Add:
     case kX64I16x8AddSaturateS:
+    case kX64I16x8AddHoriz:
     case kX64I16x8Sub:
     case kX64I16x8SubSaturateS:
     case kX64I16x8Mul:
@@ -172,6 +174,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I8x16SubSaturateU:
     case kX64I8x16MinU:
     case kX64I8x16MaxU:
+    case kX64S128And:
+    case kX64S128Or:
+    case kX64S128Xor:
+    case kX64S128Not:
     case kX64S128Select:
     case kX64S128Zero:
       return (instr->addressing_mode() == kMode_None)
@@ -215,6 +221,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64Movq:
     case kX64Movsd:
     case kX64Movss:
+    case kX64Movdqu:
       return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
     case kX64StackCheck:
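The SIMD ALU opcodes land in the pure-computation bucket, while kX64Movdqu joins the move group, where the rule is: the load form (it defines an output) is kIsLoadOperation, and the output-less store form is kHasSideEffect so its memory write cannot be reordered across dependent accesses. A compressed sketch of that classification (flag names and values illustrative, not V8's):

enum DemoSchedulerFlags { kDemoIsLoadOperation = 1, kDemoHasSideEffect = 2 };

// Mirrors "instr->HasOutput() ? kIsLoadOperation : kHasSideEffect".
inline DemoSchedulerFlags ClassifyMovdqu(bool has_output) {
  return has_output ? kDemoIsLoadOperation : kDemoHasSideEffect;
}

The x64 instruction selector (likely src/compiler/x64/instruction-selector-x64.cc) maps kSimd128 loads and stores to the new opcode and wires up the new SIMD ops: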
@@ -231,6 +231,8 @@ ArchOpcode GetLoadOpcode(LoadRepresentation load_rep) {
       opcode = kX64Movq;
       break;
     case MachineRepresentation::kSimd128:  // Fall through.
+      opcode = kX64Movdqu;
+      break;
     case MachineRepresentation::kSimd1x4:  // Fall through.
     case MachineRepresentation::kSimd1x8:  // Fall through.
     case MachineRepresentation::kSimd1x16:  // Fall through.
@@ -266,6 +268,8 @@ ArchOpcode GetStoreOpcode(StoreRepresentation store_rep) {
       return kX64Movq;
       break;
     case MachineRepresentation::kSimd128:  // Fall through.
+      return kX64Movdqu;
+      break;
     case MachineRepresentation::kSimd1x4:  // Fall through.
     case MachineRepresentation::kSimd1x8:  // Fall through.
     case MachineRepresentation::kSimd1x16:  // Fall through.
@@ -2457,6 +2461,7 @@ VISIT_ATOMIC_BINOP(Xor)
 #define SIMD_BINOP_LIST(V) \
   V(I32x4Add)              \
+  V(I32x4AddHoriz)         \
   V(I32x4Sub)              \
   V(I32x4Mul)              \
   V(I32x4MinS)             \
@@ -2467,6 +2472,7 @@ VISIT_ATOMIC_BINOP(Xor)
   V(I32x4MaxU)             \
   V(I16x8Add)              \
   V(I16x8AddSaturateS)     \
+  V(I16x8AddHoriz)         \
   V(I16x8Sub)              \
   V(I16x8SubSaturateS)     \
   V(I16x8Mul)              \
@@ -2489,7 +2495,12 @@ VISIT_ATOMIC_BINOP(Xor)
   V(I8x16AddSaturateU)     \
   V(I8x16SubSaturateU)     \
   V(I8x16MinU)             \
-  V(I8x16MaxU)
+  V(I8x16MaxU)             \
+  V(S128And)               \
+  V(S128Or)                \
+  V(S128Xor)
+
+#define SIMD_UNOP_LIST(V) V(S128Not)
 
 #define SIMD_SHIFT_OPCODES(V) \
   V(I32x4Shl)                 \
@@ -2547,6 +2558,15 @@ SIMD_ZERO_OP_LIST(SIMD_VISIT_ZERO_OP)
 SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT)
 #undef VISIT_SIMD_SHIFT
 
+#define VISIT_SIMD_UNOP(Opcode)                         \
+  void InstructionSelector::Visit##Opcode(Node* node) { \
+    X64OperandGenerator g(this);                        \
+    Emit(kX64##Opcode, g.DefineAsRegister(node),        \
+         g.UseRegister(node->InputAt(0)));              \
+  }
+SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
+#undef VISIT_SIMD_UNOP
+
 #define VISIT_SIMD_BINOP(Opcode)                        \
   void InstructionSelector::Visit##Opcode(Node* node) { \
     X64OperandGenerator g(this);                        \
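VISIT_SIMD_UNOP stamps out one visitor per SIMD_UNOP_LIST entry. With the list holding only S128Not, a hand expansion of the macro above yields:

void InstructionSelector::VisitS128Not(Node* node) {
  X64OperandGenerator g(this);
  Emit(kX64S128Not, g.DefineAsRegister(node),
       g.UseRegister(node->InputAt(0)));
}

The assembler's SSE2/SSSE3 instruction tables (likely src/x64/sse-instr.h) gain the four new mnemonics: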
@@ -41,6 +41,8 @@
   V(psubsw, 66, 0F, E9)     \
   V(psubusb, 66, 0F, D8)    \
   V(psubusw, 66, 0F, D9)    \
+  V(pand, 66, 0F, DB)       \
+  V(por, 66, 0F, EB)        \
   V(pxor, 66, 0F, EF)       \
   V(cvtps2dq, 66, 0F, 5B)
@@ -48,6 +50,8 @@
   V(pabsb, 66, 0F, 38, 1C)  \
   V(pabsw, 66, 0F, 38, 1D)  \
   V(pabsd, 66, 0F, 38, 1E)  \
+  V(phaddd, 66, 0F, 38, 02) \
+  V(phaddw, 66, 0F, 38, 01) \
   V(pshufb, 66, 0F, 38, 00) \
   V(psignb, 66, 0F, 38, 08) \
   V(psignw, 66, 0F, 38, 09) \
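Each table row is a mnemonic plus its encoding bytes: V(pand, 66, 0F, DB) is the mandatory 66 operand-size prefix, the 0F escape, and opcode DB, while phaddd/phaddw sit behind the three-byte SSSE3 escape 0F 38. The assembler macros expand each row into an emitter roughly like this (simplified sketch; the real Assembler also handles REX prefixes and memory operands):

#include <cstdint>
#include <vector>

// Emit "pand xmm_dst, xmm_src" for register numbers 0-7 (REX handling
// for xmm8-xmm15 omitted): 66 0F DB /r with a register-direct ModRM.
inline void EmitPand(std::vector<uint8_t>* code, int dst, int src) {
  code->push_back(0x66);  // operand-size prefix from the table row
  code->push_back(0x0F);  // opcode escape
  code->push_back(0xDB);  // pand opcode
  code->push_back(static_cast<uint8_t>(0xC0 | (dst << 3) | src));  // ModRM
}

Finally, the SIMD execution tests (likely test/cctest/wasm/test-run-wasm-simd.cc) are opened up to x64: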
@@ -990,13 +990,13 @@ WASM_EXEC_COMPILED_TEST(I32x4Mul) { RunI32x4BinOpTest(kExprI32x4Mul, Mul); }
 #endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64 || SIMD_LOWERING_TARGET ||
         // V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
-#if V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET
+#if V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET || V8_TARGET_ARCH_X64
 WASM_EXEC_COMPILED_TEST(S128And) { RunI32x4BinOpTest(kExprS128And, And); }
 WASM_EXEC_COMPILED_TEST(S128Or) { RunI32x4BinOpTest(kExprS128Or, Or); }
 WASM_EXEC_COMPILED_TEST(S128Xor) { RunI32x4BinOpTest(kExprS128Xor, Xor); }
-#endif  // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET
+#endif  // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET || V8_TARGET_ARCH_X64
 #if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64 || SIMD_LOWERING_TARGET || \
     V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
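S128And/Or/Xor are pure bitwise operations, so checking them with the 32-bit-lane harness is sound: RunI32x4BinOpTest applies a scalar reference op to every i32 lane. The scalar references presumably look like the usual templates in this file (assumed shape; exact signatures may differ):

template <typename T>
T And(T a, T b) { return a & b; }
template <typename T>
T Or(T a, T b) { return a | b; }
template <typename T>
T Xor(T a, T b) { return a ^ b; }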
@@ -1619,7 +1619,9 @@ WASM_EXEC_COMPILED_TEST(S8x2Reverse) {
   RunUnaryLaneOpTest<int8_t>(kExprS8x2Reverse, {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8,
                                                  11, 10, 13, 12, 15, 14}});
 }
+#endif  // V8_TARGET_ARCH_ARM
 
+#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64
 // Test binary ops with two lane test patterns, all lanes distinct.
 template <typename T>
 void RunBinaryLaneOpTest(
@@ -1632,8 +1634,8 @@ void RunBinaryLaneOpTest(
   T* global2 = r.module().AddGlobal<T>(kWasmS128);
   static const size_t kElems = kSimd128Size / sizeof(T);
   for (size_t i = 0; i < kElems; i++) {
-    global1[i] = i;
-    global2[i] = kElems + i;
+    global1[i] = static_cast<T>(i);
+    global2[i] = static_cast<T>(kElems + i);
   }
   BUILD(r,
         WASM_SET_GLOBAL(0, WASM_SIMD_BINOP(simd_op, WASM_GET_GLOBAL(0),
@@ -1646,10 +1648,6 @@ void RunBinaryLaneOpTest(
   }
 }
 
-WASM_EXEC_COMPILED_TEST(F32x4AddHoriz) {
-  RunBinaryLaneOpTest<float>(kExprF32x4AddHoriz, {{1.0f, 5.0f, 9.0f, 13.0f}});
-}
-
 WASM_EXEC_COMPILED_TEST(I32x4AddHoriz) {
   RunBinaryLaneOpTest<int32_t>(kExprI32x4AddHoriz, {{1, 5, 9, 13}});
 }
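The expected lanes follow from the setup above: RunBinaryLaneOpTest fills global1 with {0, 1, ..., kElems-1} and global2 with {kElems, ..., 2*kElems-1}. For I32x4AddHoriz that is a = {0, 1, 2, 3} and b = {4, 5, 6, 7}, so the result is {0+1, 2+3, 4+5, 6+7} = {1, 5, 9, 13}; for I16x8AddHoriz just below, a = {0..7} and b = {8..15} give {1, 5, 9, 13, 17, 21, 25, 29}. The same arithmetic as a compile-time check:

static_assert(0 + 1 == 1 && 2 + 3 == 5 && 4 + 5 == 9 && 6 + 7 == 13,
              "I32x4AddHoriz expected lanes are the pairwise sums");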
@@ -1658,6 +1656,12 @@ WASM_EXEC_COMPILED_TEST(I16x8AddHoriz) {
   RunBinaryLaneOpTest<int16_t>(kExprI16x8AddHoriz,
                                {{1, 5, 9, 13, 17, 21, 25, 29}});
 }
+#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64
+
+#if V8_TARGET_ARCH_ARM
+WASM_EXEC_COMPILED_TEST(F32x4AddHoriz) {
+  RunBinaryLaneOpTest<float>(kExprF32x4AddHoriz, {{1.0f, 5.0f, 9.0f, 13.0f}});
+}
 
 WASM_EXEC_COMPILED_TEST(S32x4ZipLeft) {
   RunBinaryLaneOpTest<int32_t>(kExprS32x4ZipLeft, {{0, 4, 1, 5}});
@@ -2023,7 +2027,9 @@ WASM_EXEC_COMPILED_TEST(SimdI32x4AddWithF32x4) {
                WASM_I32V(1), WASM_I32V(0)));
   CHECK_EQ(1, r.Call());
 }
+#endif  // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET
 
+#if V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET || V8_TARGET_ARCH_X64
 WASM_EXEC_COMPILED_TEST(SimdI32x4Local) {
   FLAG_wasm_simd_prototype = true;
   WasmRunner<int32_t> r(kExecuteCompiled);
@@ -2080,7 +2086,9 @@ WASM_EXEC_COMPILED_TEST(SimdI32x4For) {
         WASM_GET_LOCAL(0));
   CHECK_EQ(1, r.Call());
 }
+#endif  // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET || V8_TARGET_ARCH_X64
 
+#if V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET
 WASM_EXEC_COMPILED_TEST(SimdF32x4For) {
   FLAG_wasm_simd_prototype = true;
   WasmRunner<int32_t> r(kExecuteCompiled);
@@ -2105,7 +2113,9 @@ WASM_EXEC_COMPILED_TEST(SimdF32x4For) {
         WASM_GET_LOCAL(0));
   CHECK_EQ(1, r.Call());
 }
+#endif  // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET
 
+#if V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET || V8_TARGET_ARCH_X64
 WASM_EXEC_COMPILED_TEST(SimdI32x4GetGlobal) {
   FLAG_wasm_simd_prototype = true;
   WasmRunner<int32_t, int32_t> r(kExecuteCompiled);
@@ -2151,7 +2161,9 @@ WASM_EXEC_COMPILED_TEST(SimdI32x4SetGlobal) {
   CHECK_EQ(*(global + 2), 45);
   CHECK_EQ(*(global + 3), 56);
 }
+#endif  // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET || V8_TARGET_ARCH_X64
 
+#if V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET
 WASM_EXEC_COMPILED_TEST(SimdF32x4GetGlobal) {
   FLAG_wasm_simd_prototype = true;
   WasmRunner<int32_t, int32_t> r(kExecuteCompiled);
@@ -2197,7 +2209,9 @@ WASM_EXEC_COMPILED_TEST(SimdF32x4SetGlobal) {
   CHECK_EQ(*(global + 2), 32.25);
   CHECK_EQ(*(global + 3), 65.0);
 }
+#endif  // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET
 
+#if V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET || V8_TARGET_ARCH_X64
 WASM_EXEC_COMPILED_TEST(SimdLoadStoreLoad) {
   FLAG_wasm_simd_prototype = true;
   WasmRunner<int32_t> r(kExecuteCompiled);
@@ -2215,4 +2229,4 @@ WASM_EXEC_COMPILED_TEST(SimdLoadStoreLoad) {
     CHECK_EQ(expected, r.Call());
   }
 }
-#endif  // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET
+#endif  // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET || V8_TARGET_ARCH_X64