Commit de88bfb2 authored by Deepti Gandluri, committed by Commit Bot

[wasm-simd] Implement remaining I8x16 SIMD ops on x64

 - Implement I8x16 shifts and I8x16 multiplication
 - Fix a convert bug: I32x4UConvertF32x4 selection was missing its temp register
 - Enable all previously arch-gated tests except the shuffle tests

Change-Id: Id1a469d2883c30ea782c51d21dc462d211f94420
Reviewed-on: https://chromium-review.googlesource.com/c/1318609
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#57254}
parent 691dbd2f
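[Editor's note] Before the diff, a scalar reference for the new lanewise ops may help; this is an illustrative sketch, not part of the patch. Shift counts are masked to 0..7, matching the `& 0x7` in the code generator below; left shifts and products wrap to 8 bits.

#include <cstdint>

int8_t RefI8x16Shl(int8_t v, int shift) {
  // Shift in the unsigned domain, then truncate back to 8 bits.
  return static_cast<int8_t>(static_cast<uint8_t>(v) << (shift & 7));
}
int8_t RefI8x16ShrS(int8_t v, int shift) {
  return static_cast<int8_t>(v >> (shift & 7));  // arithmetic shift
}
int8_t RefI8x16ShrU(int8_t v, int shift) {
  return static_cast<int8_t>(static_cast<uint8_t>(v) >> (shift & 7));
}
int8_t RefI8x16Mul(int8_t a, int8_t b) {
  return static_cast<int8_t>(a * b);  // low 8 bits of the 16-bit product
}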
@@ -2481,14 +2481,6 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
 #if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
     !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
-void InstructionSelector::VisitI8x16Shl(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
         // && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
...
@@ -2683,6 +2683,37 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       }
       break;
     }
+    case kX64I8x16Shl: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      int8_t shift = i.InputInt8(1) & 0x7;
+      if (shift < 4) {
+        // For small shifts, doubling is faster.
+        for (int i = 0; i < shift; ++i) {
+          __ paddb(dst, dst);
+        }
+      } else {
+        // Mask off the unwanted bits before word-shifting.
+        __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+        __ psrlw(kScratchDoubleReg, 8 + shift);
+        __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
+        __ pand(dst, kScratchDoubleReg);
+        __ psllw(dst, shift);
+      }
+      break;
+    }
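// [Editor's sketch, not part of this patch] The masking trick above, written
// with SSE2 intrinsics: x64 has no per-byte shift (psllb), so the bits that
// would spill into the neighboring byte are cleared first and a 16-bit shift
// does the rest. The helper name is hypothetical.
#include <emmintrin.h>
static __m128i I8x16ShlSketch(__m128i v, int shift) {
  // Keep only the low (8 - shift) bits of each byte, mirroring the
  // pcmpeqw/psrlw/packuswb mask construction above.
  __m128i mask = _mm_set1_epi8(static_cast<char>(0xFFu >> shift));
  return _mm_slli_epi16(_mm_and_si128(v, mask), shift);
}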
+    case kX64I8x16ShrS: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      int8_t shift = i.InputInt8(1) & 0x7;
+      // Unpack the bytes into words, do arithmetic shifts, and repack.
+      __ punpckhbw(kScratchDoubleReg, src);
+      __ punpcklbw(dst, src);
+      __ psraw(kScratchDoubleReg, 8 + shift);
+      __ psraw(dst, 8 + shift);
+      __ packsswb(dst, kScratchDoubleReg);
+      break;
+    }
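// [Editor's sketch, not part of this patch] The unpack idiom above in SSE2
// intrinsics: interleaving each byte with itself puts it in the high half of
// a 16-bit lane, so an arithmetic word shift by 8 + shift yields the
// sign-extended byte result. The helper name is hypothetical.
#include <emmintrin.h>
static __m128i I8x16ShrSSketch(__m128i v, int shift) {
  __m128i lo = _mm_srai_epi16(_mm_unpacklo_epi8(v, v), 8 + shift);
  __m128i hi = _mm_srai_epi16(_mm_unpackhi_epi8(v, v), 8 + shift);
  // The shifted values fit in int8, so the saturating pack is exact.
  return _mm_packs_epi16(lo, hi);
}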
     case kX64I8x16Add: {
       __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
@@ -2699,6 +2730,39 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
+    case kX64I8x16Mul: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      XMMRegister right = i.InputSimd128Register(1);
+      XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
+      // I16x8 view of I8x16
+      // left  = AAaa AAaa ... AAaa AAaa
+      // right = BBbb BBbb ... BBbb BBbb
+      // t     = 00AA 00AA ... 00AA 00AA
+      // s     = 00BB 00BB ... 00BB 00BB
+      __ movaps(tmp, dst);
+      __ movaps(kScratchDoubleReg, right);
+      __ psrlw(tmp, 8);
+      __ psrlw(kScratchDoubleReg, 8);
+      // dst = left * 256
+      __ psllw(dst, 8);
+      // t = I16x8Mul(t, s)
+      //   => __PP __PP ... __PP __PP
+      __ pmullw(tmp, kScratchDoubleReg);
+      // dst = I16x8Mul(left * 256, right)
+      //   => pp__ pp__ ... pp__ pp__
+      __ pmullw(dst, right);
+      // t = I16x8Shl(t, 8)
+      //   => PP00 PP00 ... PP00 PP00
+      __ psllw(tmp, 8);
+      // dst = I16x8Shr(dst, 8)
+      //   => 00pp 00pp ... 00pp 00pp
+      __ psrlw(dst, 8);
+      // dst = I16x8Or(dst, t)
+      //   => PPpp PPpp ... PPpp PPpp
+      __ por(dst, tmp);
+      break;
+    }
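// [Editor's sketch, not part of this patch] The same even/odd decomposition
// with SSE2 intrinsics. There is no pmullb, so odd bytes are multiplied in
// the low half of each word and even bytes in the high half, then the two
// halves are recombined. The helper name is hypothetical.
#include <emmintrin.h>
static __m128i I8x16MulSketch(__m128i a, __m128i b) {
  // Odd bytes: move them into the low byte of each word and multiply.
  __m128i odd = _mm_mullo_epi16(_mm_srli_epi16(a, 8), _mm_srli_epi16(b, 8));
  // Even bytes: pre-shifting a by 8 makes the word multiply deposit the
  // product's low 8 bits in the high byte, truncating the overflow for us.
  __m128i even = _mm_srli_epi16(_mm_mullo_epi16(_mm_slli_epi16(a, 8), b), 8);
  // Recombine: odd products go back to the high byte of each word.
  return _mm_or_si128(_mm_slli_epi16(odd, 8), even);
}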
     case kX64I8x16MinS: {
       CpuFeatureScope sse_scope(tasm(), SSE4_1);
       __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
@@ -2743,6 +2807,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ packuswb(dst, kScratchDoubleReg);
       break;
     }
+    case kX64I8x16ShrU: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      int8_t shift = i.InputInt8(1) & 0x7;
+      // Unpack the bytes into words, do logical shifts, and repack.
+      __ punpckhbw(kScratchDoubleReg, src);
+      __ punpcklbw(dst, src);
+      __ psrlw(kScratchDoubleReg, 8 + shift);
+      __ psrlw(dst, 8 + shift);
+      __ packuswb(dst, kScratchDoubleReg);
+      break;
+    }
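// [Editor's sketch, not part of this patch] Identical to the signed variant
// but with logical shifts and an unsigned pack; the shifted words are at
// most 255, so packuswb never saturates. The helper name is hypothetical.
#include <emmintrin.h>
static __m128i I8x16ShrUSketch(__m128i v, int shift) {
  __m128i lo = _mm_srli_epi16(_mm_unpacklo_epi8(v, v), 8 + shift);
  __m128i hi = _mm_srli_epi16(_mm_unpackhi_epi8(v, v), 8 + shift);
  return _mm_packus_epi16(lo, hi);
}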
     case kX64I8x16AddSaturateU: {
       __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
...
@@ -230,10 +230,13 @@ namespace compiler {
   V(X64I8x16ReplaceLane)   \
   V(X64I8x16SConvertI16x8) \
   V(X64I8x16Neg)           \
+  V(X64I8x16Shl)           \
+  V(X64I8x16ShrS)          \
   V(X64I8x16Add)           \
   V(X64I8x16AddSaturateS)  \
   V(X64I8x16Sub)           \
   V(X64I8x16SubSaturateS)  \
+  V(X64I8x16Mul)           \
   V(X64I8x16MinS)          \
   V(X64I8x16MaxS)          \
   V(X64I8x16Eq)            \
@@ -243,16 +246,17 @@ namespace compiler {
   V(X64I8x16UConvertI16x8) \
   V(X64I8x16AddSaturateU)  \
   V(X64I8x16SubSaturateU)  \
+  V(X64I8x16ShrU)          \
   V(X64I8x16MinU)          \
   V(X64I8x16MaxU)          \
   V(X64I8x16GtU)           \
   V(X64I8x16GeU)           \
-  V(X64S128Zero)           \
-  V(X64S128Not)            \
   V(X64S128And)            \
   V(X64S128Or)             \
   V(X64S128Xor)            \
+  V(X64S128Not)            \
   V(X64S128Select)         \
+  V(X64S128Zero)           \
   V(X64S1x4AnyTrue)        \
   V(X64S1x4AllTrue)        \
   V(X64S1x8AnyTrue)        \
...
@@ -207,10 +207,13 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I8x16ReplaceLane:
     case kX64I8x16SConvertI16x8:
     case kX64I8x16Neg:
+    case kX64I8x16Shl:
+    case kX64I8x16ShrS:
     case kX64I8x16Add:
     case kX64I8x16AddSaturateS:
     case kX64I8x16Sub:
     case kX64I8x16SubSaturateS:
+    case kX64I8x16Mul:
     case kX64I8x16MinS:
     case kX64I8x16MaxS:
     case kX64I8x16Eq:
@@ -220,6 +223,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I8x16UConvertI16x8:
     case kX64I8x16AddSaturateU:
     case kX64I8x16SubSaturateU:
+    case kX64I8x16ShrU:
     case kX64I8x16MinU:
     case kX64I8x16MaxU:
     case kX64I8x16GtU:
...
@@ -2655,7 +2655,10 @@ VISIT_ATOMIC_BINOP(Xor)
   V(I32x4ShrU) \
   V(I16x8Shl)  \
   V(I16x8ShrS) \
-  V(I16x8ShrU)
+  V(I16x8ShrU) \
+  V(I8x16Shl)  \
+  V(I8x16ShrS) \
+  V(I8x16ShrU)

 #define SIMD_ANYTRUE_LIST(V) \
   V(S1x4AnyTrue)             \
@@ -2777,8 +2780,9 @@ void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
 void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
   X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
   Emit(kX64I32x4UConvertF32x4, g.DefineSameAsFirst(node),
-       g.UseRegister(node->InputAt(0)));
+       g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
 }

 void InstructionSelector::VisitI16x8UConvertI32x4(Node* node) {
@@ -2793,6 +2797,14 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
        g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
 }
+void InstructionSelector::VisitI8x16Mul(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  // Unique registers keep the second input from aliasing the output or the
+  // temp, both of which the code generator overwrites while forming the
+  // partial products.
+  Emit(kX64I8x16Mul, g.DefineSameAsFirst(node),
+       g.UseUniqueRegister(node->InputAt(0)),
+       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+}
 void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
   UNREACHABLE();
 }
...
@@ -438,8 +438,6 @@ WASM_SIMD_TEST(F32x4ReplaceLane) {
   CHECK_EQ(1, r.Call(3.14159f, -1.5f));
 }

-#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
-    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
 // Tests both signed and unsigned conversion.
 WASM_SIMD_TEST(F32x4ConvertI32x4) {
   WasmRunner<int32_t, int32_t, float, float> r(execution_tier, lower_simd);
@@ -463,8 +461,6 @@ WASM_SIMD_TEST(F32x4ConvertI32x4) {
                static_cast<float>(static_cast<uint32_t>(*i))));
   }
 }
-#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
-        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

 void RunF32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                       WasmOpcode simd_op, FloatUnOp expected_op,
@@ -819,9 +815,6 @@ WASM_SIMD_TEST(I8x16ReplaceLane) {
   CHECK_EQ(1, r.Call(1, 2));
 }

-#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
-    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
 int32_t ConvertToInt(double val, bool unsigned_integer) {
   if (std::isnan(val)) return 0;
   if (unsigned_integer) {
@@ -900,8 +893,6 @@ WASM_SIMD_TEST(I32x4ConvertI16x8) {
     CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned, 0));
   }
 }
-#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
-        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

 void RunI32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                       WasmOpcode simd_op, Int32UnOp expected_op) {
@@ -1542,13 +1533,9 @@ WASM_SIMD_TEST(I8x16LeU) {
                     UnsignedLessEqual);
 }

-#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
-    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
 WASM_SIMD_TEST(I8x16Mul) {
   RunI8x16BinOpTest(execution_tier, lower_simd, kExprI8x16Mul, Mul);
 }
-#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
-        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

 void RunI8x16ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                          WasmOpcode simd_op, Int8ShiftOp expected_op) {
@@ -1566,8 +1553,6 @@ void RunI8x16ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
   }
 }

-#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
-    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
 WASM_SIMD_TEST(I8x16Shl) {
   RunI8x16ShiftOpTest(execution_tier, lower_simd, kExprI8x16Shl,
                       LogicalShiftLeft);
@@ -1582,8 +1567,6 @@ WASM_SIMD_TEST(I8x16ShrU) {
   RunI8x16ShiftOpTest(execution_tier, lower_simd, kExprI8x16ShrU,
                       LogicalShiftRight);
 }
-#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
-        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

 // Test Select by making a mask where the 0th and 3rd lanes are true and the
 // rest false, and comparing for non-equality with zero to convert to a boolean
...