Commit 425ab4ea authored by Ng Zhi An, committed by Commit Bot

[wasm simd] Implement I64x2Shl, ShrS, ShrU

Bug: v8:8460
Change-Id: I8be7244f19fbb48371c3ad12631e0da71e6321d4
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1682432
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62518}
parent 74e68470
......@@ -1859,10 +1859,16 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI64x2ReplaceLane(node);
case IrOpcode::kI64x2Neg:
return MarkAsSimd128(node), VisitI64x2Neg(node);
case IrOpcode::kI64x2Shl:
return MarkAsSimd128(node), VisitI64x2Shl(node);
case IrOpcode::kI64x2ShrS:
return MarkAsSimd128(node), VisitI64x2ShrS(node);
case IrOpcode::kI64x2Add:
return MarkAsSimd128(node), VisitI64x2Add(node);
case IrOpcode::kI64x2Sub:
return MarkAsSimd128(node), VisitI64x2Sub(node);
case IrOpcode::kI64x2ShrU:
return MarkAsSimd128(node), VisitI64x2ShrU(node);
case IrOpcode::kI32x4Splat:
return MarkAsSimd128(node), VisitI32x4Splat(node);
case IrOpcode::kI32x4ExtractLane:
......@@ -2512,8 +2518,11 @@ void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); }
// I64x2 visitor stubs. They sit in the preprocessor region closed by
// "#endif // !V8_TARGET_ARCH_X64" below, i.e. they are only compiled for
// targets other than x64, and each one just invokes UNIMPLEMENTED().
void InstructionSelector::VisitI64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Neg(Node* node) { UNIMPLEMENTED(); }
// Shift visitors (left, arithmetic right).
void InstructionSelector::VisitI64x2Shl(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Sub(Node* node) { UNIMPLEMENTED(); }
// Shift visitor (logical right).
void InstructionSelector::VisitI64x2ShrU(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
......@@ -2445,6 +2445,28 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ psubq(dst, src);
break;
}
case kX64I64x2Shl: {
  // Logical left shift of the output register's quadwords by the immediate
  // count stored in input 1. psllq receives a single register operand, so
  // the register is shifted in place.
  // NOTE(review): input 0 is never read here, so this presumably relies on
  // the output register being allocated same-as-first-input -- confirm in
  // the instruction selector.
  XMMRegister dst = i.OutputSimd128Register();
  __ psllq(dst, i.InputInt8(1));
  break;
}
case kX64I64x2ShrS: {
  // TODO(zhin): there is vpsraq but requires AVX512
  // Arithmetic right shift, done one quadword at a time: each lane is moved
  // into the general-purpose scratch register, shifted there with sarq, and
  // inserted back into the same lane of the result.
  CpuFeatureScope sse_scope(tasm(), SSE4_1);
  XMMRegister result = i.OutputSimd128Register();
  XMMRegister input = i.InputSimd128Register(0);
  // Both lanes use the same immediate shift count (input 1).
  const Immediate shift_amount(i.InputInt8(1));

  // Lane 0: lower quadword.
  __ pextrq(kScratchRegister, input, 0x0);
  __ sarq(kScratchRegister, shift_amount);
  __ pinsrq(result, kScratchRegister, 0x0);

  // Lane 1: upper quadword.
  __ pextrq(kScratchRegister, input, 0x1);
  __ sarq(kScratchRegister, shift_amount);
  __ pinsrq(result, kScratchRegister, 0x1);
  break;
}
case kX64I64x2Add: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ paddq(i.OutputSimd128Register(), i.InputSimd128Register(1));
......@@ -2455,6 +2477,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ psubq(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I64x2ShrU: {
  // Logical right shift of the output register's quadwords by the immediate
  // count stored in input 1. psrlq receives a single register operand, so
  // the register is shifted in place.
  // NOTE(review): input 0 is never read here, so this presumably relies on
  // the output register being allocated same-as-first-input -- confirm in
  // the instruction selector.
  XMMRegister dst = i.OutputSimd128Register();
  __ psrlq(dst, i.InputInt8(1));
  break;
}
case kX64I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsRegister()) {
......
......@@ -179,12 +179,15 @@ namespace compiler {
V(X64F32x4Lt) \
V(X64F32x4Le) \
V(X64I64x2Splat) \
V(X64I64x2Add) \
V(X64I64x2Sub) \
V(X64I32x4Splat) \
V(X64I64x2ExtractLane) \
V(X64I64x2ReplaceLane) \
V(X64I64x2Neg) \
V(X64I64x2Shl) \
V(X64I64x2ShrS) \
V(X64I64x2Add) \
V(X64I64x2Sub) \
V(X64I64x2ShrU) \
V(X64I32x4Splat) \
V(X64I32x4ExtractLane) \
V(X64I32x4ReplaceLane) \
V(X64I32x4SConvertF32x4) \
......
......@@ -145,12 +145,15 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4Lt:
case kX64F32x4Le:
case kX64I64x2Splat:
case kX64I64x2Add:
case kX64I64x2Sub:
case kX64I32x4Splat:
case kX64I64x2ExtractLane:
case kX64I64x2ReplaceLane:
case kX64I64x2Neg:
case kX64I64x2Shl:
case kX64I64x2ShrS:
case kX64I64x2Add:
case kX64I64x2Sub:
case kX64I64x2ShrU:
case kX64I32x4Splat:
case kX64I32x4ExtractLane:
case kX64I32x4ReplaceLane:
case kX64I32x4SConvertF32x4:
......
......@@ -2640,6 +2640,9 @@ VISIT_ATOMIC_BINOP(Xor)
V(S128Not)
#define SIMD_SHIFT_OPCODES(V) \
V(I64x2Shl) \
V(I64x2ShrS) \
V(I64x2ShrU) \
V(I32x4Shl) \
V(I32x4ShrS) \
V(I32x4ShrU) \
......
......@@ -452,6 +452,7 @@ MachineType AtomicOpType(Operator const* op) {
V(I8x16, 16)
#define SIMD_FORMAT_LIST(V) \
V(64x2, 64) \
V(32x4, 32) \
V(16x8, 16) \
V(8x16, 8)
......
......@@ -494,8 +494,11 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I64x2ExtractLane(int32_t);
const Operator* I64x2ReplaceLane(int32_t);
const Operator* I64x2Neg();
const Operator* I64x2Shl(int32_t);
const Operator* I64x2ShrS(int32_t);
const Operator* I64x2Add();
const Operator* I64x2Sub();
const Operator* I64x2ShrU(int32_t);
const Operator* I32x4Splat();
const Operator* I32x4ExtractLane(int32_t);
......
......@@ -757,8 +757,11 @@
V(I64x2ExtractLane) \
V(I64x2ReplaceLane) \
V(I64x2Neg) \
V(I64x2Shl) \
V(I64x2ShrS) \
V(I64x2Add) \
V(I64x2Sub) \
V(I64x2ShrU) \
V(I32x4Splat) \
V(I32x4ExtractLane) \
V(I32x4ReplaceLane) \
......
......@@ -4369,6 +4369,14 @@ Node* WasmGraphBuilder::SimdShiftOp(wasm::WasmOpcode opcode, uint8_t shift,
Node* const* inputs) {
has_simd_ = true;
switch (opcode) {
case wasm::kExprI64x2Shl:
return graph()->NewNode(mcgraph()->machine()->I64x2Shl(shift), inputs[0]);
case wasm::kExprI64x2ShrS:
return graph()->NewNode(mcgraph()->machine()->I64x2ShrS(shift),
inputs[0]);
case wasm::kExprI64x2ShrU:
return graph()->NewNode(mcgraph()->machine()->I64x2ShrU(shift),
inputs[0]);
case wasm::kExprI32x4Shl:
return graph()->NewNode(mcgraph()->machine()->I32x4Shl(shift), inputs[0]);
case wasm::kExprI32x4ShrS:
......
......@@ -1084,6 +1084,11 @@ class WasmDecoder : public Decoder {
SimdShiftImmediate<validate>& imm) {
uint8_t max_shift = 0;
switch (opcode) {
case kExprI64x2Shl:
case kExprI64x2ShrS:
case kExprI64x2ShrU:
max_shift = 64;
break;
case kExprI32x4Shl:
case kExprI32x4ShrS:
case kExprI32x4ShrU:
......@@ -2707,6 +2712,9 @@ class WasmFullDecoder : public WasmDecoder<validate> {
len = SimdReplaceLane(opcode, kWasmI32);
break;
}
case kExprI64x2Shl:
case kExprI64x2ShrS:
case kExprI64x2ShrU:
case kExprI32x4Shl:
case kExprI32x4ShrS:
case kExprI32x4ShrU:
......
......@@ -2343,6 +2343,11 @@ class ThreadImpl {
Push(WasmValue(Simd128(res))); \
return true; \
}
SHIFT_CASE(I64x2Shl, i64x2, int2, 2,
static_cast<uint64_t>(a) << imm.shift)
SHIFT_CASE(I64x2ShrS, i64x2, int2, 2, a >> imm.shift)
SHIFT_CASE(I64x2ShrU, i64x2, int2, 2,
static_cast<uint64_t>(a) >> imm.shift)
SHIFT_CASE(I32x4Shl, i32x4, int4, 4,
static_cast<uint32_t>(a) << imm.shift)
SHIFT_CASE(I32x4ShrS, i32x4, int4, 4, a >> imm.shift)
......
......@@ -259,7 +259,9 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIGN_OP(SIMDI, Gt, "gt")
CASE_SIGN_OP(SIMDI, Ge, "ge")
CASE_SIGN_OP(SIMDI, Shr, "shr")
CASE_SIGN_OP(I64x2, Shr, "shr")
CASE_SIMDI_OP(Shl, "shl")
CASE_I64x2_OP(Shl, "shl")
CASE_I64x2_OP(Splat, "splat")
CASE_I32x4_OP(AddHoriz, "add_horizontal")
CASE_I16x8_OP(AddHoriz, "add_horizontal")
......
......@@ -400,7 +400,10 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
V(I16x8ShrU, 0xfd67, _) \
V(I32x4Shl, 0xfd76, _) \
V(I32x4ShrS, 0xfd77, _) \
V(I32x4ShrU, 0xfd78, _)
V(I32x4ShrU, 0xfd78, _) \
V(I64x2Shl, 0xfd87, _) \
V(I64x2ShrS, 0xfd88, _) \
V(I64x2ShrU, 0xfd89, _)
#define FOREACH_SIMD_1_OPERAND_2_PARAM_OPCODE(V) \
V(I8x16ReplaceLane, 0xfd07, _) \
......
......@@ -327,7 +327,10 @@ void PrintWasmText(const WasmModule* module, const ModuleWireBytes& wire_bytes,
case kExprI16x8ShrU:
case kExprI32x4Shl:
case kExprI32x4ShrS:
case kExprI32x4ShrU: {
case kExprI32x4ShrU:
case kExprI64x2Shl:
case kExprI64x2ShrS:
case kExprI64x2ShrU: {
SimdShiftImmediate<Decoder::kNoValidate> imm(&i, i.pc());
os << WasmOpcodes::OpcodeName(opcode) << ' ' << imm.shift;
break;
......
......@@ -24,6 +24,7 @@ using FloatBinOp = float (*)(float, float);
// Function-pointer aliases for the scalar reference operations that the SIMD
// tests compare lane results against.
using FloatCompareOp = int (*)(float, float);
using Int64UnOp = int64_t (*)(int64_t);
using Int64BinOp = int64_t (*)(int64_t, int64_t);
// Reference 64-bit shift: RunI64x2ShiftOpTest calls it as (lane value, shift
// count).
using Int64ShiftOp = int64_t (*)(int64_t, int);
using Int32UnOp = int32_t (*)(int32_t);
using Int32BinOp = int32_t (*)(int32_t, int32_t);
using Int32CompareOp = int (*)(int32_t, int32_t);
......@@ -822,6 +823,44 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Sub) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Sub,
base::SubWithWraparound);
}
// Exercises the i64x2 shift |opcode| for every shift count from 1 to 63.
// For each count a function is built that splats its int64 argument into a
// vector, applies the shift, and stores the result in an S128 global; both
// 64-bit lanes of that global are then checked against
// |expected_op|(input, shift) for each FOR_INT64_INPUTS value.
void RunI64x2ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                         WasmOpcode opcode, Int64ShiftOp expected_op) {
  for (int shift = 1; shift < 64; ++shift) {
    // The shift count is baked into the function body built below, so a
    // fresh runner is created for every count.
    WasmRunner<int32_t, int64_t> runner(execution_tier, lower_simd);
    int64_t* result_lanes = runner.builder().AddGlobal<int64_t>(kWasmS128);
    // Local index 0 -- presumably the function's int64 parameter.
    byte scalar_input = 0;
    byte splat_local = runner.AllocateLocal(kWasmS128);
    BUILD(runner,
          WASM_SET_LOCAL(splat_local,
                         WASM_SIMD_I64x2_SPLAT(WASM_GET_LOCAL(scalar_input))),
          WASM_SET_GLOBAL(
              0, WASM_SIMD_SHIFT_OP(opcode, shift, WASM_GET_LOCAL(splat_local))),
          WASM_ONE);

    FOR_INT64_INPUTS(input) {
      runner.Call(input);
      int64_t expected = expected_op(input, shift);
      // Every lane was splatted from the same value, so both must match.
      for (int lane = 0; lane < 2; ++lane) {
        CHECK_EQ(expected, ReadLittleEndianValue<int64_t>(&result_lanes[lane]));
      }
    }
  }
}
// I64x2 shift tests: each forwards its opcode and the matching scalar
// reference shift to RunI64x2ShiftOpTest.
// NOTE(review): execution_tier and lower_simd are not declared in these
// bodies; presumably they are provided by WASM_SIMD_TEST_NO_LOWERING.

// i64x2.shl checked against LogicalShiftLeft.
WASM_SIMD_TEST_NO_LOWERING(I64x2Shl) {
RunI64x2ShiftOpTest(execution_tier, lower_simd, kExprI64x2Shl,
LogicalShiftLeft);
}
// i64x2.shr_s checked against ArithmeticShiftRight.
WASM_SIMD_TEST_NO_LOWERING(I64x2ShrS) {
RunI64x2ShiftOpTest(execution_tier, lower_simd, kExprI64x2ShrS,
ArithmeticShiftRight);
}
// i64x2.shr_u checked against LogicalShiftRight.
WASM_SIMD_TEST_NO_LOWERING(I64x2ShrU) {
RunI64x2ShiftOpTest(execution_tier, lower_simd, kExprI64x2ShrU,
LogicalShiftRight);
}
#endif // V8_TARGET_ARCH_X64
WASM_SIMD_TEST(I32x4Splat) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment