Commit 3692bef9 authored by Ng Zhi An's avatar Ng Zhi An Committed by Commit Bot

[wasm-simd][x64] Prototype i32x4.dot_i16x8_s

This implements I32x4DotI16x8S for x64 and interpreter.

Bug: v8:10583
Change-Id: I404ac68c19c1686a93f29c3f4fc2d661c9558c67
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2229056Reviewed-by: 's avatarTobias Tebbi <tebbi@chromium.org>
Reviewed-by: 's avatarDeepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68244}
parent 60cc3570
......@@ -202,6 +202,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Psrlw, psrlw)
AVX_OP(Psrld, psrld)
AVX_OP(Psrlq, psrlq)
AVX_OP(Pmaddwd, pmaddwd)
AVX_OP(Paddb, paddb)
AVX_OP(Paddw, paddw)
AVX_OP(Paddd, paddd)
......
......@@ -57,6 +57,7 @@
V(packssdw, 66, 0F, 6B) \
V(punpcklqdq, 66, 0F, 6C) \
V(punpckhqdq, 66, 0F, 6D) \
V(pmaddwd, 66, 0F, F5) \
V(paddb, 66, 0F, FC) \
V(paddw, 66, 0F, FD) \
V(paddd, 66, 0F, FE) \
......
......@@ -2057,6 +2057,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI32x4Abs(node);
case IrOpcode::kI32x4BitMask:
return MarkAsWord32(node), VisitI32x4BitMask(node);
case IrOpcode::kI32x4DotI16x8S:
return MarkAsSimd128(node), VisitI32x4DotI16x8S(node);
case IrOpcode::kI16x8Splat:
return MarkAsSimd128(node), VisitI16x8Splat(node);
case IrOpcode::kI16x8ExtractLaneU:
......@@ -2695,6 +2697,11 @@ void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_X64
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {
......
......@@ -3187,6 +3187,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
break;
}
case kX64I32x4DotI16x8S: {
__ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64S128Zero: {
XMMRegister dst = i.OutputSimd128Register();
__ Xorps(dst, dst);
......
......@@ -250,6 +250,7 @@ namespace compiler {
V(X64I32x4GeU) \
V(X64I32x4Abs) \
V(X64I32x4BitMask) \
V(X64I32x4DotI16x8S) \
V(X64I16x8Splat) \
V(X64I16x8ExtractLaneU) \
V(X64I16x8ExtractLaneS) \
......
......@@ -222,6 +222,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I32x4GeU:
case kX64I32x4Abs:
case kX64I32x4BitMask:
case kX64I32x4DotI16x8S:
case kX64I16x8Splat:
case kX64I16x8ExtractLaneU:
case kX64I16x8ExtractLaneS:
......
......@@ -2700,6 +2700,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(I32x4MinU) \
V(I32x4MaxU) \
V(I32x4GeU) \
V(I32x4DotI16x8S) \
V(I16x8SConvertI32x4) \
V(I16x8Add) \
V(I16x8AddSaturateS) \
......
......@@ -416,6 +416,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I32x4GeU, Operator::kNoProperties, 2, 0, 1) \
V(I32x4Abs, Operator::kNoProperties, 1, 0, 1) \
V(I32x4BitMask, Operator::kNoProperties, 1, 0, 1) \
V(I32x4DotI16x8S, Operator::kCommutative, 2, 0, 1) \
V(I16x8Splat, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16Low, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16High, Operator::kNoProperties, 1, 0, 1) \
......
......@@ -664,6 +664,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I32x4GeU();
const Operator* I32x4Abs();
const Operator* I32x4BitMask();
const Operator* I32x4DotI16x8S();
const Operator* I16x8Splat();
const Operator* I16x8ExtractLaneU(int32_t);
......
......@@ -855,6 +855,7 @@
V(I32x4GeU) \
V(I32x4Abs) \
V(I32x4BitMask) \
V(I32x4DotI16x8S) \
V(I16x8Splat) \
V(I16x8ExtractLaneU) \
V(I16x8ExtractLaneS) \
......
......@@ -4425,6 +4425,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
return graph()->NewNode(mcgraph()->machine()->I32x4Abs(), inputs[0]);
case wasm::kExprI32x4BitMask:
return graph()->NewNode(mcgraph()->machine()->I32x4BitMask(), inputs[0]);
case wasm::kExprI32x4DotI16x8S:
return graph()->NewNode(mcgraph()->machine()->I32x4DotI16x8S(), inputs[0],
inputs[1]);
case wasm::kExprI16x8Splat:
return graph()->NewNode(mcgraph()->machine()->I16x8Splat(), inputs[0]);
case wasm::kExprI16x8SConvertI8x16Low:
......
......@@ -2133,6 +2133,8 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
mnemonic = "psllq";
} else if (opcode == 0xF4) {
mnemonic = "pmuludq";
} else if (opcode == 0xF5) {
mnemonic = "pmaddwd";
} else if (opcode == 0xF8) {
mnemonic = "psubb";
} else if (opcode == 0xF9) {
......
......@@ -2587,6 +2587,19 @@ class WasmInterpreterInternals {
ADD_HORIZ_CASE(F32x4AddHoriz, f32x4, float4, 4)
ADD_HORIZ_CASE(I16x8AddHoriz, i16x8, int8, 8)
#undef ADD_HORIZ_CASE
case kExprI32x4DotI16x8S: {
int8 v2 = Pop().to_s128().to_i16x8();
int8 v1 = Pop().to_s128().to_i16x8();
int4 res;
for (size_t i = 0; i < 4; i++) {
int32_t lo = (v1.val[LANE(i * 2, v1)] * v2.val[LANE(i * 2, v2)]);
int32_t hi =
(v1.val[LANE(i * 2 + 1, v1)] * v2.val[LANE(i * 2 + 1, v2)]);
res.val[LANE(i, res)] = lo + hi;
}
Push(WasmValue(Simd128(res)));
return true;
}
case kExprS8x16Swizzle: {
int16 v2 = Pop().to_s128().to_i8x16();
int16 v1 = Pop().to_s128().to_i8x16();
......
......@@ -335,6 +335,8 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_F64x2_OP(Trunc, "trunc")
CASE_F64x2_OP(NearestInt, "nearest")
CASE_I32x4_OP(DotI16x8S, "dot_i16x8_s")
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
CASE_INT_OP(AtomicWait, "atomic.wait")
......
......@@ -470,9 +470,10 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(F64x2Qfms, 0xfdff, s_sss) \
V(I16x8AddHoriz, 0xfdaf, s_ss) \
V(I32x4AddHoriz, 0xfdb0, s_ss) \
V(I32x4DotI16x8S, 0xfdba, s_ss) \
V(F32x4AddHoriz, 0xfdb2, s_ss) \
V(F32x4RecipApprox, 0xfdb3, s_s) \
V(F32x4RecipSqrtApprox, 0xfdba, s_s) \
V(F32x4RecipSqrtApprox, 0xfdbc, s_s) \
V(F32x4Pmin, 0xfdea, s_ss) \
V(F32x4Pmax, 0xfdeb, s_ss) \
V(F64x2Pmin, 0xfdf6, s_ss) \
......
......@@ -2306,6 +2306,35 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
base::RoundingAverageUnsigned);
}
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
FLAG_SCOPE(wasm_simd_post_mvp);
WasmRunner<int32_t, int16_t, int16_t> r(execution_tier, lower_simd);
int32_t* g = r.builder().template AddGlobal<int32_t>(kWasmS128);
byte value1 = 0, value2 = 1;
byte temp1 = r.AllocateLocal(kWasmS128);
byte temp2 = r.AllocateLocal(kWasmS128);
BUILD(r, WASM_SET_LOCAL(temp1, WASM_SIMD_I16x8_SPLAT(WASM_GET_LOCAL(value1))),
WASM_SET_LOCAL(temp2, WASM_SIMD_I16x8_SPLAT(WASM_GET_LOCAL(value2))),
WASM_SET_GLOBAL(
0, WASM_SIMD_BINOP(kExprI32x4DotI16x8S, WASM_GET_LOCAL(temp1),
WASM_GET_LOCAL(temp2))),
WASM_ONE);
for (int16_t x : compiler::ValueHelper::GetVector<int16_t>()) {
for (int16_t y : compiler::ValueHelper::GetVector<int16_t>()) {
r.Call(x, y);
int32_t expected = x * y * 2;
for (int i = 0; i < 4; i++) {
CHECK_EQ(expected, ReadLittleEndianValue<int32_t>(&g[i]));
}
}
}
}
#endif // V8_TARGET_ARCH_X64
void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int16ShiftOp expected_op) {
// Intentionally shift by 16, should be no-op.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment