Commit f165b310 authored by Deepti Gandluri, committed by V8 LUCI CQ

[wasm-relaxed-simd] Implement dot product instructions for ia32/x64

Reference lowering in the corresponding issue:
https://github.com/WebAssembly/relaxed-simd/issues/52

Bug: v8:12284


Change-Id: Ia59419f41ae1e53804b0fdb7169bf6f56f864c53
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3862956
Reviewed-by: Thibaud Michaud <thibaudm@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/main@{#82923}
parent ac23cc04
......@@ -704,6 +704,21 @@ void SharedTurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
Pxor(dst, scratch);
}
void SharedTurboAssembler::I16x8DotI8x16I7x16S(XMMRegister dst,
                                               XMMRegister src1,
                                               XMMRegister src2) {
  ASM_CODE_COMMENT(this);
  // pmaddubsw multiplies unsigned bytes of its first operand with signed
  // bytes of its second and pairwise-adds into signed words (Intel SDM).
  // src2 is passed as the "unsigned" operand; presumably it holds the i7x16
  // input whose high bit is clear, making both interpretations agree —
  // confirm against the relaxed-simd lowering notes.
  if (!CpuFeatures::IsSupported(AVX)) {
    // Destructive SSE form: dst must alias src2 before the multiply-add.
    if (dst != src2) {
      movdqa(dst, src2);
    }
    pmaddubsw(dst, src1);
    return;
  }
  // Non-destructive AVX form.
  CpuFeatureScope avx_scope(this, AVX);
  vpmaddubsw(dst, src2, src1);
}
void SharedTurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst,
XMMRegister src,
XMMRegister tmp) {
......
......@@ -442,6 +442,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
// Will move src1 to dst if AVX is not supported.
void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch);
void I16x8DotI8x16I7x16S(XMMRegister dst, XMMRegister src1, XMMRegister src2);
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
XMMRegister tmp);
// Requires that dst == src1 if AVX is not supported.
......
......@@ -2089,6 +2089,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kIA32I16x8DotI8x16I7x16S: {
__ I16x8DotI8x16I7x16S(i.OutputSimd128Register(),
i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kIA32F32x4Splat: {
__ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
break;
......
......@@ -359,6 +359,7 @@ namespace compiler {
V(IA32I32x4AllTrue) \
V(IA32I16x8AllTrue) \
V(IA32I8x16AllTrue) \
V(IA32I16x8DotI8x16I7x16S) \
V(IA32Word32AtomicPairLoad) \
V(IA32Word32ReleasePairStore) \
V(IA32Word32SeqCstPairStore) \
......
......@@ -244,6 +244,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I16x8ExtAddPairwiseI8x16U:
case kIA32I16x8Q15MulRSatS:
case kIA32I16x8RelaxedQ15MulRS:
case kIA32I16x8DotI8x16I7x16S:
case kIA32I8x16Splat:
case kIA32I8x16ExtractLaneS:
case kIA32Pinsrb:
......
......@@ -3311,6 +3311,12 @@ void InstructionSelector::VisitF32x4Qfms(Node* node) {
VisitRRRR(this, node, kIA32F32x4Qfms);
}
// Select the ia32 instruction for the relaxed i16x8 dot-product of i8x16
// and i7x16 inputs.
void InstructionSelector::VisitI16x8DotI8x16I7x16S(Node* node) {
  IA32OperandGenerator gen(this);
  // Input 0 is requested as a unique register — presumably so the code
  // generator can copy input 1 into dst without clobbering it in the
  // non-AVX path; confirm against the codegen/macro-assembler.
  Emit(kIA32I16x8DotI8x16I7x16S, gen.DefineAsRegister(node),
       gen.UseUniqueRegister(node->InputAt(0)),
       gen.UseRegister(node->InputAt(1)));
}
void InstructionSelector::AddOutputToSelectContinuation(OperandGenerator* g,
int first_input_index,
Node* node) {
......
......@@ -2802,11 +2802,13 @@ void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
// && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32 &&
// !V8_TARGET_ARCH_RISCV64 && !V8_TARGET_ARCH_RISCV32
#if !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
// Generic fallback stub: architectures without a backend lowering for the
// relaxed i16x8 dot-product abort here (guarded by the target #if above).
void InstructionSelector::VisitI16x8DotI8x16I7x16S(Node* node) {
UNIMPLEMENTED();
}
#endif  // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM64
// Generic fallback stub: the i32x4 dot+add variant is not lowered on this
// target (guarded by the #if above); abort at compile time.
void InstructionSelector::VisitI32x4DotI8x16I7x16AddS(Node* node) {
UNIMPLEMENTED();
}
......
......@@ -3651,6 +3651,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kX64I16x8DotI8x16I7x16S: {
__ I16x8DotI8x16I7x16S(i.OutputSimd128Register(),
i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kX64I8x16Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) {
......
......@@ -335,6 +335,7 @@ namespace compiler {
V(X64I16x8ExtAddPairwiseI8x16U) \
V(X64I16x8Q15MulRSatS) \
V(X64I16x8RelaxedQ15MulRS) \
V(X64I16x8DotI8x16I7x16S) \
V(X64I8x16Splat) \
V(X64I8x16ExtractLaneS) \
V(X64I8x16SConvertI16x8) \
......
......@@ -280,6 +280,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I16x8ExtAddPairwiseI8x16U:
case kX64I16x8Q15MulRSatS:
case kX64I16x8RelaxedQ15MulRS:
case kX64I16x8DotI8x16I7x16S:
case kX64I8x16Splat:
case kX64I8x16ExtractLaneS:
case kX64I8x16SConvertI16x8:
......
......@@ -4320,6 +4320,12 @@ void InstructionSelector::VisitF64x2PromoteLowF32x4(Node* node) {
VisitRR(this, node, code);
}
// Select the x64 instruction for the relaxed i16x8 dot-product of i8x16
// and i7x16 inputs.
void InstructionSelector::VisitI16x8DotI8x16I7x16S(Node* node) {
  X64OperandGenerator gen(this);
  // Input 0 is requested as a unique register — presumably so the code
  // generator can copy input 1 into dst without clobbering it in the
  // non-AVX path; confirm against the codegen/macro-assembler.
  Emit(kX64I16x8DotI8x16I7x16S, gen.DefineAsRegister(node),
       gen.UseUniqueRegister(node->InputAt(0)),
       gen.UseRegister(node->InputAt(1)));
}
void InstructionSelector::AddOutputToSelectContinuation(OperandGenerator* g,
int first_input_index,
Node* node) {
......
......@@ -3670,7 +3670,7 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
// Liftoff lowering for the relaxed i16x8 dot-product: delegate to the
// shared macro-assembler helper. The stale `bailout(kSimd, ...)` line
// (left over from the pre-implementation stub shown in the diff) is
// removed — keeping it would abort Liftoff compilation even though the
// instruction is now implemented.
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
                                                    LiftoffRegister lhs,
                                                    LiftoffRegister rhs) {
  I16x8DotI8x16I7x16S(dst.fp(), lhs.fp(), rhs.fp());
}
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
......
......@@ -3255,7 +3255,7 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
// Liftoff lowering for the relaxed i16x8 dot-product: delegate to the
// shared macro-assembler helper. The stale `bailout(kSimd, ...)` line
// (left over from the pre-implementation stub shown in the diff) is
// removed — keeping it would abort Liftoff compilation even though the
// instruction is now implemented.
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
                                                    LiftoffRegister lhs,
                                                    LiftoffRegister rhs) {
  I16x8DotI8x16I7x16S(dst.fp(), lhs.fp(), rhs.fp());
}
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
......
......@@ -435,7 +435,7 @@ WASM_RELAXED_SIMD_TEST(I16x8RelaxedQ15MulRS) {
}
}
#if V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
WASM_RELAXED_SIMD_TEST(I16x8DotI8x16I7x16S) {
WasmRunner<int32_t, int8_t, int8_t> r(execution_tier);
int16_t* g = r.builder().template AddGlobal<int16_t>(kWasmS128);
......@@ -460,7 +460,9 @@ WASM_RELAXED_SIMD_TEST(I16x8DotI8x16I7x16S) {
}
}
}
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_ARM64
WASM_RELAXED_SIMD_TEST(I32x4DotI8x16I7x16AddS) {
WasmRunner<int32_t, int8_t, int8_t, int32_t> r(execution_tier);
int32_t* g = r.builder().template AddGlobal<int32_t>(kWasmS128);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment