Commit a52b44f0 authored by Ilya Rezvov, committed by V8 LUCI CQ

[wasm-simd] Prototype relaxed integer Dot product instructions

Prototype the instructions in the interpreter and on Arm64. Details of
instruction lowerings on all relevant architectures can be found at:
https://github.com/WebAssembly/relaxed-simd/issues/52
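
For reference, the lane-wise semantics of the two instructions, as a minimal
scalar sketch (function names are illustrative, and the second operand is
assumed to hold 7-bit values as described in the linked issue):

#include <cstdint>

// i16x8.dot_i8x16_i7x16_s: multiply adjacent pairs of 8-bit lanes and sum
// each pair into a 16-bit lane.
void DotI8x16I7x16S(int16_t dst[8], const int8_t a[16], const int8_t b[16]) {
  for (int i = 0; i < 8; i++) {
    dst[i] = static_cast<int16_t>(a[2 * i] * b[2 * i] +
                                  a[2 * i + 1] * b[2 * i + 1]);
  }
}

// i32x4.dot_i8x16_i7x16_add_s: dot product over groups of four 8-bit lanes,
// accumulated into the 32-bit lanes of a third operand.
void DotI8x16I7x16AddS(int32_t dst[4], const int8_t a[16], const int8_t b[16],
                       const int32_t acc[4]) {
  for (int i = 0; i < 4; i++) {
    int32_t sum = 0;
    for (int j = 0; j < 4; j++) sum += a[4 * i + j] * b[4 * i + j];
    dst[i] = sum + acc[i];  // The interpreter below performs a wrapping add.
  }
}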

Bug: v8:12908
Change-Id: If8ffb82c38042191c67c9b5c23a231877d4f2159
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3679848
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Ilya Rezvov <irezvov@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/main@{#80924}
parent 90c80f7a
@@ -2488,6 +2488,31 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Addp(i.OutputSimd128Register().V4S(), tmp1, tmp2);
break;
}
case kArm64I16x8DotI8x16S: {
UseScratchRegisterScope scope(tasm());
VRegister lhs = i.InputSimd128Register(0);
VRegister rhs = i.InputSimd128Register(1);
VRegister tmp1 = scope.AcquireV(kFormat8H);
VRegister tmp2 = scope.AcquireV(kFormat8H);
__ Smull(tmp1, lhs.V8B(), rhs.V8B());
__ Smull2(tmp2, lhs.V16B(), rhs.V16B());
__ Addp(i.OutputSimd128Register().V8H(), tmp1, tmp2);
break;
}
case kArm64I32x4DotI8x16AddS: {
UseScratchRegisterScope scope(tasm());
VRegister lhs = i.InputSimd128Register(0);
VRegister rhs = i.InputSimd128Register(1);
VRegister tmp1 = scope.AcquireV(kFormat8H);
VRegister tmp2 = scope.AcquireV(kFormat8H);
__ Smull(tmp1, lhs.V8B(), rhs.V8B());
__ Smull2(tmp2, lhs.V16B(), rhs.V16B());
__ Addp(tmp1, tmp1, tmp2);
__ Saddlp(tmp1.V4S(), tmp1);
__ Add(i.OutputSimd128Register().V4S(), tmp1.V4S(),
i.InputSimd128Register(2).V4S());
break;
}
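
Lane-wise, the NEON sequences above compute the dot products as follows (an
annotated sketch using the register names from the two cases; the second
operand being 7-bit keeps every 16-bit pairwise sum below int16 overflow,
which is what makes this lowering valid):

// Smull  tmp1.8H, lhs.8B,  rhs.8B    -> tmp1[j] = lhs[j] * rhs[j], j = 0..7,
//                                       widened to 16 bits
// Smull2 tmp2.8H, lhs.16B, rhs.16B   -> tmp2[j] = lhs[8+j] * rhs[8+j]
// Addp   dst.8H,  tmp1,    tmp2      -> dst[i] = a[2i]*b[2i] + a[2i+1]*b[2i+1]
// The accumulating form folds into tmp1 instead, then:
// Saddlp tmp1.4S, tmp1.8H            -> widen adjacent 16-bit sums: each
//                                       32-bit lane now holds four products
// Add    dst.4S,  tmp1.4S, acc.4S    -> add the i32x4 accumulator
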
case kArm64IExtractLaneU: {
VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
__ Umov(i.OutputRegister32(), i.InputSimd128Register(0).Format(f),
...
@@ -276,6 +276,8 @@ namespace compiler {
V(Arm64IGeU) \
V(Arm64I32x4BitMask) \
V(Arm64I32x4DotI16x8S) \
V(Arm64I16x8DotI8x16S) \
V(Arm64I32x4DotI8x16AddS) \
V(Arm64I32x4TruncSatF64x2SZero) \
V(Arm64I32x4TruncSatF64x2UZero) \
V(Arm64IExtractLaneU) \
...
@@ -225,6 +225,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64IGeU:
case kArm64I32x4BitMask:
case kArm64I32x4DotI16x8S:
case kArm64I16x8DotI8x16S:
case kArm64I32x4DotI8x16AddS:
case kArm64I32x4TruncSatF64x2SZero:
case kArm64I32x4TruncSatF64x2UZero:
case kArm64IExtractLaneU:
...
@@ -3538,6 +3538,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
#define SIMD_BINOP_LIST(V) \
V(I32x4Mul, kArm64I32x4Mul) \
V(I32x4DotI16x8S, kArm64I32x4DotI16x8S) \
V(I16x8DotI8x16I7x16S, kArm64I16x8DotI8x16S) \
V(I16x8SConvertI32x4, kArm64I16x8SConvertI32x4) \
V(I16x8Mul, kArm64I16x8Mul) \
V(I16x8UConvertI32x4, kArm64I16x8UConvertI32x4) \
@@ -3724,6 +3725,13 @@ void InstructionSelector::VisitS128Zero(Node* node) {
Emit(kArm64S128Zero, g.DefineAsRegister(node));
}
void InstructionSelector::VisitI32x4DotI8x16I7x16AddS(Node* node) {
Arm64OperandGenerator g(this);
Emit(kArm64I32x4DotI8x16AddS, g.DefineAsRegister(node),
     g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
     g.UseRegister(node->InputAt(2)));
}
#define SIMD_VISIT_EXTRACT_LANE(Type, T, Sign, LaneSize) \
void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
VisitRRI(this, \
...
@@ -2372,6 +2372,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI32x4RelaxedTruncF32x4U(node);
case IrOpcode::kI16x8RelaxedQ15MulRS:
return MarkAsSimd128(node), VisitI16x8RelaxedQ15MulRS(node);
case IrOpcode::kI16x8DotI8x16I7x16S:
return MarkAsSimd128(node), VisitI16x8DotI8x16I7x16S(node);
case IrOpcode::kI32x4DotI8x16I7x16AddS:
return MarkAsSimd128(node), VisitI32x4DotI8x16I7x16AddS(node);
default:
FATAL("Unexpected operator #%d:%s @ node #%d", node->opcode(),
node->op()->mnemonic(), node->id());
@@ -2830,6 +2834,16 @@ void InstructionSelector::VisitI16x8RelaxedQ15MulRS(Node* node) {
}
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitI16x8DotI8x16I7x16S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4DotI8x16I7x16AddS(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {
...
@@ -608,7 +608,9 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
V(I32x4RelaxedTruncF32x4U, Operator::kNoProperties, 1, 0, 1) \
V(I32x4RelaxedTruncF64x2SZero, Operator::kNoProperties, 1, 0, 1) \
V(I32x4RelaxedTruncF64x2UZero, Operator::kNoProperties, 1, 0, 1) \
V(I16x8RelaxedQ15MulRS, Operator::kCommutative, 2, 0, 1) \
V(I16x8DotI8x16I7x16S, Operator::kCommutative, 2, 0, 1) \
V(I32x4DotI8x16I7x16AddS, Operator::kNoProperties, 3, 0, 1)
// The format is:
// V(Name, properties, value_input_count, control_input_count, output_count)
...
@@ -925,6 +925,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I32x4RelaxedTruncF64x2SZero();
const Operator* I32x4RelaxedTruncF64x2UZero();
const Operator* I16x8RelaxedQ15MulRS();
const Operator* I16x8DotI8x16I7x16S();
const Operator* I32x4DotI8x16I7x16AddS();
// load [base + index]
const Operator* Load(LoadRepresentation rep);
...
@@ -1001,6 +1001,8 @@
V(I32x4RelaxedTruncF64x2SZero) \
V(I32x4RelaxedTruncF64x2UZero) \
V(I16x8RelaxedQ15MulRS) \
V(I16x8DotI8x16I7x16S) \
V(I32x4DotI8x16I7x16AddS) \
V(I8x16Shuffle) \
V(V128AnyTrue) \
V(I64x2AllTrue) \
...
@@ -4476,6 +4476,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI16x8RelaxedQ15MulRS:
return graph()->NewNode(mcgraph()->machine()->I16x8RelaxedQ15MulRS(),
inputs[0], inputs[1]);
case wasm::kExprI16x8DotI8x16I7x16S:
return graph()->NewNode(mcgraph()->machine()->I16x8DotI8x16I7x16S(),
inputs[0], inputs[1]);
case wasm::kExprI32x4DotI8x16I7x16AddS:
return graph()->NewNode(mcgraph()->machine()->I32x4DotI8x16I7x16AddS(),
inputs[0], inputs[1], inputs[2]);
case wasm::kExprI16x8Abs:
return graph()->NewNode(mcgraph()->machine()->I16x8Abs(), inputs[0]);
case wasm::kExprI16x8BitMask:
...
@@ -3510,6 +3510,19 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
liftoff::GetSimd128Register(src2));
}
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i16x8_dot_i8x16_i7x16_s");
}
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,
LiftoffRegister acc) {
bailout(kSimd, "emit_i32x4_dot_i8x16_i7x16_add_s");
}
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,
...
@@ -3175,6 +3175,31 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
Sqrdmulh(dst.fp().V8H(), src1.fp().V8H(), src2.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope scope(this);
VRegister tmp1 = scope.AcquireV(kFormat8H);
VRegister tmp2 = scope.AcquireV(kFormat8H);
Smull(tmp1, lhs.fp().V8B(), rhs.fp().V8B());
Smull2(tmp2, lhs.fp().V16B(), rhs.fp().V16B());
Addp(dst.fp().V8H(), tmp1, tmp2);
}
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,
LiftoffRegister acc) {
UseScratchRegisterScope scope(this);
VRegister tmp1 = scope.AcquireV(kFormat8H);
VRegister tmp2 = scope.AcquireV(kFormat8H);
Smull(tmp1, lhs.fp().V8B(), rhs.fp().V8B());
Smull2(tmp2, lhs.fp().V16B(), rhs.fp().V16B());
Addp(tmp1, tmp1, tmp2);
Saddlp(tmp1.V4S(), tmp1);
Add(dst.fp().V4S(), tmp1.V4S(), acc.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
Abs(dst.fp().V4S(), src.fp().V4S());
...
@@ -3657,6 +3657,19 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
bailout(kRelaxedSimd, "emit_i16x8_relaxed_q15mulr_s");
}
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i16x8_dot_i8x16_i7x16_s");
}
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,
LiftoffRegister acc) {
bailout(kSimd, "emit_i32x4_dot_i8x16_i7x16_add_s");
}
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
...
@@ -1259,6 +1259,13 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister acc);
inline void emit_i32x4_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i32x4_alltrue(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i32x4_bitmask(LiftoffRegister dst, LiftoffRegister src);
...
@@ -4102,6 +4102,22 @@ class LiftoffCompiler {
case wasm::kExprI32x4RelaxedTruncF64x2UZero:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_relaxed_trunc_f64x2_u_zero);
case wasm::kExprI16x8DotI8x16I7x16S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s);
case wasm::kExprI32x4DotI8x16I7x16AddS: {
// There is no helper for an instruction with 3 SIMD operands,
// and we do not expect to add any more, so we inline it here.
static constexpr RegClass res_rc = reg_class_for(kS128);
LiftoffRegister acc = __ PopToRegister();
LiftoffRegister rhs = __ PopToRegister(LiftoffRegList{acc});
LiftoffRegister lhs = __ PopToRegister(LiftoffRegList{rhs, acc});
LiftoffRegister dst = __ GetUnusedRegister(res_rc, {lhs, rhs, acc}, {});
__ emit_i32x4_dot_i8x16_i7x16_add_s(dst, lhs, rhs, acc);
__ PushRegister(kS128, dst);
return;
}
default:
unsupported(decoder, kSimd, "simd");
}
...
@@ -3231,6 +3231,19 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
bailout(kRelaxedSimd, "emit_i16x8_relaxed_q15mulr_s");
}
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i16x8_dot_i8x16_i7x16_s");
}
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,
LiftoffRegister acc) {
bailout(kSimd, "emit_i32x4_dot_i8x16_i7x16_add_s");
}
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
...
@@ -377,6 +377,8 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_I32x4_OP(RelaxedTruncF64x2SZero, "relaxed_trunc_f64x2_s_zero");
CASE_I32x4_OP(RelaxedTruncF64x2UZero, "relaxed_trunc_f64x2_u_zero");
CASE_I16x8_OP(RelaxedQ15MulRS, "relaxed_q15mulr_s")
CASE_I16x8_OP(DotI8x16I7x16S, "dot_i8x16_i7x16_s")
CASE_I32x4_OP(DotI8x16I7x16AddS, "dot_i8x16_i7x16_add_s")
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
...
@@ -541,7 +541,9 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(F32x4RelaxedMax, 0xfd10e, s_ss) \
V(F64x2RelaxedMin, 0xfd10f, s_ss) \
V(F64x2RelaxedMax, 0xfd110, s_ss) \
V(I16x8RelaxedQ15MulRS, 0xfd111, s_ss) \
V(I16x8DotI8x16I7x16S, 0xfd112, s_ss) \
V(I32x4DotI8x16I7x16AddS, 0xfd113, s_sss)
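
Both opcodes live in the 0xfd-prefixed SIMD opcode space, and the binary
format LEB128-encodes the sub-opcode after the prefix, so the instructions
should appear in a module as the byte sequences below (my reading of the
numbering above, not something shown in this diff):

// 0xfd 0x92 0x02   i16x8.dot_i8x16_i7x16_s      (0x112)
// 0xfd 0x93 0x02   i32x4.dot_i8x16_i7x16_add_s  (0x113)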
#define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \
V(I8x16ExtractLaneS, 0xfd15, _) \
...
@@ -411,8 +411,66 @@ WASM_RELAXED_SIMD_TEST(I16x8RelaxedQ15MulRS) {
RunI16x8BinOpTest<int16_t>(execution_tier, kExprI16x8RelaxedQ15MulRS,
SaturateRoundingQMul<int16_t>);
}
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
#if V8_TARGET_ARCH_ARM64
WASM_RELAXED_SIMD_TEST(I16x8DotI8x16I7x16S) {
WasmRunner<int32_t, int8_t, int8_t> r(execution_tier);
int16_t* g = r.builder().template AddGlobal<int16_t>(kWasmS128);
byte value1 = 0, value2 = 1;
byte temp1 = r.AllocateLocal(kWasmS128);
byte temp2 = r.AllocateLocal(kWasmS128);
BUILD(r, WASM_LOCAL_SET(temp1, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value1))),
WASM_LOCAL_SET(temp2, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value2))),
WASM_GLOBAL_SET(
0, WASM_SIMD_BINOP(kExprI16x8DotI8x16I7x16S, WASM_LOCAL_GET(temp1),
WASM_LOCAL_GET(temp2))),
WASM_ONE);
for (int8_t x : compiler::ValueHelper::GetVector<int8_t>()) {
for (int8_t y : compiler::ValueHelper::GetVector<int8_t>()) {
r.Call(x, y & 0x7F);
// Times 2 because both lanes of each pair hold the same splatted
// values: (x*y) + (x*y) = 2*x*y.
int16_t expected = base::MulWithWraparound(x * (y & 0x7F), 2);
for (int i = 0; i < 8; i++) {
CHECK_EQ(expected, LANE(g, i));
}
}
}
}
WASM_RELAXED_SIMD_TEST(I32x4DotI8x16I7x16AddS) {
WasmRunner<int32_t, int8_t, int8_t, int32_t> r(execution_tier);
int32_t* g = r.builder().template AddGlobal<int32_t>(kWasmS128);
byte value1 = 0, value2 = 1, value3 = 2;
byte temp1 = r.AllocateLocal(kWasmS128);
byte temp2 = r.AllocateLocal(kWasmS128);
byte temp3 = r.AllocateLocal(kWasmS128);
BUILD(
r, WASM_LOCAL_SET(temp1, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value1))),
WASM_LOCAL_SET(temp2, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value2))),
WASM_LOCAL_SET(temp3, WASM_SIMD_I32x4_SPLAT(WASM_LOCAL_GET(value3))),
WASM_GLOBAL_SET(0, WASM_SIMD_TERNOP(
kExprI32x4DotI8x16I7x16AddS, WASM_LOCAL_GET(temp1),
WASM_LOCAL_GET(temp2), WASM_LOCAL_GET(temp3))),
WASM_ONE);
for (int8_t x : compiler::ValueHelper::GetVector<int8_t>()) {
for (int8_t y : compiler::ValueHelper::GetVector<int8_t>()) {
for (int32_t z : compiler::ValueHelper::GetVector<int32_t>()) {
r.Call(x, y & 0x7F, z);
int32_t expected = base::AddWithWraparound(
base::MulWithWraparound(x * (y & 0x7F), 4), z);
for (int i = 0; i < 4; i++) {
CHECK_EQ(expected, LANE(g, i));
}
}
}
}
}
#endif // V8_TARGET_ARCH_ARM64
#undef WASM_RELAXED_SIMD_TEST
} // namespace test_run_wasm_relaxed_simd
} // namespace wasm
...
@@ -2782,6 +2782,39 @@ class WasmInterpreterInternals {
*len += 16;
return true;
}
case kExprI16x8DotI8x16I7x16S: {
int16 v2 = Pop().to_s128().to_i8x16();
int16 v1 = Pop().to_s128().to_i8x16();
int8 res;
for (size_t i = 0; i < 8; i++) {
int16_t lo = (v1.val[LANE(i * 2, v1)] * v2.val[LANE(i * 2, v2)]);
int16_t hi =
(v1.val[LANE(i * 2 + 1, v1)] * v2.val[LANE(i * 2 + 1, v2)]);
res.val[LANE(i, res)] = base::AddWithWraparound(lo, hi);
}
Push(WasmValue(Simd128(res)));
return true;
}
case kExprI32x4DotI8x16I7x16AddS: {
int4 v3 = Pop().to_s128().to_i32x4();
int16 v2 = Pop().to_s128().to_i8x16();
int16 v1 = Pop().to_s128().to_i8x16();
int4 res;
for (size_t i = 0; i < 4; i++) {
int32_t a = (v1.val[LANE(i * 4, v1)] * v2.val[LANE(i * 4, v2)]);
int32_t b =
(v1.val[LANE(i * 4 + 1, v1)] * v2.val[LANE(i * 4 + 1, v2)]);
int32_t c =
(v1.val[LANE(i * 4 + 2, v1)] * v2.val[LANE(i * 4 + 2, v2)]);
int32_t d =
(v1.val[LANE(i * 4 + 3, v1)] * v2.val[LANE(i * 4 + 3, v2)]);
int32_t acc = v3.val[LANE(i, v3)];
// a + b + c + d cannot wrap: four int8 * int7 products are bounded
// in magnitude by 4 * 128 * 127 = 65024, well within int32 range.
res.val[LANE(i, res)] = base::AddWithWraparound(a + b + c + d, acc);
}
Push(WasmValue(Simd128(res)));
return true;
}
case kExprI8x16RelaxedSwizzle:
case kExprI8x16Swizzle: {
int16 v2 = Pop().to_s128().to_i8x16();
...
@@ -927,6 +927,7 @@ inline uint16_t ExtractPrefixedOpcodeBytes(WasmOpcode opcode) {
#define WASM_SIMD_SPLAT(Type, ...) __VA_ARGS__, WASM_SIMD_OP(kExpr##Type##Splat)
#define WASM_SIMD_UNOP(op, x) x, WASM_SIMD_OP(op)
#define WASM_SIMD_BINOP(op, x, y) x, y, WASM_SIMD_OP(op)
#define WASM_SIMD_TERNOP(op, x, y, z) x, y, z, WASM_SIMD_OP(op)
#define WASM_SIMD_SHIFT_OP(op, x, y) x, y, WASM_SIMD_OP(op)
#define WASM_SIMD_CONCAT_OP(op, bytes, x, y) \
x, y, WASM_SIMD_OP(op), TO_BYTE(bytes)
...