Commit 4ad68f1c authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm64] Prototype extended multiply

Also known as multiply long, this multiplies the top or bottom halves
of the input operands; each result lane is twice as wide as the input
lanes.

This implements it for arm64 and the interpreter.
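
For example, i32x4.extmul_low_i16x8_s is, lane for lane, a widening
multiply. A scalar sketch (illustrative only; the names are not from
the codebase):

  int32_t out[4];         // result lanes
  const int16_t *a, *b;   // the two i16x8 operands, viewed as lanes
  for (int i = 0; i < 4; i++) {
    out[i] = int32_t{a[i]} * int32_t{b[i]};  // extmul_high uses a[i + 4]
  }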

Bug: v8:11008
Change-Id: Iad693007066dd1a9bc529b282e88812a081c3a01
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2469156
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70624}
parent eb6b4ce1
......@@ -1139,12 +1139,50 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArm64Mul32:
__ Mul(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
break;
case kArm64Smull: {
if (instr->InputAt(0)->IsRegister()) {
__ Smull(i.OutputRegister(), i.InputRegister32(0),
i.InputRegister32(1));
} else {
DCHECK(instr->InputAt(0)->IsSimd128Register());
VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
VectorFormat src_f = VectorFormatHalfWidth(dst_f);
__ Smull(i.OutputSimd128Register().Format(dst_f),
i.InputSimd128Register(0).Format(src_f),
i.InputSimd128Register(1).Format(src_f));
}
break;
}
case kArm64Smull2: {
VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
__ Smull2(i.OutputSimd128Register().Format(dst_f),
i.InputSimd128Register(0).Format(src_f),
i.InputSimd128Register(1).Format(src_f));
break;
}
case kArm64Umull: {
if (instr->InputAt(0)->IsRegister()) {
__ Umull(i.OutputRegister(), i.InputRegister32(0),
i.InputRegister32(1));
} else {
DCHECK(instr->InputAt(0)->IsSimd128Register());
VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
VectorFormat src_f = VectorFormatHalfWidth(dst_f);
__ Umull(i.OutputSimd128Register().Format(dst_f),
i.InputSimd128Register(0).Format(src_f),
i.InputSimd128Register(1).Format(src_f));
}
break;
}
case kArm64Umull2: {
VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
__ Umull2(i.OutputSimd128Register().Format(dst_f),
i.InputSimd128Register(0).Format(src_f),
i.InputSimd128Register(1).Format(src_f));
break;
}
case kArm64Madd:
__ Madd(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
i.InputRegister(2));
......
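The two operand formats are derived from the destination lane size that
the instruction selector packs into MiscField (see VisitExtMul below).
A sketch of the mapping, assuming V8's arm64 VectorFormat helpers:

  // dst_lane_size 16: dst 8H, Smull/Umull srcs 8B, Smull2/Umull2 srcs 16B
  // dst_lane_size 32: dst 4S, Smull/Umull srcs 4H, Smull2/Umull2 srcs 8H
  // dst_lane_size 64: dst 2D, Smull/Umull srcs 2S, Smull2/Umull2 srcs 4S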
......@@ -40,7 +40,9 @@ namespace compiler {
V(Arm64Mul) \
V(Arm64Mul32) \
V(Arm64Smull) \
V(Arm64Smull2) \
V(Arm64Umull) \
V(Arm64Umull2) \
V(Arm64Madd) \
V(Arm64Madd32) \
V(Arm64Msub) \
......
......@@ -41,7 +41,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Mul:
case kArm64Mul32:
case kArm64Smull:
case kArm64Smull2:
case kArm64Umull:
case kArm64Umull2:
case kArm64Madd:
case kArm64Madd32:
case kArm64Msub:
......
......@@ -158,6 +158,14 @@ void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
g.UseRegister(node->InputAt(1)));
}
void VisitRRR(InstructionSelector* selector, InstructionCode opcode,
Node* node) {
Arm64OperandGenerator g(selector);
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)));
}
void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode,
Node* node, int width) {
Arm64OperandGenerator g(selector);
......@@ -1639,6 +1647,63 @@ void InstructionSelector::VisitInt64Mul(Node* node) {
VisitRRR(this, kArm64Mul, node);
}
namespace {
void VisitExtMul(InstructionSelector* selector, ArchOpcode opcode, Node* node,
int dst_lane_size) {
InstructionCode code = opcode;
code |= MiscField::encode(dst_lane_size);
VisitRRR(selector, code, node);
}
} // namespace
void InstructionSelector::VisitI16x8ExtMulLowI8x16S(Node* node) {
VisitExtMul(this, kArm64Smull, node, 16);
}
void InstructionSelector::VisitI16x8ExtMulHighI8x16S(Node* node) {
VisitExtMul(this, kArm64Smull2, node, 16);
}
void InstructionSelector::VisitI16x8ExtMulLowI8x16U(Node* node) {
VisitExtMul(this, kArm64Umull, node, 16);
}
void InstructionSelector::VisitI16x8ExtMulHighI8x16U(Node* node) {
VisitExtMul(this, kArm64Umull2, node, 16);
}
void InstructionSelector::VisitI32x4ExtMulLowI16x8S(Node* node) {
VisitExtMul(this, kArm64Smull, node, 32);
}
void InstructionSelector::VisitI32x4ExtMulHighI16x8S(Node* node) {
VisitExtMul(this, kArm64Smull2, node, 32);
}
void InstructionSelector::VisitI32x4ExtMulLowI16x8U(Node* node) {
VisitExtMul(this, kArm64Umull, node, 32);
}
void InstructionSelector::VisitI32x4ExtMulHighI16x8U(Node* node) {
VisitExtMul(this, kArm64Umull2, node, 32);
}
void InstructionSelector::VisitI64x2ExtMulLowI32x4S(Node* node) {
VisitExtMul(this, kArm64Smull, node, 64);
}
void InstructionSelector::VisitI64x2ExtMulHighI32x4S(Node* node) {
VisitExtMul(this, kArm64Smull2, node, 64);
}
void InstructionSelector::VisitI64x2ExtMulLowI32x4U(Node* node) {
VisitExtMul(this, kArm64Umull, node, 64);
}
void InstructionSelector::VisitI64x2ExtMulHighI32x4U(Node* node) {
VisitExtMul(this, kArm64Umull2, node, 64);
}
void InstructionSelector::VisitInt32MulHigh(Node* node) {
Arm64OperandGenerator g(this);
InstructionOperand const smull_operand = g.TempRegister();
......
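VisitExtMul records only the destination lane size; the code generator
recovers both formats from it. A minimal sketch of the roundtrip for
i32x4.extmul_low_i16x8_s, mirroring the code above:

  InstructionCode code = kArm64Smull;
  code |= MiscField::encode(32);  // destination lane size in bits
  // ...later, in AssembleArchInstruction:
  VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(code));  // 4S
  VectorFormat src_f = VectorFormatHalfWidth(dst_f);                // 4H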
......@@ -2013,6 +2013,14 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI64x2Eq(node);
case IrOpcode::kI64x2ShrU:
return MarkAsSimd128(node), VisitI64x2ShrU(node);
case IrOpcode::kI64x2ExtMulLowI32x4S:
return MarkAsSimd128(node), VisitI64x2ExtMulLowI32x4S(node);
case IrOpcode::kI64x2ExtMulHighI32x4S:
return MarkAsSimd128(node), VisitI64x2ExtMulHighI32x4S(node);
case IrOpcode::kI64x2ExtMulLowI32x4U:
return MarkAsSimd128(node), VisitI64x2ExtMulLowI32x4U(node);
case IrOpcode::kI64x2ExtMulHighI32x4U:
return MarkAsSimd128(node), VisitI64x2ExtMulHighI32x4U(node);
case IrOpcode::kI32x4Splat:
return MarkAsSimd128(node), VisitI32x4Splat(node);
case IrOpcode::kI32x4ExtractLane:
......@@ -2073,6 +2081,14 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsWord32(node), VisitI32x4BitMask(node);
case IrOpcode::kI32x4DotI16x8S:
return MarkAsSimd128(node), VisitI32x4DotI16x8S(node);
case IrOpcode::kI32x4ExtMulLowI16x8S:
return MarkAsSimd128(node), VisitI32x4ExtMulLowI16x8S(node);
case IrOpcode::kI32x4ExtMulHighI16x8S:
return MarkAsSimd128(node), VisitI32x4ExtMulHighI16x8S(node);
case IrOpcode::kI32x4ExtMulLowI16x8U:
return MarkAsSimd128(node), VisitI32x4ExtMulLowI16x8U(node);
case IrOpcode::kI32x4ExtMulHighI16x8U:
return MarkAsSimd128(node), VisitI32x4ExtMulHighI16x8U(node);
case IrOpcode::kI16x8Splat:
return MarkAsSimd128(node), VisitI16x8Splat(node);
case IrOpcode::kI16x8ExtractLaneU:
......@@ -2145,6 +2161,14 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI16x8Abs(node);
case IrOpcode::kI16x8BitMask:
return MarkAsWord32(node), VisitI16x8BitMask(node);
case IrOpcode::kI16x8ExtMulLowI8x16S:
return MarkAsSimd128(node), VisitI16x8ExtMulLowI8x16S(node);
case IrOpcode::kI16x8ExtMulHighI8x16S:
return MarkAsSimd128(node), VisitI16x8ExtMulHighI8x16S(node);
case IrOpcode::kI16x8ExtMulLowI8x16U:
return MarkAsSimd128(node), VisitI16x8ExtMulLowI8x16U(node);
case IrOpcode::kI16x8ExtMulHighI8x16U:
return MarkAsSimd128(node), VisitI16x8ExtMulHighI8x16U(node);
case IrOpcode::kI8x16Splat:
return MarkAsSimd128(node), VisitI8x16Splat(node);
case IrOpcode::kI8x16ExtractLaneU:
......@@ -2698,6 +2722,44 @@ void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) {
// TODO(v8:11002) Prototype i8x16.popcnt.
void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
// TODO(v8:11008) Prototype extended multiplication.
void InstructionSelector::VisitI64x2ExtMulLowI32x4S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI64x2ExtMulHighI32x4S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI64x2ExtMulLowI32x4U(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI64x2ExtMulHighI32x4U(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4ExtMulLowI16x8S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4ExtMulHighI16x8S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4ExtMulLowI16x8U(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4ExtMulHighI16x8U(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8ExtMulLowI8x16S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8ExtMulHighI8x16S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8ExtMulLowI8x16U(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8ExtMulHighI8x16U(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64
......
......@@ -425,6 +425,10 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I64x2Mul, Operator::kCommutative, 2, 0, 1) \
V(I64x2Eq, Operator::kCommutative, 2, 0, 1) \
V(I64x2ShrU, Operator::kNoProperties, 2, 0, 1) \
V(I64x2ExtMulLowI32x4S, Operator::kCommutative, 2, 0, 1) \
V(I64x2ExtMulHighI32x4S, Operator::kCommutative, 2, 0, 1) \
V(I64x2ExtMulLowI32x4U, Operator::kCommutative, 2, 0, 1) \
V(I64x2ExtMulHighI32x4U, Operator::kCommutative, 2, 0, 1) \
V(I32x4Splat, Operator::kNoProperties, 1, 0, 1) \
V(I32x4SConvertF32x4, Operator::kNoProperties, 1, 0, 1) \
V(I32x4SConvertI16x8Low, Operator::kNoProperties, 1, 0, 1) \
......@@ -453,6 +457,10 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I32x4Abs, Operator::kNoProperties, 1, 0, 1) \
V(I32x4BitMask, Operator::kNoProperties, 1, 0, 1) \
V(I32x4DotI16x8S, Operator::kCommutative, 2, 0, 1) \
V(I32x4ExtMulLowI16x8S, Operator::kCommutative, 2, 0, 1) \
V(I32x4ExtMulHighI16x8S, Operator::kCommutative, 2, 0, 1) \
V(I32x4ExtMulLowI16x8U, Operator::kCommutative, 2, 0, 1) \
V(I32x4ExtMulHighI16x8U, Operator::kCommutative, 2, 0, 1) \
V(I16x8Splat, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16Low, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16High, Operator::kNoProperties, 1, 0, 1) \
......@@ -486,6 +494,10 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I16x8Q15MulRSatS, Operator::kCommutative, 2, 0, 1) \
V(I16x8Abs, Operator::kNoProperties, 1, 0, 1) \
V(I16x8BitMask, Operator::kNoProperties, 1, 0, 1) \
V(I16x8ExtMulLowI8x16S, Operator::kCommutative, 2, 0, 1) \
V(I16x8ExtMulHighI8x16S, Operator::kCommutative, 2, 0, 1) \
V(I16x8ExtMulLowI8x16U, Operator::kCommutative, 2, 0, 1) \
V(I16x8ExtMulHighI8x16U, Operator::kCommutative, 2, 0, 1) \
V(I8x16Splat, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Neg, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Shl, Operator::kNoProperties, 2, 0, 1) \
......
......@@ -672,6 +672,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I64x2Mul();
const Operator* I64x2Eq();
const Operator* I64x2ShrU();
const Operator* I64x2ExtMulLowI32x4S();
const Operator* I64x2ExtMulHighI32x4S();
const Operator* I64x2ExtMulLowI32x4U();
const Operator* I64x2ExtMulHighI32x4U();
const Operator* I32x4Splat();
const Operator* I32x4ExtractLane(int32_t);
......@@ -704,6 +708,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I32x4Abs();
const Operator* I32x4BitMask();
const Operator* I32x4DotI16x8S();
const Operator* I32x4ExtMulLowI16x8S();
const Operator* I32x4ExtMulHighI16x8S();
const Operator* I32x4ExtMulLowI16x8U();
const Operator* I32x4ExtMulHighI16x8U();
const Operator* I16x8Splat();
const Operator* I16x8ExtractLaneU(int32_t);
......@@ -742,6 +750,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I16x8Q15MulRSatS();
const Operator* I16x8Abs();
const Operator* I16x8BitMask();
const Operator* I16x8ExtMulLowI8x16S();
const Operator* I16x8ExtMulHighI8x16S();
const Operator* I16x8ExtMulLowI8x16U();
const Operator* I16x8ExtMulHighI8x16U();
const Operator* I8x16Splat();
const Operator* I8x16ExtractLaneU(int32_t);
......
......@@ -831,6 +831,10 @@
V(I64x2Mul) \
V(I64x2Eq) \
V(I64x2ShrU) \
V(I64x2ExtMulLowI32x4S) \
V(I64x2ExtMulHighI32x4S) \
V(I64x2ExtMulLowI32x4U) \
V(I64x2ExtMulHighI32x4U) \
V(I32x4Splat) \
V(I32x4ExtractLane) \
V(I32x4ReplaceLane) \
......@@ -865,6 +869,10 @@
V(I32x4Abs) \
V(I32x4BitMask) \
V(I32x4DotI16x8S) \
V(I32x4ExtMulLowI16x8S) \
V(I32x4ExtMulHighI16x8S) \
V(I32x4ExtMulLowI16x8U) \
V(I32x4ExtMulHighI16x8U) \
V(I16x8Splat) \
V(I16x8ExtractLaneU) \
V(I16x8ExtractLaneS) \
......@@ -905,6 +913,10 @@
V(I16x8Q15MulRSatS) \
V(I16x8Abs) \
V(I16x8BitMask) \
V(I16x8ExtMulLowI8x16S) \
V(I16x8ExtMulHighI8x16S) \
V(I16x8ExtMulLowI8x16U) \
V(I16x8ExtMulHighI8x16U) \
V(I8x16Splat) \
V(I8x16ExtractLaneU) \
V(I8x16ExtractLaneS) \
......
......@@ -4650,6 +4650,18 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI64x2ShrU:
return graph()->NewNode(mcgraph()->machine()->I64x2ShrU(), inputs[0],
inputs[1]);
case wasm::kExprI64x2ExtMulLowI32x4S:
return graph()->NewNode(mcgraph()->machine()->I64x2ExtMulLowI32x4S(),
inputs[0], inputs[1]);
case wasm::kExprI64x2ExtMulHighI32x4S:
return graph()->NewNode(mcgraph()->machine()->I64x2ExtMulHighI32x4S(),
inputs[0], inputs[1]);
case wasm::kExprI64x2ExtMulLowI32x4U:
return graph()->NewNode(mcgraph()->machine()->I64x2ExtMulLowI32x4U(),
inputs[0], inputs[1]);
case wasm::kExprI64x2ExtMulHighI32x4U:
return graph()->NewNode(mcgraph()->machine()->I64x2ExtMulHighI32x4U(),
inputs[0], inputs[1]);
case wasm::kExprI32x4Splat:
return graph()->NewNode(mcgraph()->machine()->I32x4Splat(), inputs[0]);
case wasm::kExprI32x4SConvertF32x4:
......@@ -4742,6 +4754,18 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI32x4DotI16x8S:
return graph()->NewNode(mcgraph()->machine()->I32x4DotI16x8S(), inputs[0],
inputs[1]);
case wasm::kExprI32x4ExtMulLowI16x8S:
return graph()->NewNode(mcgraph()->machine()->I32x4ExtMulLowI16x8S(),
inputs[0], inputs[1]);
case wasm::kExprI32x4ExtMulHighI16x8S:
return graph()->NewNode(mcgraph()->machine()->I32x4ExtMulHighI16x8S(),
inputs[0], inputs[1]);
case wasm::kExprI32x4ExtMulLowI16x8U:
return graph()->NewNode(mcgraph()->machine()->I32x4ExtMulLowI16x8U(),
inputs[0], inputs[1]);
case wasm::kExprI32x4ExtMulHighI16x8U:
return graph()->NewNode(mcgraph()->machine()->I32x4ExtMulHighI16x8U(),
inputs[0], inputs[1]);
case wasm::kExprI16x8Splat:
return graph()->NewNode(mcgraph()->machine()->I16x8Splat(), inputs[0]);
case wasm::kExprI16x8SConvertI8x16Low:
......@@ -4849,6 +4873,18 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
return graph()->NewNode(mcgraph()->machine()->I16x8Abs(), inputs[0]);
case wasm::kExprI16x8BitMask:
return graph()->NewNode(mcgraph()->machine()->I16x8BitMask(), inputs[0]);
case wasm::kExprI16x8ExtMulLowI8x16S:
return graph()->NewNode(mcgraph()->machine()->I16x8ExtMulLowI8x16S(),
inputs[0], inputs[1]);
case wasm::kExprI16x8ExtMulHighI8x16S:
return graph()->NewNode(mcgraph()->machine()->I16x8ExtMulHighI8x16S(),
inputs[0], inputs[1]);
case wasm::kExprI16x8ExtMulLowI8x16U:
return graph()->NewNode(mcgraph()->machine()->I16x8ExtMulLowI8x16U(),
inputs[0], inputs[1]);
case wasm::kExprI16x8ExtMulHighI8x16U:
return graph()->NewNode(mcgraph()->machine()->I16x8ExtMulHighI8x16U(),
inputs[0], inputs[1]);
case wasm::kExprI8x16Splat:
return graph()->NewNode(mcgraph()->machine()->I8x16Splat(), inputs[0]);
case wasm::kExprI8x16Neg:
......
......@@ -202,6 +202,21 @@ T SaturateRoundingQMul(T a, T b) {
return Saturate<T>(product);
}
// Multiply two numbers, returning a result that is twice as wide, so the
// product cannot overflow.
// Put Wide first so function template argument deduction works for Narrow
// and callers need to provide only Wide.
template <typename Wide, typename Narrow>
Wide MultiplyLong(Narrow a, Narrow b) {
static_assert(
std::is_integral<Narrow>::value && std::is_integral<Wide>::value,
"only integral types");
static_assert(std::is_signed<Narrow>::value == std::is_signed<Wide>::value,
"both must have same signedness");
static_assert(sizeof(Narrow) * 2 == sizeof(Wide), "only twice as long");
return static_cast<Wide>(a) * static_cast<Wide>(b);
}
// Helper macros for defining a contiguous sequence of field offset constants.
// Example: (backslashes at the ends of respective lines of this multi-line
// macro definition are omitted here to please the compiler)
......
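A hedged usage example of the new helper: because the operands are
widened before multiplying, the product always fits, even at the
extremes of the narrow type:

  int16_t p = MultiplyLong<int16_t>(int8_t{-128}, int8_t{-128});
  // p == 16384, which would overflow int8_t
  uint32_t q = MultiplyLong<uint32_t>(uint16_t{65535}, uint16_t{65535});
  // q == 4294836225, which would overflow uint16_t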
......@@ -348,6 +348,13 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_I32x4_OP(DotI16x8S, "dot_i16x8_s")
CASE_SIGN_OP(I16x8, ExtMulLowI8x16, "extmul_low_i8x16")
CASE_SIGN_OP(I16x8, ExtMulHighI8x16, "extmul_high_i8x16")
CASE_SIGN_OP(I32x4, ExtMulLowI16x8, "extmul_low_i16x8")
CASE_SIGN_OP(I32x4, ExtMulHighI16x8, "extmul_high_i16x8")
CASE_SIGN_OP(I64x2, ExtMulLowI32x4, "extmul_low_i32x4")
CASE_SIGN_OP(I64x2, ExtMulHighI32x4, "extmul_high_i32x4")
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
CASE_INT_OP(AtomicWait, "atomic.wait")
......
......@@ -475,6 +475,18 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(I8x16Mul, 0xfd75, s_ss) \
V(I8x16Popcnt, 0xfd7c, s_s) \
V(I16x8Q15MulRSatS, 0xfd9c, s_ss) \
V(I16x8ExtMulLowI8x16S, 0xfd9a, s_ss) \
V(I16x8ExtMulHighI8x16S, 0xfd9d, s_ss) \
V(I16x8ExtMulLowI8x16U, 0xfd9e, s_ss) \
V(I16x8ExtMulHighI8x16U, 0xfd9f, s_ss) \
V(I32x4ExtMulLowI16x8S, 0xfdbb, s_ss) \
V(I32x4ExtMulHighI16x8S, 0xfdbd, s_ss) \
V(I32x4ExtMulLowI16x8U, 0xfdbe, s_ss) \
V(I32x4ExtMulHighI16x8U, 0xfdbf, s_ss) \
V(I64x2ExtMulLowI32x4S, 0xfdd2, s_ss) \
V(I64x2ExtMulHighI32x4S, 0xfdd3, s_ss) \
V(I64x2ExtMulLowI32x4U, 0xfdd6, s_ss) \
V(I64x2ExtMulHighI32x4U, 0xfdd7, s_ss) \
V(I64x2Eq, 0xfdc0, s_ss) \
V(F32x4Qfma, 0xfdb4, s_sss) \
V(I64x2BitMask, 0xfdc4, i_s) \
......
......@@ -2220,13 +2220,125 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
base::RoundingAverageUnsigned);
}
#if V8_TARGET_ARCH_ARM64
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
WASM_SIMD_TEST_NO_LOWERING(I16x8Q15MulRSatS) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunI16x8BinOpTest<int16_t>(execution_tier, lower_simd, kExprI16x8Q15MulRSatS,
SaturateRoundingQMul<int16_t>);
}
// TODO(v8:11008) Prototype extended multiplication.
namespace {
enum class MulHalf { kLow, kHigh };
// Helper to run extmul tests. It splats the two scalar inputs into two v128
// operands, calls the extmul op on them, and stores the result in a global.
// It also zeroes the unused (top or bottom) half of the first operand: a
// correct implementation never reads that half, so multiplying the wrong
// halves yields zero lanes instead of the expected products.
template <typename S, typename T, typename OpType = T (*)(S, S)>
void RunExtMulTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, OpType expected_op, WasmOpcode splat,
MulHalf half) {
FLAG_SCOPE(wasm_simd_post_mvp);
WasmRunner<int32_t, S, S> r(execution_tier, lower_simd);
int lane_to_zero = half == MulHalf::kLow ? 1 : 0;
T* g = r.builder().template AddGlobal<T>(kWasmS128);
BUILD(r,
WASM_SET_GLOBAL(
0, WASM_SIMD_BINOP(
opcode,
WASM_SIMD_I64x2_REPLACE_LANE(
lane_to_zero, WASM_SIMD_UNOP(splat, WASM_GET_LOCAL(0)),
WASM_I64V_1(0)),
WASM_SIMD_UNOP(splat, WASM_GET_LOCAL(1)))),
WASM_ONE);
constexpr int lanes = kSimd128Size / sizeof(T);
for (S x : compiler::ValueHelper::GetVector<S>()) {
for (S y : compiler::ValueHelper::GetVector<S>()) {
r.Call(x, y);
T expected = expected_op(x, y);
for (int i = 0; i < lanes; i++) {
CHECK_EQ(expected, ReadLittleEndianValue<T>(&g[i]));
}
}
}
}
} // namespace
WASM_SIMD_TEST_NO_LOWERING(I16x8ExtMulLowI8x16S) {
RunExtMulTest<int8_t, int16_t>(execution_tier, lower_simd,
kExprI16x8ExtMulLowI8x16S, MultiplyLong,
kExprI8x16Splat, MulHalf::kLow);
}
WASM_SIMD_TEST_NO_LOWERING(I16x8ExtMulHighI8x16S) {
RunExtMulTest<int8_t, int16_t>(execution_tier, lower_simd,
kExprI16x8ExtMulHighI8x16S, MultiplyLong,
kExprI8x16Splat, MulHalf::kHigh);
}
WASM_SIMD_TEST_NO_LOWERING(I16x8ExtMulLowI8x16U) {
RunExtMulTest<uint8_t, uint16_t>(execution_tier, lower_simd,
kExprI16x8ExtMulLowI8x16U, MultiplyLong,
kExprI8x16Splat, MulHalf::kLow);
}
WASM_SIMD_TEST_NO_LOWERING(I16x8ExtMulHighI8x16U) {
RunExtMulTest<uint8_t, uint16_t>(execution_tier, lower_simd,
kExprI16x8ExtMulHighI8x16U, MultiplyLong,
kExprI8x16Splat, MulHalf::kHigh);
}
WASM_SIMD_TEST_NO_LOWERING(I32x4ExtMulLowI16x8S) {
RunExtMulTest<int16_t, int32_t>(execution_tier, lower_simd,
kExprI32x4ExtMulLowI16x8S, MultiplyLong,
kExprI16x8Splat, MulHalf::kLow);
}
WASM_SIMD_TEST_NO_LOWERING(I32x4ExtMulHighI16x8S) {
RunExtMulTest<int16_t, int32_t>(execution_tier, lower_simd,
kExprI32x4ExtMulHighI16x8S, MultiplyLong,
kExprI16x8Splat, MulHalf::kHigh);
}
WASM_SIMD_TEST_NO_LOWERING(I32x4ExtMulLowI16x8U) {
RunExtMulTest<uint16_t, uint32_t>(execution_tier, lower_simd,
kExprI32x4ExtMulLowI16x8U, MultiplyLong,
kExprI16x8Splat, MulHalf::kLow);
}
WASM_SIMD_TEST_NO_LOWERING(I32x4ExtMulHighI16x8U) {
RunExtMulTest<uint16_t, uint32_t>(execution_tier, lower_simd,
kExprI32x4ExtMulHighI16x8U, MultiplyLong,
kExprI16x8Splat, MulHalf::kHigh);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2ExtMulLowI32x4S) {
RunExtMulTest<int32_t, int64_t>(execution_tier, lower_simd,
kExprI64x2ExtMulLowI32x4S, MultiplyLong,
kExprI32x4Splat, MulHalf::kLow);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2ExtMulHighI32x4S) {
RunExtMulTest<int32_t, int64_t>(execution_tier, lower_simd,
kExprI64x2ExtMulHighI32x4S, MultiplyLong,
kExprI32x4Splat, MulHalf::kHigh);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2ExtMulLowI32x4U) {
RunExtMulTest<uint32_t, uint64_t>(execution_tier, lower_simd,
kExprI64x2ExtMulLowI32x4U, MultiplyLong,
kExprI32x4Splat, MulHalf::kLow);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2ExtMulHighI32x4U) {
RunExtMulTest<uint32_t, uint64_t>(execution_tier, lower_simd,
kExprI64x2ExtMulHighI32x4U, MultiplyLong,
kExprI32x4Splat, MulHalf::kHigh);
}
#endif // V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST(I32x4DotI16x8S) {
......
......@@ -2429,6 +2429,42 @@ class WasmInterpreterInternals {
SHIFT_CASE(I8x16ShrS, i8x16, int16, 16, a >> (shift % 8))
SHIFT_CASE(I8x16ShrU, i8x16, int16, 16,
static_cast<uint8_t>(a) >> (shift % 8))
case kExprI16x8ExtMulLowI8x16S: {
return DoSimdExtMul<int16, int8, int8_t, int16_t>(0);
}
case kExprI16x8ExtMulHighI8x16S: {
return DoSimdExtMul<int16, int8, int8_t, int16_t>(8);
}
case kExprI16x8ExtMulLowI8x16U: {
return DoSimdExtMul<int16, int8, uint8_t, uint16_t>(0);
}
case kExprI16x8ExtMulHighI8x16U: {
return DoSimdExtMul<int16, int8, uint8_t, uint16_t>(8);
}
case kExprI32x4ExtMulLowI16x8S: {
return DoSimdExtMul<int8, int4, int16_t, int32_t>(0);
}
case kExprI32x4ExtMulHighI16x8S: {
return DoSimdExtMul<int8, int4, int16_t, int32_t>(4);
}
case kExprI32x4ExtMulLowI16x8U: {
return DoSimdExtMul<int8, int4, uint16_t, uint32_t>(0);
}
case kExprI32x4ExtMulHighI16x8U: {
return DoSimdExtMul<int8, int4, uint16_t, uint32_t>(4);
}
case kExprI64x2ExtMulLowI32x4S: {
return DoSimdExtMul<int4, int2, int32_t, int64_t>(0);
}
case kExprI64x2ExtMulHighI32x4S: {
return DoSimdExtMul<int4, int2, int32_t, int64_t>(2);
}
case kExprI64x2ExtMulLowI32x4U: {
return DoSimdExtMul<int4, int2, uint32_t, uint64_t>(0);
}
case kExprI64x2ExtMulHighI32x4U: {
return DoSimdExtMul<int4, int2, uint32_t, uint64_t>(2);
}
#undef SHIFT_CASE
#define CONVERT_CASE(op, src_type, name, dst_type, count, start_index, ctype, \
expr) \
......@@ -2810,6 +2846,24 @@ class WasmInterpreterInternals {
return true;
}
template <typename s_type, typename d_type, typename narrow, typename wide>
bool DoSimdExtMul(unsigned start) {
WasmValue v2 = Pop();
WasmValue v1 = Pop();
auto s1 = v1.to_s128().to<s_type>();
auto s2 = v2.to_s128().to<s_type>();
auto end = start + (kSimd128Size / sizeof(wide));
d_type res;
for (size_t dst = 0; start < end; ++start, ++dst) {
// Need static_cast for unsigned narrow types.
res.val[LANE(dst, res)] =
MultiplyLong<wide>(static_cast<narrow>(s1.val[LANE(start, s1)]),
static_cast<narrow>(s2.val[LANE(start, s2)]));
}
Push(WasmValue(Simd128(res)));
return true;
}
// Check if our control stack (frames_) exceeds the limit. Trigger stack
// overflow if it does, unwinding the current frame.
// Returns true if execution can continue, false if the stack was fully
......
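The start argument selects the half. A standalone sketch of the
kExprI32x4ExtMulHighI16x8S case above (start == 4; pointers assumed
initialized, names hypothetical):

  int32_t res[4];
  const int16_t *s1, *s2;  // the two 8x16-bit inputs, viewed as lanes
  for (int dst = 0, src = 4; src < 8; ++src, ++dst) {
    res[dst] = MultiplyLong<int32_t>(s1[src], s2[src]);
  }

That is, source lanes 4..7 land in result lanes 0..3 of the 4x32-bit
output.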