Commit e0b32029, authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm64] Prototype saturating rounding multiply high

Only implemented and tested on arm64 and interpreter.

Moved a helper function (Clamp, renamed to Saturate) into src/utils to
be able to reuse this in interpreter and tests.

Bug: v8:10971
Change-Id: Iaffcd36d27e0e8ab11e167befa96eef8e59f1c81
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2438990
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70360}
parent e02656ea
......@@ -2334,6 +2334,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_BINOP_CASE(kArm64I16x8GtU, Cmhi, 8H);
SIMD_BINOP_CASE(kArm64I16x8GeU, Cmhs, 8H);
SIMD_BINOP_CASE(kArm64I16x8RoundingAverageU, Urhadd, 8H);
SIMD_BINOP_CASE(kArm64I16x8Q15MulRSatS, Sqrdmulh, 8H);
SIMD_UNOP_CASE(kArm64I16x8Abs, Abs, 8H);
case kArm64I16x8BitMask: {
Register dst = i.OutputRegister32();
......
......@@ -292,6 +292,7 @@ namespace compiler {
V(Arm64I16x8GtU) \
V(Arm64I16x8GeU) \
V(Arm64I16x8RoundingAverageU) \
V(Arm64I16x8Q15MulRSatS) \
V(Arm64I16x8Abs) \
V(Arm64I16x8BitMask) \
V(Arm64I8x16Splat) \
......
......@@ -262,6 +262,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I16x8GtU:
case kArm64I16x8GeU:
case kArm64I16x8RoundingAverageU:
case kArm64I16x8Q15MulRSatS:
case kArm64I16x8Abs:
case kArm64I16x8BitMask:
case kArm64I8x16Splat:
......
......@@ -3323,6 +3323,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8GtU, kArm64I16x8GtU) \
V(I16x8GeU, kArm64I16x8GeU) \
V(I16x8RoundingAverageU, kArm64I16x8RoundingAverageU) \
V(I16x8Q15MulRSatS, kArm64I16x8Q15MulRSatS) \
V(I8x16SConvertI16x8, kArm64I8x16SConvertI16x8) \
V(I8x16AddSaturateS, kArm64I8x16AddSaturateS) \
V(I8x16SubSaturateS, kArm64I8x16SubSaturateS) \
......
......@@ -2140,6 +2140,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI16x8GeU(node);
case IrOpcode::kI16x8RoundingAverageU:
return MarkAsSimd128(node), VisitI16x8RoundingAverageU(node);
case IrOpcode::kI16x8Q15MulRSatS:
return MarkAsSimd128(node), VisitI16x8Q15MulRSatS(node);
case IrOpcode::kI16x8Abs:
return MarkAsSimd128(node), VisitI16x8Abs(node);
case IrOpcode::kI16x8BitMask:
......@@ -2694,6 +2696,11 @@ void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
// && !V8_TARGET_ARCH_ARM
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
#if !V8_TARGET_ARCH_ARM64
// Fallback visitor for I16x8Q15MulRSatS on every target other than arm64:
// no instruction selection is provided here, the body only invokes
// UNIMPLEMENTED(). The arm64 build is excluded by the surrounding guard.
void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64
// Visitor for FinishRegion nodes: forwards the node to EmitIdentity().
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {
......
......@@ -449,6 +449,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I16x8GtU, Operator::kNoProperties, 2, 0, 1) \
V(I16x8GeU, Operator::kNoProperties, 2, 0, 1) \
V(I16x8RoundingAverageU, Operator::kCommutative, 2, 0, 1) \
V(I16x8Q15MulRSatS, Operator::kCommutative, 2, 0, 1) \
V(I16x8Abs, Operator::kNoProperties, 1, 0, 1) \
V(I16x8BitMask, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Splat, Operator::kNoProperties, 1, 0, 1) \
......
......@@ -718,6 +718,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I16x8GtU();
const Operator* I16x8GeU();
const Operator* I16x8RoundingAverageU();
const Operator* I16x8Q15MulRSatS();
const Operator* I16x8Abs();
const Operator* I16x8BitMask();
......
......@@ -906,6 +906,7 @@
V(I16x8GtU) \
V(I16x8GeU) \
V(I16x8RoundingAverageU) \
V(I16x8Q15MulRSatS) \
V(I16x8Abs) \
V(I16x8BitMask) \
V(I8x16Splat) \
......
......@@ -4810,6 +4810,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI16x8RoundingAverageU:
return graph()->NewNode(mcgraph()->machine()->I16x8RoundingAverageU(),
inputs[0], inputs[1]);
case wasm::kExprI16x8Q15MulRSatS:
return graph()->NewNode(mcgraph()->machine()->I16x8Q15MulRSatS(),
inputs[0], inputs[1]);
case wasm::kExprI16x8Abs:
return graph()->NewNode(mcgraph()->machine()->I16x8Abs(), inputs[0]);
case wasm::kExprI16x8BitMask:
......
......@@ -134,6 +134,15 @@ inline double Modulo(double x, double y) {
#endif
}
// Converts a 64-bit value to T. Inputs outside the representable range of T
// are pinned to the nearest bound of T instead of wrapping.
template <typename T>
T Saturate(int64_t value) {
  static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
  using Limits = std::numeric_limits<T>;
  const int64_t lower_bound = static_cast<int64_t>(Limits::min());
  const int64_t upper_bound = static_cast<int64_t>(Limits::max());
  // Too large: pin to the top of T's range.
  if (value > upper_bound) return static_cast<T>(upper_bound);
  // Too small: pin to the bottom of T's range.
  if (value < lower_bound) return static_cast<T>(lower_bound);
  return static_cast<T>(value);
}
template <typename T>
T SaturateAdd(T a, T b) {
if (std::is_signed<T>::value) {
......@@ -176,6 +185,23 @@ T SaturateSub(T a, T b) {
return a - b;
}
// Saturating rounding multiplication for Q-format numbers. See
// https://en.wikipedia.org/wiki/Q_(number_format) for a description.
// Specifically this supports Q7, Q15, and Q31. This follows the
// implementation in simulator-logic-arm64.cc (sqrdmulh).
//
// Computes (a * b + 2^(bits - 2)) >> (bits - 1) in 64-bit arithmetic, where
// bits is the width of T, and saturates the result to the range of T.
template <typename T>
T SaturateRoundingQMul(T a, T b) {
  static_assert(std::is_integral<T>::value, "only integral types");
  constexpr int size_in_bits = sizeof(T) * 8;
  constexpr int64_t round_const = int64_t{1} << (size_in_bits - 2);
  // Widen both operands *before* multiplying. A plain `a * b` is evaluated in
  // int after integral promotion, which overflows (undefined behavior) for
  // int32_t operands such as a == b == int32 min; in int64_t the largest
  // possible magnitude, 2^62, plus the rounding constant still fits.
  int64_t product = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  product += round_const;
  product >>= (size_in_bits - 1);
  return Saturate<T>(product);
}
// Helper macros for defining a contiguous sequence of field offset constants.
// Example: (backslashes at the ends of respective lines of this multi-line
// macro definition are omitted here to please the compiler)
......
......@@ -318,6 +318,7 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_I8x16_OP(RoundingAverageU, "avgr_u")
CASE_I16x8_OP(RoundingAverageU, "avgr_u")
CASE_I16x8_OP(Q15MulRSatS, "q15mulr_sat_s")
CASE_I8x16_OP(Abs, "abs")
CASE_I16x8_OP(Abs, "abs")
......
......@@ -464,6 +464,7 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
#define FOREACH_SIMD_POST_MVP_OPCODE(V) \
V(I8x16Mul, 0xfd75, s_ss) \
V(I16x8Q15MulRSatS, 0xfd9c, s_ss) \
V(V64x2AnyTrue, 0xfdc2, i_s) \
V(V64x2AllTrue, 0xfdc3, i_s) \
V(I64x2Eq, 0xfdc0, s_ss) \
......
......@@ -212,15 +212,6 @@ T ArithmeticShiftRight(T a, int shift) {
return a >> (shift % (sizeof(T) * 8));
}
// Clamps a 64-bit value into the representable range of T and returns it as
// a T.
template <typename T>
T Clamp(int64_t value) {
  static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
  const int64_t lo = static_cast<int64_t>(std::numeric_limits<T>::min());
  const int64_t hi = static_cast<int64_t>(std::numeric_limits<T>::max());
  // Select the bound when the input lies outside [lo, hi], else the input.
  const int64_t bounded = (value < lo) ? lo : ((value > hi) ? hi : value);
  return static_cast<T>(bounded);
}
template <typename T>
int64_t Widen(T value) {
static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
......@@ -236,29 +227,29 @@ int64_t UnsignedWiden(T value) {
// Narrows a 64-bit value to T, saturating at the limits of T.
template <typename T>
T Narrow(int64_t value) {
  return Saturate<T>(value);
}
// Returns a + b saturated to the range of T. Both operands are widened to
// int64_t first, so the intermediate sum cannot overflow.
template <typename T>
T AddSaturate(T a, T b) {
  return Saturate<T>(Widen(a) + Widen(b));
}
// Returns a - b saturated to the range of T. Both operands are widened to
// int64_t first, so the intermediate difference cannot overflow.
template <typename T>
T SubSaturate(T a, T b) {
  return Saturate<T>(Widen(a) - Widen(b));
}
// Adds a and b after passing each through UnsignedWiden(), saturates the sum
// against the limits of T's unsigned counterpart, and converts the result
// back to T on return.
template <typename T>
T UnsignedAddSaturate(T a, T b) {
  using UnsignedT = typename std::make_unsigned<T>::type;
  return Saturate<UnsignedT>(UnsignedWiden(a) + UnsignedWiden(b));
}
// Subtracts b from a after passing each through UnsignedWiden(), saturates
// the difference against the limits of T's unsigned counterpart, and converts
// the result back to T on return.
template <typename T>
T UnsignedSubSaturate(T a, T b) {
  using UnsignedT = typename std::make_unsigned<T>::type;
  return Saturate<UnsignedT>(UnsignedWiden(a) - UnsignedWiden(b));
}
template <typename T>
......@@ -2259,6 +2250,15 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
base::RoundingAverageUnsigned);
}
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
#if V8_TARGET_ARCH_ARM64
// Exercises kExprI16x8Q15MulRSatS through the shared i16x8 binop test driver,
// passing SaturateRoundingQMul<int16_t> as the scalar function (presumably the
// expected-result reference -- confirm against RunI16x8BinOpTest). Compiled
// for arm64 builds only.
WASM_SIMD_TEST_NO_LOWERING(I16x8Q15MulRSatS) {
// The opcode is declared in the post-MVP SIMD opcode list; this scopes the
// wasm_simd_post_mvp flag to the test (presumably enabling it -- see
// FLAG_SCOPE).
FLAG_SCOPE(wasm_simd_post_mvp);
RunI16x8BinOpTest<int16_t>(execution_tier, lower_simd, kExprI16x8Q15MulRSatS,
SaturateRoundingQMul<int16_t>);
}
#endif // V8_TARGET_ARCH_ARM64
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
V8_TARGET_ARCH_ARM
......
......@@ -2222,6 +2222,8 @@ class WasmInterpreterInternals {
BINOP_CASE(I16x8SubSaturateU, i16x8, int8, 8, SaturateSub<uint16_t>(a, b))
BINOP_CASE(I16x8RoundingAverageU, i16x8, int8, 8,
base::RoundingAverageUnsigned<uint16_t>(a, b))
BINOP_CASE(I16x8Q15MulRSatS, i16x8, int8, 8,
SaturateRoundingQMul<int16_t>(a, b))
BINOP_CASE(I8x16Add, i8x16, int16, 16, base::AddWithWraparound(a, b))
BINOP_CASE(I8x16Sub, i8x16, int16, 16, base::SubWithWraparound(a, b))
BINOP_CASE(I8x16Mul, i8x16, int16, 16, base::MulWithWraparound(a, b))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment