Commit 90b42052 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement rounding average on x64 and interpreter

This change templatizes the test helpers so that the same function can be
reused for both signed and unsigned data types; a usage sketch follows.
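
[Editorial sketch of the call sites, mirroring the test diff below; not text
from the commit. The defaulted type parameters keep existing signed callers
unchanged, while the new unsigned test instantiates explicitly.]

// Signed callers compile as before: T defaults to int16_t, so an
// Int16BinOp-style function pointer still matches OpType = T (*)(T, T).
RunI16x8BinOpTest(execution_tier, lower_simd, kExprI16x8Add,
                  base::AddWithWraparound);
// The new unsigned test names T explicitly, which also selects the
// base::RoundingAverageUnsigned<uint16_t> instantiation for OpType.
RunI16x8BinOpTest<uint16_t>(execution_tier, lower_simd,
                            kExprI16x8RoundingAverageU,
                            base::RoundingAverageUnsigned);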

We implement a new function RoundingAverageUnsigned in overflowing-math,
rather than in base/utils, since the addition could overflow.
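
[To make the overflow point concrete, a minimal stand-alone sketch of the
helper's reasoning; the real implementation is the one added to
overflowing-math in the diff below. The naive a + b + 1 can wrap in the
operand type before the shift, so the sum is widened to uint64_t first.]

#include <cassert>
#include <cstdint>
#include <type_traits>

template <typename T>
T RoundingAverageUnsignedSketch(T a, T b) {
  static_assert(std::is_unsigned<T>::value, "Only for unsigned types");
  static_assert(sizeof(T) < sizeof(uint64_t), "Must be smaller than uint64_t");
  // Widening makes the +1 safe: 2 * max(T) + 1 always fits in uint64_t here.
  return static_cast<T>(
      (static_cast<uint64_t>(a) + static_cast<uint64_t>(b) + 1) >> 1);
}

int main() {
  // A uint8_t-only (a + b + 1) >> 1 would wrap for 255 + 255; the widened
  // sum yields the correct rounding average, 255.
  assert(RoundingAverageUnsignedSketch<uint8_t>(255, 255) == 255);
  // Rounds half up: avgr_u(1, 2) == 2, matching SSE2 pavgb/pavgw.
  assert(RoundingAverageUnsignedSketch<uint8_t>(1, 2) == 2);
}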

SIMD scalar lowering and implementation for other backends will follow
in future patches.

Bug: v8:10039
Change-Id: I70735f7b6536f197869ef1afbccaf5649e7e8448
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1958007
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65531}
parent c8233829
@@ -83,6 +83,13 @@ inline float RecipSqrt(float a) {
   return -std::numeric_limits<float>::infinity();
 }
+template <typename T>
+inline T RoundingAverageUnsigned(T a, T b) {
+  static_assert(std::is_unsigned<T>::value, "Only for unsigned types");
+  static_assert(sizeof(T) < sizeof(uint64_t), "Must be smaller than uint64_t");
+  return (static_cast<uint64_t>(a) + static_cast<uint64_t>(b) + 1) >> 1;
+}
 }  // namespace base
 }  // namespace v8
......
@@ -187,6 +187,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_OP(Pxor, pxor)
   AVX_OP(Psubd, psubd)
   AVX_OP(Pslld, pslld)
+  AVX_OP(Pavgb, pavgb)
+  AVX_OP(Pavgw, pavgw)
   AVX_OP(Psrad, psrad)
   AVX_OP(Psrld, psrld)
   AVX_OP(Paddd, paddd)
......
@@ -79,8 +79,10 @@
   V(psllw, 66, 0F, F1) \
   V(pslld, 66, 0F, F2) \
   V(psllq, 66, 0F, F3) \
+  V(pavgb, 66, 0F, E0) \
   V(psraw, 66, 0F, E1) \
   V(psrad, 66, 0F, E2) \
+  V(pavgw, 66, 0F, E3) \
   V(psrlw, 66, 0F, D1) \
   V(psrld, 66, 0F, D2) \
   V(psrlq, 66, 0F, D3) \
......
@@ -2095,6 +2095,8 @@ void InstructionSelector::VisitNode(Node* node) {
       return MarkAsSimd128(node), VisitI16x8GtU(node);
     case IrOpcode::kI16x8GeU:
       return MarkAsSimd128(node), VisitI16x8GeU(node);
+    case IrOpcode::kI16x8RoundingAverageU:
+      return MarkAsSimd128(node), VisitI16x8RoundingAverageU(node);
     case IrOpcode::kI8x16Splat:
       return MarkAsSimd128(node), VisitI8x16Splat(node);
     case IrOpcode::kI8x16ExtractLaneU:
@@ -2149,6 +2151,8 @@ void InstructionSelector::VisitNode(Node* node) {
       return MarkAsSimd128(node), VisitI8x16GtU(node);
     case IrOpcode::kI8x16GeU:
       return MarkAsSimd128(node), VisitI8x16GeU(node);
+    case IrOpcode::kI8x16RoundingAverageU:
+      return MarkAsSimd128(node), VisitI8x16RoundingAverageU(node);
     case IrOpcode::kS128Zero:
       return MarkAsSimd128(node), VisitS128Zero(node);
     case IrOpcode::kS128And:
@@ -2630,6 +2634,12 @@ void InstructionSelector::VisitF64x2SConvertI64x2(Node* node) {
 void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
   UNIMPLEMENTED();
 }
+void InstructionSelector::VisitI16x8RoundingAverageU(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI8x16RoundingAverageU(Node* node) {
+  UNIMPLEMENTED();
+}
 #if !V8_TARGET_ARCH_ARM64
 #if !V8_TARGET_ARCH_ARM
 void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
......
@@ -3339,6 +3339,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ pcmpeqw(dst, src);
       break;
     }
+    case kX64I16x8RoundingAverageU: {
+      __ Pavgw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
     case kX64I8x16Splat: {
       CpuFeatureScope sse_scope(tasm(), SSSE3);
       XMMRegister dst = i.OutputSimd128Register();
@@ -3578,6 +3582,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ pcmpeqb(dst, src);
       break;
     }
+    case kX64I8x16RoundingAverageU: {
+      __ Pavgb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
     case kX64S128And: {
       __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
......
@@ -274,6 +274,7 @@ namespace compiler {
   V(X64I16x8MaxU) \
   V(X64I16x8GtU) \
   V(X64I16x8GeU) \
+  V(X64I16x8RoundingAverageU) \
   V(X64I8x16Splat) \
   V(X64I8x16ExtractLaneU) \
   V(X64I8x16ExtractLaneS) \
@@ -301,6 +302,7 @@ namespace compiler {
   V(X64I8x16MaxU) \
   V(X64I8x16GtU) \
   V(X64I8x16GeU) \
+  V(X64I8x16RoundingAverageU) \
   V(X64S128Zero) \
   V(X64S128Not) \
   V(X64S128And) \
......
@@ -246,6 +246,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I16x8MaxU:
     case kX64I16x8GtU:
     case kX64I16x8GeU:
+    case kX64I16x8RoundingAverageU:
    case kX64I8x16Splat:
     case kX64I8x16ExtractLaneU:
     case kX64I8x16ExtractLaneS:
@@ -273,6 +274,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64I8x16MaxU:
     case kX64I8x16GtU:
     case kX64I8x16GeU:
+    case kX64I8x16RoundingAverageU:
     case kX64S128And:
     case kX64S128Or:
     case kX64S128Xor:
......
@@ -2675,6 +2675,7 @@ VISIT_ATOMIC_BINOP(Xor)
   V(I16x8MinU) \
   V(I16x8MaxU) \
   V(I16x8GeU) \
+  V(I16x8RoundingAverageU) \
   V(I8x16SConvertI16x8) \
   V(I8x16Add) \
   V(I8x16AddSaturateS) \
@@ -2690,6 +2691,7 @@ VISIT_ATOMIC_BINOP(Xor)
   V(I8x16MinU) \
   V(I8x16MaxU) \
   V(I8x16GeU) \
+  V(I8x16RoundingAverageU) \
   V(S128And) \
   V(S128Or) \
   V(S128Xor)
......
@@ -421,6 +421,7 @@ MachineType AtomicOpType(Operator const* op) {
   V(I16x8MaxU, Operator::kCommutative, 2, 0, 1) \
   V(I16x8GtU, Operator::kNoProperties, 2, 0, 1) \
   V(I16x8GeU, Operator::kNoProperties, 2, 0, 1) \
+  V(I16x8RoundingAverageU, Operator::kCommutative, 2, 0, 1) \
   V(I8x16Splat, Operator::kNoProperties, 1, 0, 1) \
   V(I8x16Neg, Operator::kNoProperties, 1, 0, 1) \
   V(I8x16Shl, Operator::kNoProperties, 2, 0, 1) \
@@ -445,6 +446,7 @@ MachineType AtomicOpType(Operator const* op) {
   V(I8x16MaxU, Operator::kCommutative, 2, 0, 1) \
   V(I8x16GtU, Operator::kNoProperties, 2, 0, 1) \
   V(I8x16GeU, Operator::kNoProperties, 2, 0, 1) \
+  V(I8x16RoundingAverageU, Operator::kCommutative, 2, 0, 1) \
   V(S128Load, Operator::kNoProperties, 2, 0, 1) \
   V(S128Store, Operator::kNoProperties, 3, 0, 1) \
   V(S128Zero, Operator::kNoProperties, 0, 0, 1) \
......
@@ -666,6 +666,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
   const Operator* I16x8MaxU();
   const Operator* I16x8GtU();
   const Operator* I16x8GeU();
+  const Operator* I16x8RoundingAverageU();
   const Operator* I8x16Splat();
   const Operator* I8x16ExtractLaneU(int32_t);
@@ -695,6 +696,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
   const Operator* I8x16MaxU();
   const Operator* I8x16GtU();
   const Operator* I8x16GeU();
+  const Operator* I8x16RoundingAverageU();
   const Operator* S128Load();
   const Operator* S128Store();
......
@@ -876,6 +876,7 @@
   V(I16x8LeU) \
   V(I16x8GtU) \
   V(I16x8GeU) \
+  V(I16x8RoundingAverageU) \
   V(I8x16Splat) \
   V(I8x16ExtractLaneU) \
   V(I8x16ExtractLaneS) \
@@ -907,6 +908,7 @@
   V(I8x16LeU) \
   V(I8x16GtU) \
   V(I8x16GeU) \
+  V(I8x16RoundingAverageU) \
   V(S128Load) \
   V(S128Store) \
   V(S128Zero) \
......
@@ -4407,6 +4407,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
     case wasm::kExprI16x8GeU:
       return graph()->NewNode(mcgraph()->machine()->I16x8GeU(), inputs[0],
                               inputs[1]);
+    case wasm::kExprI16x8RoundingAverageU:
+      return graph()->NewNode(mcgraph()->machine()->I16x8RoundingAverageU(),
+                              inputs[0], inputs[1]);
     case wasm::kExprI8x16Splat:
       return graph()->NewNode(mcgraph()->machine()->I8x16Splat(), inputs[0]);
     case wasm::kExprI8x16Neg:
@@ -4489,6 +4492,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
     case wasm::kExprI8x16GeU:
       return graph()->NewNode(mcgraph()->machine()->I8x16GeU(), inputs[0],
                               inputs[1]);
+    case wasm::kExprI8x16RoundingAverageU:
+      return graph()->NewNode(mcgraph()->machine()->I8x16RoundingAverageU(),
+                              inputs[0], inputs[1]);
     case wasm::kExprS128And:
       return graph()->NewNode(mcgraph()->machine()->S128And(), inputs[0],
                               inputs[1]);
......
@@ -2006,10 +2006,14 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
     mnemonic = "paddusw";
   } else if (opcode == 0xDE) {
     mnemonic = "pmaxub";
+  } else if (opcode == 0xE0) {
+    mnemonic = "pavgb";
   } else if (opcode == 0xE1) {
     mnemonic = "psraw";
   } else if (opcode == 0xE2) {
     mnemonic = "psrad";
+  } else if (opcode == 0xE3) {
+    mnemonic = "pavgw";
   } else if (opcode == 0xE8) {
     mnemonic = "psubsb";
   } else if (opcode == 0xE9) {
......
@@ -2325,6 +2325,8 @@ class ThreadImpl {
       BINOP_CASE(I16x8AddSaturateU, i16x8, int8, 8, SaturateAdd<uint16_t>(a, b))
       BINOP_CASE(I16x8SubSaturateS, i16x8, int8, 8, SaturateSub<int16_t>(a, b))
       BINOP_CASE(I16x8SubSaturateU, i16x8, int8, 8, SaturateSub<uint16_t>(a, b))
+      BINOP_CASE(I16x8RoundingAverageU, i16x8, int8, 8,
+                 base::RoundingAverageUnsigned<uint16_t>(a, b))
       BINOP_CASE(I8x16Add, i8x16, int16, 16, base::AddWithWraparound(a, b))
       BINOP_CASE(I8x16Sub, i8x16, int16, 16, base::SubWithWraparound(a, b))
       BINOP_CASE(I8x16Mul, i8x16, int16, 16, base::MulWithWraparound(a, b))
@@ -2340,6 +2342,8 @@ class ThreadImpl {
       BINOP_CASE(I8x16SubSaturateS, i8x16, int16, 16, SaturateSub<int8_t>(a, b))
       BINOP_CASE(I8x16SubSaturateU, i8x16, int16, 16,
                  SaturateSub<uint8_t>(a, b))
+      BINOP_CASE(I8x16RoundingAverageU, i8x16, int16, 16,
+                 base::RoundingAverageUnsigned<uint8_t>(a, b))
 #undef BINOP_CASE
 #define UNOP_CASE(op, name, stype, count, expr) \
   case kExpr##op: { \
......
@@ -334,6 +334,9 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
     CASE_I64x2_OP(Load32x2S, "load32x2_s")
     CASE_I64x2_OP(Load32x2U, "load32x2_u")
+    CASE_I8x16_OP(RoundingAverageU, "avgr_u")
+    CASE_I16x8_OP(RoundingAverageU, "avgr_u")

     // Atomic operations.
     CASE_OP(AtomicNotify, "atomic.notify")
     CASE_INT_OP(AtomicWait, "atomic.wait")
......
@@ -444,6 +444,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
   V(I32x4Load16x4U, 0xfdd5, s_s) \
   V(I64x2Load32x2S, 0xfdd6, s_s) \
   V(I64x2Load32x2U, 0xfdd7, s_s) \
+  V(I8x16RoundingAverageU, 0xfdd9, s_ss) \
+  V(I16x8RoundingAverageU, 0xfdda, s_ss) \
   V(I16x8AddHoriz, 0xfdbd, s_ss) \
   V(I32x4AddHoriz, 0xfdbe, s_ss) \
   V(F32x4AddHoriz, 0xfdbf, s_ss) \
......
@@ -2059,11 +2059,12 @@ WASM_SIMD_TEST(I16x8Neg) {
                    base::NegateWithWraparound);
 }
+template <typename T = int16_t, typename OpType = T (*)(T, T)>
 void RunI16x8BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
-                       WasmOpcode opcode, Int16BinOp expected_op) {
-  WasmRunner<int32_t, int32_t, int32_t> r(execution_tier, lower_simd);
+                       WasmOpcode opcode, OpType expected_op) {
+  WasmRunner<int32_t, T, T> r(execution_tier, lower_simd);
   // Global to hold output.
-  int16_t* g = r.builder().AddGlobal<int16_t>(kWasmS128);
+  T* g = r.builder().template AddGlobal<T>(kWasmS128);
   // Build fn to splat test values, perform binop, and write the result.
   byte value1 = 0, value2 = 1;
   byte temp1 = r.AllocateLocal(kWasmS128);
@@ -2074,12 +2075,12 @@ void RunI16x8BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                                            WASM_GET_LOCAL(temp2))),
         WASM_ONE);
-  FOR_INT16_INPUTS(x) {
-    FOR_INT16_INPUTS(y) {
+  for (T x : compiler::ValueHelper::GetVector<T>()) {
+    for (T y : compiler::ValueHelper::GetVector<T>()) {
       r.Call(x, y);
-      int16_t expected = expected_op(x, y);
+      T expected = expected_op(x, y);
       for (int i = 0; i < 8; i++) {
-        CHECK_EQ(expected, ReadLittleEndianValue<int16_t>(&g[i]));
+        CHECK_EQ(expected, ReadLittleEndianValue<T>(&g[i]));
       }
     }
   }
@@ -2180,6 +2181,14 @@ WASM_SIMD_TEST(I16x8LeU) {
                     UnsignedLessEqual);
 }
+#if V8_TARGET_ARCH_X64
+WASM_SIMD_TEST_NO_LOWERING(I16x8RoundingAverageU) {
+  RunI16x8BinOpTest<uint16_t>(execution_tier, lower_simd,
+                              kExprI16x8RoundingAverageU,
+                              base::RoundingAverageUnsigned);
+}
+#endif  // V8_TARGET_ARCH_X64
 void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                          WasmOpcode opcode, Int16ShiftOp expected_op) {
   // Intentionally shift by 16, should be no-op.
@@ -2276,11 +2285,12 @@ WASM_SIMD_TEST(I8x16ConvertI16x8) {
   }
 }
+template <typename T = int8_t, typename OpType = T (*)(T, T)>
 void RunI8x16BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
-                       WasmOpcode opcode, Int8BinOp expected_op) {
-  WasmRunner<int32_t, int32_t, int32_t> r(execution_tier, lower_simd);
+                       WasmOpcode opcode, OpType expected_op) {
+  WasmRunner<int32_t, T, T> r(execution_tier, lower_simd);
   // Global to hold output.
-  int8_t* g = r.builder().AddGlobal<int8_t>(kWasmS128);
+  T* g = r.builder().template AddGlobal<T>(kWasmS128);
   // Build fn to splat test values, perform binop, and write the result.
   byte value1 = 0, value2 = 1;
   byte temp1 = r.AllocateLocal(kWasmS128);
@@ -2291,12 +2301,12 @@ void RunI8x16BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                                            WASM_GET_LOCAL(temp2))),
         WASM_ONE);
-  FOR_INT8_INPUTS(x) {
-    FOR_INT8_INPUTS(y) {
+  for (T x : compiler::ValueHelper::GetVector<T>()) {
+    for (T y : compiler::ValueHelper::GetVector<T>()) {
       r.Call(x, y);
-      int8_t expected = expected_op(x, y);
+      T expected = expected_op(x, y);
       for (int i = 0; i < 16; i++) {
-        CHECK_EQ(expected, ReadLittleEndianValue<int8_t>(&g[i]));
+        CHECK_EQ(expected, ReadLittleEndianValue<T>(&g[i]));
       }
     }
   }
@@ -2397,6 +2407,14 @@ WASM_SIMD_TEST(I8x16Mul) {
                    base::MulWithWraparound);
 }
+#if V8_TARGET_ARCH_X64
+WASM_SIMD_TEST_NO_LOWERING(I8x16RoundingAverageU) {
+  RunI8x16BinOpTest<uint8_t>(execution_tier, lower_simd,
+                             kExprI8x16RoundingAverageU,
+                             base::RoundingAverageUnsigned);
+}
+#endif  // V8_TARGET_ARCH_X64
 void RunI8x16ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                          WasmOpcode opcode, Int8ShiftOp expected_op) {
   // Intentionally shift by 8, should be no-op.
......