Commit 102b4b3c authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm64] Prototype i8x16.popcnt

This implements i8x16.popcnt on arm64 and in the interpreter.

Bug: v8:11002
Change-Id: Ia94a053d7e0a0c800057ac80865ba6f86ac7caf8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2461058
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70461}
parent 1c6cd2ac
...@@ -1420,6 +1420,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1420,6 +1420,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArm64Cmn32: case kArm64Cmn32:
__ Cmn(i.InputOrZeroRegister32(0), i.InputOperand2_32(1)); __ Cmn(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
break; break;
case kArm64Cnt: {
// Emits Cnt from SIMD input 0 into the SIMD output register. The lane width
// is read back from the opcode's MiscField; the instruction selector stores
// 8 there when it lowers i8x16.popcnt.
// NOTE(review): VectorFormatFillQ presumably turns that lane width into the
// matching full 128-bit (Q) vector format — confirm against its definition.
VectorFormat f = VectorFormatFillQ(MiscField::decode(opcode));
__ Cnt(i.OutputSimd128Register().Format(f),
i.InputSimd128Register(0).Format(f));
break;
}
case kArm64Tst: case kArm64Tst:
__ Tst(i.InputOrZeroRegister64(0), i.InputOperand2_64(1)); __ Tst(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
break; break;
......
...@@ -24,6 +24,7 @@ namespace compiler { ...@@ -24,6 +24,7 @@ namespace compiler {
V(Arm64Cmp32) \ V(Arm64Cmp32) \
V(Arm64Cmn) \ V(Arm64Cmn) \
V(Arm64Cmn32) \ V(Arm64Cmn32) \
V(Arm64Cnt) \
V(Arm64Tst) \ V(Arm64Tst) \
V(Arm64Tst32) \ V(Arm64Tst32) \
V(Arm64Or) \ V(Arm64Or) \
......
...@@ -25,6 +25,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -25,6 +25,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Cmp32: case kArm64Cmp32:
case kArm64Cmn: case kArm64Cmn:
case kArm64Cmn32: case kArm64Cmn32:
case kArm64Cnt:
case kArm64Tst: case kArm64Tst:
case kArm64Tst32: case kArm64Tst32:
case kArm64Or: case kArm64Or:
......
...@@ -3766,6 +3766,12 @@ void InstructionSelector::VisitI16x8UConvertI8x16High(Node* node) { ...@@ -3766,6 +3766,12 @@ void InstructionSelector::VisitI16x8UConvertI8x16High(Node* node) {
VisitSignExtendLong(this, kArm64Uxtl2, node, 16); VisitSignExtendLong(this, kArm64Uxtl2, node, 16);
} }
// Lowers i8x16.popcnt to kArm64Cnt. The lane width in bits (8) is packed into
// the opcode's MiscField, which the code generator decodes again when it
// picks the vector format for the emitted instruction.
void InstructionSelector::VisitI8x16Popcnt(Node* node) {
  const InstructionCode opcode = kArm64Cnt | MiscField::encode(8);
  VisitRR(this, opcode, node);
}
// static // static
MachineOperatorBuilder::Flags MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() { InstructionSelector::SupportedMachineOperatorFlags() {
......
...@@ -2198,6 +2198,8 @@ void InstructionSelector::VisitNode(Node* node) { ...@@ -2198,6 +2198,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI8x16GeU(node); return MarkAsSimd128(node), VisitI8x16GeU(node);
case IrOpcode::kI8x16RoundingAverageU: case IrOpcode::kI8x16RoundingAverageU:
return MarkAsSimd128(node), VisitI8x16RoundingAverageU(node); return MarkAsSimd128(node), VisitI8x16RoundingAverageU(node);
case IrOpcode::kI8x16Popcnt:
return MarkAsSimd128(node), VisitI8x16Popcnt(node);
case IrOpcode::kI8x16Abs: case IrOpcode::kI8x16Abs:
return MarkAsSimd128(node), VisitI8x16Abs(node); return MarkAsSimd128(node), VisitI8x16Abs(node);
case IrOpcode::kI8x16BitMask: case IrOpcode::kI8x16BitMask:
...@@ -2697,6 +2699,9 @@ void InstructionSelector::VisitI64x2UConvertI32x4Low(Node* node) { ...@@ -2697,6 +2699,9 @@ void InstructionSelector::VisitI64x2UConvertI32x4Low(Node* node) {
void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) { void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
// TODO(v8:11002) Prototype i8x16.popcnt.
// Stub compiled only inside the !V8_TARGET_ARCH_ARM64 region that ends right
// after it: no lowering exists here yet, so selecting this node aborts.
void InstructionSelector::VisitI8x16Popcnt(Node* node) {
  UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM64 #endif // !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64 #if !V8_TARGET_ARCH_X64
......
...@@ -492,6 +492,7 @@ ShiftKind ShiftKindOf(Operator const* op) { ...@@ -492,6 +492,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I8x16GtU, Operator::kNoProperties, 2, 0, 1) \ V(I8x16GtU, Operator::kNoProperties, 2, 0, 1) \
V(I8x16GeU, Operator::kNoProperties, 2, 0, 1) \ V(I8x16GeU, Operator::kNoProperties, 2, 0, 1) \
V(I8x16RoundingAverageU, Operator::kCommutative, 2, 0, 1) \ V(I8x16RoundingAverageU, Operator::kCommutative, 2, 0, 1) \
V(I8x16Popcnt, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Abs, Operator::kNoProperties, 1, 0, 1) \ V(I8x16Abs, Operator::kNoProperties, 1, 0, 1) \
V(I8x16BitMask, Operator::kNoProperties, 1, 0, 1) \ V(I8x16BitMask, Operator::kNoProperties, 1, 0, 1) \
V(S128Load, Operator::kNoProperties, 2, 0, 1) \ V(S128Load, Operator::kNoProperties, 2, 0, 1) \
......
...@@ -760,6 +760,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final ...@@ -760,6 +760,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I8x16GtU(); const Operator* I8x16GtU();
const Operator* I8x16GeU(); const Operator* I8x16GeU();
const Operator* I8x16RoundingAverageU(); const Operator* I8x16RoundingAverageU();
const Operator* I8x16Popcnt();
const Operator* I8x16Abs(); const Operator* I8x16Abs();
const Operator* I8x16BitMask(); const Operator* I8x16BitMask();
......
...@@ -937,6 +937,7 @@ ...@@ -937,6 +937,7 @@
V(I8x16GtU) \ V(I8x16GtU) \
V(I8x16GeU) \ V(I8x16GeU) \
V(I8x16RoundingAverageU) \ V(I8x16RoundingAverageU) \
V(I8x16Popcnt) \
V(I8x16Abs) \ V(I8x16Abs) \
V(I8x16BitMask) \ V(I8x16BitMask) \
V(S128Load) \ V(S128Load) \
......
...@@ -4904,6 +4904,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) { ...@@ -4904,6 +4904,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI8x16RoundingAverageU: case wasm::kExprI8x16RoundingAverageU:
return graph()->NewNode(mcgraph()->machine()->I8x16RoundingAverageU(), return graph()->NewNode(mcgraph()->machine()->I8x16RoundingAverageU(),
inputs[0], inputs[1]); inputs[0], inputs[1]);
case wasm::kExprI8x16Popcnt:
return graph()->NewNode(mcgraph()->machine()->I8x16Popcnt(), inputs[0]);
case wasm::kExprI8x16Abs: case wasm::kExprI8x16Abs:
return graph()->NewNode(mcgraph()->machine()->I8x16Abs(), inputs[0]); return graph()->NewNode(mcgraph()->machine()->I8x16Abs(), inputs[0]);
case wasm::kExprI8x16BitMask: case wasm::kExprI8x16BitMask:
......
...@@ -319,6 +319,7 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { ...@@ -319,6 +319,7 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_I16x8_OP(Q15MulRSatS, "q15mulr_sat_s") CASE_I16x8_OP(Q15MulRSatS, "q15mulr_sat_s")
CASE_I8x16_OP(Abs, "abs") CASE_I8x16_OP(Abs, "abs")
CASE_I8x16_OP(Popcnt, "popcnt")
CASE_I16x8_OP(Abs, "abs") CASE_I16x8_OP(Abs, "abs")
CASE_I32x4_OP(Abs, "abs") CASE_I32x4_OP(Abs, "abs")
......
...@@ -472,6 +472,7 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig, ...@@ -472,6 +472,7 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
#define FOREACH_SIMD_POST_MVP_OPCODE(V) \ #define FOREACH_SIMD_POST_MVP_OPCODE(V) \
V(I8x16Mul, 0xfd75, s_ss) \ V(I8x16Mul, 0xfd75, s_ss) \
V(I8x16Popcnt, 0xfd7c, s_s) \
V(I16x8Q15MulRSatS, 0xfd9c, s_ss) \ V(I16x8Q15MulRSatS, 0xfd9c, s_ss) \
V(I64x2Eq, 0xfdc0, s_ss) \ V(I64x2Eq, 0xfdc0, s_ss) \
V(F32x4Qfma, 0xfdb4, s_sss) \ V(F32x4Qfma, 0xfdb4, s_sss) \
......
...@@ -2331,6 +2331,31 @@ WASM_SIMD_TEST(I8x16Abs) { ...@@ -2331,6 +2331,31 @@ WASM_SIMD_TEST(I8x16Abs) {
RunI8x16UnOpTest(execution_tier, lower_simd, kExprI8x16Abs, Abs); RunI8x16UnOpTest(execution_tier, lower_simd, kExprI8x16Abs, Abs);
} }
#if V8_TARGET_ARCH_ARM64
// TODO(v8:11002) Prototype i8x16.popcnt.
// For every uint8 input: splat it across an i8x16 vector, apply i8x16.popcnt,
// and check that each of the 16 result lanes equals the input's bit count.
WASM_SIMD_TEST_NO_LOWERING(I8x16Popcnt) {
  FLAG_SCOPE(wasm_simd_post_mvp);
  WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
  // The generated function stores its result vector into this global.
  int8_t* result_lanes = r.builder().AddGlobal<int8_t>(kWasmS128);
  // Function body: splat the scalar argument into a local, run the unop on
  // that local, and write the outcome to global 0.
  byte scalar_arg = 0;
  byte splat_local = r.AllocateLocal(kWasmS128);
  BUILD(r,
        WASM_SET_LOCAL(splat_local,
                       WASM_SIMD_I8x16_SPLAT(WASM_GET_LOCAL(scalar_arg))),
        WASM_SET_GLOBAL(
            0, WASM_SIMD_UNOP(kExprI8x16Popcnt, WASM_GET_LOCAL(splat_local))),
        WASM_ONE);

  FOR_UINT8_INPUTS(x) {
    r.Call(x);
    unsigned want = base::bits::CountPopulation(x);
    for (int lane = 0; lane < 16; lane++) {
      CHECK_EQ(want, ReadLittleEndianValue<int8_t>(&result_lanes[lane]));
    }
  }
}
#endif  // V8_TARGET_ARCH_ARM64
// Tests both signed and unsigned conversion from I16x8 (packing). // Tests both signed and unsigned conversion from I16x8 (packing).
WASM_SIMD_TEST(I8x16ConvertI16x8) { WASM_SIMD_TEST(I8x16ConvertI16x8) {
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd); WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
......
...@@ -2274,6 +2274,8 @@ class WasmInterpreterInternals { ...@@ -2274,6 +2274,8 @@ class WasmInterpreterInternals {
UNOP_CASE(I16x8Abs, i16x8, int8, 8, std::abs(a)) UNOP_CASE(I16x8Abs, i16x8, int8, 8, std::abs(a))
UNOP_CASE(I8x16Neg, i8x16, int16, 16, base::NegateWithWraparound(a)) UNOP_CASE(I8x16Neg, i8x16, int16, 16, base::NegateWithWraparound(a))
UNOP_CASE(I8x16Abs, i8x16, int16, 16, std::abs(a)) UNOP_CASE(I8x16Abs, i8x16, int16, 16, std::abs(a))
// Interpreter handling of i8x16.popcnt, computed with CountPopulation.
// NOTE(review): the explicit <uint8_t> presumably converts the lane value `a`
// to an unsigned 8-bit quantity before counting, so at most 8 bits are
// counted — confirm against the UNOP_CASE expansion.
UNOP_CASE(I8x16Popcnt, i8x16, int16, 16,
base::bits::CountPopulation<uint8_t>(a))
#undef UNOP_CASE #undef UNOP_CASE
// Cast to double in call to signbit is due to MSCV issue, see // Cast to double in call to signbit is due to MSCV issue, see
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment