Commit a2596d41, authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][x64] Prototype i8x16.popcnt

Code sequence from https://github.com/WebAssembly/simd/pull/379.

Bug: v8:11002
Change-Id: I47c1090d792f8cbb9d7846ace9a4f996d0c460b1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2626717
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72174}
parent eedb2961
......@@ -183,6 +183,7 @@
// These require AVX2, and we only define the VEX-128 versions.
//
// X-macro list of the broadcast instructions: the caller supplies a macro V,
// which is invoked once per entry with the instruction mnemonic followed by
// four hex byte tokens.
// NOTE(review): the four tokens look like the mandatory prefix, the two escape
// bytes and the opcode byte of each instruction -- confirm against the macros
// that consume this list.
#define AVX2_BROADCAST_LIST(V) \
V(vpbroadcastd, 66, 0F, 38, 58) \
V(vpbroadcastb, 66, 0F, 38, 78) \
V(vpbroadcastw, 66, 0F, 38, 79)
......
......@@ -2778,10 +2778,10 @@ void InstructionSelector::VisitPrefetchNonTemporal(Node* node) {
}
#endif // !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
// TODO(v8:11002) Prototype i8x16.popcnt.
// Stub: instruction selection for i8x16.popcnt is not implemented here, so
// visiting such a node hits UNIMPLEMENTED().
void InstructionSelector::VisitI8x16Popcnt(Node* node) {
  UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && \
!V8_TARGET_ARCH_IA32
......
......@@ -3786,6 +3786,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kX64I8x16Popcnt: {
// Per-byte population count via a nibble lookup table: every byte is split
// into its low and high 4-bit halves, each half indexes a 16-entry table of
// bit counts through a byte shuffle, and the two looked-up counts are added.
// Code sequence from https://github.com/WebAssembly/simd/pull/379.
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
XMMRegister tmp = i.TempSimd128Register(0);
// tmp = wasm_i8x16_splat(0x0F)
// Only the 32-bit constant is loaded here; each branch below replicates it
// across the whole register.
__ Move(tmp, uint32_t{0x0F0F0F0F});
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
// Replicate the low 32-bit lane of tmp into all four lanes.
if (CpuFeatures::IsSupported(AVX2)) {
CpuFeatureScope avx2_scope(tasm(), AVX2);
__ vpbroadcastd(tmp, tmp);
} else {
__ vpshufd(tmp, tmp, 0);
}
// scratch = ~mask & src: the high nibble of each byte, still in bits 7..4.
__ vpandn(kScratchDoubleReg, tmp, src);
// dst = mask & src: the low nibble of each byte.
__ vpand(dst, tmp, src);
// The mask is no longer needed; reuse tmp for the lookup table where byte i
// holds popcount(i) for i in 0..15 (given as high and low 64-bit halves).
__ Move(tmp, 0x04030302'03020201, 0x03020201'02010100);
// Bring the high nibbles down to bits 3..0. The shift is 16 bits wide, but the
// bits that cross a byte boundary were already cleared by the vpandn above.
__ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 4);
// Look up the bit count of each low nibble and each high nibble.
__ vpshufb(dst, tmp, dst);
__ vpshufb(kScratchDoubleReg, tmp, kScratchDoubleReg);
// popcount(byte) = popcount(low nibble) + popcount(high nibble).
__ vpaddb(dst, dst, kScratchDoubleReg);
} else {
// SSE fallback: the two-operand instructions overwrite their first operand,
// so src is copied before masking and the table is materialized twice.
// Replicate the low 32-bit lane of tmp into all four lanes.
__ shufps(tmp, tmp, 0);
// scratch = src & mask: the low nibble of each byte.
__ Move(kScratchDoubleReg, src);
__ andps(kScratchDoubleReg, tmp);
// tmp = ~mask & src: the high nibble of each byte, still in bits 7..4.
__ andnps(tmp, src);
// dst = lookup table (byte i holds popcount(i)); src is not read after this.
__ Move(dst, 0x04030302'03020201, 0x03020201'02010100);
// Bring the high nibbles down to bits 3..0.
__ psrlw(tmp, 4);
// dst = table[low nibble]; pshufb consumes the table held in dst.
__ pshufb(dst, kScratchDoubleReg);
// Reload the table into scratch, whose low nibbles have now been consumed.
__ Move(kScratchDoubleReg, 0x04030302'03020201, 0x03020201'02010100);
// scratch = table[high nibble].
__ pshufb(kScratchDoubleReg, tmp);
// popcount(byte) = popcount(low nibble) + popcount(high nibble).
__ paddb(dst, kScratchDoubleReg);
}
break;
}
case kX64S128Load8Splat: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
XMMRegister dst = i.OutputSimd128Register();
......
......@@ -343,6 +343,7 @@ namespace compiler {
V(X64S128AndNot) \
V(X64I8x16Swizzle) \
V(X64I8x16Shuffle) \
V(X64I8x16Popcnt) \
V(X64S128Load8Splat) \
V(X64S128Load16Splat) \
V(X64S128Load32Splat) \
......
......@@ -317,6 +317,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64V16x8AllTrue:
case kX64I8x16Swizzle:
case kX64I8x16Shuffle:
case kX64I8x16Popcnt:
case kX64Shufps:
case kX64S32x4Rotate:
case kX64S32x4Swizzle:
......
......@@ -3695,6 +3695,16 @@ void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) {
Emit(kX64I16x8ExtAddPairwiseI8x16U, dst, g.UseRegister(node->InputAt(0)));
}
// Selects the x64 instruction for i8x16.popcnt (per-byte population count).
//
// Operand constraints:
//  - The output is defined as a plain register. The original code chose it
//    with a ternary on CpuFeatures::IsSupported(AVX) whose two branches were
//    identical, so the feature check was dead and is dropped here.
//  - The input uses UseUniqueRegister so it is not assigned the same register
//    as the output or the temp.
//  - One Simd128 temp register is reserved for the instruction.
void InstructionSelector::VisitI8x16Popcnt(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand dst = g.DefineAsRegister(node);
  InstructionOperand temps[] = {g.TempSimd128Register()};
  Emit(kX64I8x16Popcnt, dst, g.UseUniqueRegister(node->InputAt(0)),
       arraysize(temps), temps);
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
......
......@@ -2623,7 +2623,7 @@ WASM_SIMD_TEST(I8x16Abs) {
RunI8x16UnOpTest(execution_tier, lower_simd, kExprI8x16Abs, Abs);
}
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64
// TODO(v8:11002) Prototype i8x16.popcnt.
WASM_SIMD_TEST_NO_LOWERING(I8x16Popcnt) {
FLAG_SCOPE(wasm_simd_post_mvp);
......@@ -2646,7 +2646,7 @@ WASM_SIMD_TEST_NO_LOWERING(I8x16Popcnt) {
}
}
}
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64
// Tests both signed and unsigned conversion from I16x8 (packing).
WASM_SIMD_TEST(I8x16ConvertI16x8) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment