Commit a2596d41 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][x64] Prototype i8x16.popcnt

Code sequence from https://github.com/WebAssembly/simd/pull/379.

Bug: v8:11002
Change-Id: I47c1090d792f8cbb9d7846ace9a4f996d0c460b1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2626717
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72174}
parent eedb2961
...@@ -183,6 +183,7 @@ ...@@ -183,6 +183,7 @@
// These require AVX2, and we only define the VEX-128 versions. // These require AVX2, and we only define the VEX-128 versions.
#define AVX2_BROADCAST_LIST(V) \ #define AVX2_BROADCAST_LIST(V) \
V(vpbroadcastd, 66, 0F, 38, 58) \
V(vpbroadcastb, 66, 0F, 38, 78) \ V(vpbroadcastb, 66, 0F, 38, 78) \
V(vpbroadcastw, 66, 0F, 38, 79) V(vpbroadcastw, 66, 0F, 38, 79)
......
...@@ -2778,10 +2778,10 @@ void InstructionSelector::VisitPrefetchNonTemporal(Node* node) { ...@@ -2778,10 +2778,10 @@ void InstructionSelector::VisitPrefetchNonTemporal(Node* node) {
} }
#endif // !V8_TARGET_ARCH_ARM64 #endif // !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM #if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
// TODO(v8:11002) Prototype i8x16.popcnt. // TODO(v8:11002) Prototype i8x16.popcnt.
void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM #endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && \ #if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && \
!V8_TARGET_ARCH_IA32 !V8_TARGET_ARCH_IA32
......
...@@ -3786,6 +3786,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3786,6 +3786,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
break; break;
} }
// i8x16.popcnt: per-byte population count of a 128-bit vector.
// Strategy: split every byte into its low and high nibble, look each nibble
// up in a 16-entry popcount table with a byte shuffle, and add the two
// results. Uses one temp SIMD register plus kScratchDoubleReg.
case kX64I8x16Popcnt: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
XMMRegister tmp = i.TempSimd128Register(0);
// tmp = wasm_i8x16_splat(0x0F)
// The 32-bit constant loaded here is replicated to the remaining lanes by
// the broadcast/shuffle at the start of each branch below.
__ Move(tmp, uint32_t{0x0F0F0F0F});
if (CpuFeatures::IsSupported(AVX)) {
// AVX path: three-operand forms, so src is never overwritten.
CpuFeatureScope avx_scope(tasm(), AVX);
if (CpuFeatures::IsSupported(AVX2)) {
CpuFeatureScope avx2_scope(tasm(), AVX2);
// Broadcast the low dword of tmp to all four dwords.
__ vpbroadcastd(tmp, tmp);
} else {
// Without AVX2: shuffle control 0 selects dword 0 for every lane.
__ vpshufd(tmp, tmp, 0);
}
// scratch = ~mask & src: high nibble of each byte (still in bits 4..7).
__ vpandn(kScratchDoubleReg, tmp, src);
// dst = mask & src: low nibble of each byte.
__ vpand(dst, tmp, src);
// The mask is no longer needed; reuse tmp for the lookup table.
// Presumably Move takes (high, low) 64-bit halves, which makes byte k of
// tmp equal to popcount(k) for k in 0..15.
__ Move(tmp, 0x04030302'03020201, 0x03020201'02010100);
// Bring the high nibbles down to bits 0..3. A 16-bit shift is fine here:
// the low nibbles were cleared above, so only zeros cross byte boundaries.
__ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 4);
// dst[k] = table[low nibble of src[k]].
__ vpshufb(dst, tmp, dst);
// scratch[k] = table[high nibble of src[k]].
__ vpshufb(kScratchDoubleReg, tmp, kScratchDoubleReg);
// popcount(byte) = popcount(low nibble) + popcount(high nibble).
__ vpaddb(dst, dst, kScratchDoubleReg);
} else {
// SSE path: two-operand (destructive) forms, hence the extra copies.
// NOTE(review): pshufb is an SSSE3 instruction and no CpuFeatureScope is
// visible in this block - confirm SSSE3 is guaranteed on this path.
// Replicate dword 0 of tmp to all lanes.
__ shufps(tmp, tmp, 0);
__ Move(kScratchDoubleReg, src);
// scratch = src & mask: low nibble of each byte.
__ andps(kScratchDoubleReg, tmp);
// tmp = ~mask & src: high nibble of each byte; the mask itself is consumed.
__ andnps(tmp, src);
// dst = popcount lookup table (same constant as in the AVX path).
__ Move(dst, 0x04030302'03020201, 0x03020201'02010100);
// Move the high nibbles down to bits 0..3 of each byte.
__ psrlw(tmp, 4);
// pshufb shuffles its first operand in place: dst = table[low nibbles].
__ pshufb(dst, kScratchDoubleReg);
// The table in dst was overwritten by the shuffle, so materialize it again.
__ Move(kScratchDoubleReg, 0x04030302'03020201, 0x03020201'02010100);
// scratch = table[high nibbles].
__ pshufb(kScratchDoubleReg, tmp);
// Sum the two per-nibble counts.
__ paddb(dst, kScratchDoubleReg);
}
break;
}
case kX64S128Load8Splat: { case kX64S128Load8Splat: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
......
...@@ -343,6 +343,7 @@ namespace compiler { ...@@ -343,6 +343,7 @@ namespace compiler {
V(X64S128AndNot) \ V(X64S128AndNot) \
V(X64I8x16Swizzle) \ V(X64I8x16Swizzle) \
V(X64I8x16Shuffle) \ V(X64I8x16Shuffle) \
V(X64I8x16Popcnt) \
V(X64S128Load8Splat) \ V(X64S128Load8Splat) \
V(X64S128Load16Splat) \ V(X64S128Load16Splat) \
V(X64S128Load32Splat) \ V(X64S128Load32Splat) \
......
...@@ -317,6 +317,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -317,6 +317,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64V16x8AllTrue: case kX64V16x8AllTrue:
case kX64I8x16Swizzle: case kX64I8x16Swizzle:
case kX64I8x16Shuffle: case kX64I8x16Shuffle:
case kX64I8x16Popcnt:
case kX64Shufps: case kX64Shufps:
case kX64S32x4Rotate: case kX64S32x4Rotate:
case kX64S32x4Swizzle: case kX64S32x4Swizzle:
......
...@@ -3695,6 +3695,16 @@ void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) { ...@@ -3695,6 +3695,16 @@ void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) {
Emit(kX64I16x8ExtAddPairwiseI8x16U, dst, g.UseRegister(node->InputAt(0))); Emit(kX64I16x8ExtAddPairwiseI8x16U, dst, g.UseRegister(node->InputAt(0)));
} }
// Selects kX64I8x16Popcnt for an i8x16.popcnt node.
//
// Operand constraints:
//   - output: any register. The same constraint applies whether or not AVX
//     is available, so no CPU-feature check is needed here.
//   - input 0: a unique register, so it is never allocated to the same
//     register as the temp.
//   - temps: one SIMD128 register for the code generator to use as scratch.
void InstructionSelector::VisitI8x16Popcnt(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand dst = g.DefineAsRegister(node);
  InstructionOperand temps[] = {g.TempSimd128Register()};
  Emit(kX64I8x16Popcnt, dst, g.UseUniqueRegister(node->InputAt(0)),
       arraysize(temps), temps);
}
// static // static
MachineOperatorBuilder::Flags MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() { InstructionSelector::SupportedMachineOperatorFlags() {
......
...@@ -2623,7 +2623,7 @@ WASM_SIMD_TEST(I8x16Abs) { ...@@ -2623,7 +2623,7 @@ WASM_SIMD_TEST(I8x16Abs) {
RunI8x16UnOpTest(execution_tier, lower_simd, kExprI8x16Abs, Abs); RunI8x16UnOpTest(execution_tier, lower_simd, kExprI8x16Abs, Abs);
} }
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM #if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64
// TODO(v8:11002) Prototype i8x16.popcnt. // TODO(v8:11002) Prototype i8x16.popcnt.
WASM_SIMD_TEST_NO_LOWERING(I8x16Popcnt) { WASM_SIMD_TEST_NO_LOWERING(I8x16Popcnt) {
FLAG_SCOPE(wasm_simd_post_mvp); FLAG_SCOPE(wasm_simd_post_mvp);
...@@ -2646,7 +2646,7 @@ WASM_SIMD_TEST_NO_LOWERING(I8x16Popcnt) { ...@@ -2646,7 +2646,7 @@ WASM_SIMD_TEST_NO_LOWERING(I8x16Popcnt) {
} }
} }
} }
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM #endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64
// Tests both signed and unsigned conversion from I16x8 (packing). // Tests both signed and unsigned conversion from I16x8 (packing).
WASM_SIMD_TEST(I8x16ConvertI16x8) { WASM_SIMD_TEST(I8x16ConvertI16x8) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment