Commit 67ff779e authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Optimize i8x16.popcnt

Instead of loading the same mask twice, we load from an external
reference twice. This saves some binary size and a bunch of
instructions.

Bug: v8:11002
Change-Id: Ice80bd10694dcca920e18b8043390d7631c65805
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2643404
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72242}
parent d46ea552
......@@ -75,6 +75,12 @@ constexpr struct alignas(16) {
} double_negate_constant = {uint64_t{0x8000000000000000},
uint64_t{0x8000000000000000}};
// 128-bit, 16-byte-aligned lookup table for i8x16.popcnt.
//
// Read as 16 bytes starting from the least significant byte of `a`, byte i
// holds the number of set bits in the 4-bit value i (0..15). `a` covers
// nibble values 0..7 and `b` covers 8..15. The table is exposed through
// ExternalReference::address_of_wasm_i8x16_popcnt_mask() and is consumed by
// the x64 code generator as the table operand of pshufb.
constexpr struct alignas(16) {
  uint64_t a;  // popcounts of nibble values 0..7, lowest byte first
  uint64_t b;  // popcounts of nibble values 8..15, lowest byte first
} wasm_i8x16_popcnt_mask = {
    /*a=*/uint64_t{0x03020201'02010100},
    /*b=*/uint64_t{0x04030302'03020201},
};
// Implementation of ExternalReference
static ExternalReference::Type BuiltinCallTypeForResultSize(int result_size) {
......@@ -480,6 +486,10 @@ ExternalReference ExternalReference::address_of_double_neg_constant() {
return ExternalReference(reinterpret_cast<Address>(&double_negate_constant));
}
// Returns an ExternalReference wrapping the address of the file-level
// wasm_i8x16_popcnt_mask constant, so generated code can load the table from
// memory.
ExternalReference ExternalReference::address_of_wasm_i8x16_popcnt_mask() {
  const Address mask_address =
      reinterpret_cast<Address>(&wasm_i8x16_popcnt_mask);
  return ExternalReference(mask_address);
}
ExternalReference
ExternalReference::address_of_enable_experimental_regexp_engine() {
return ExternalReference(&FLAG_enable_experimental_regexp_engine);
......
......@@ -115,6 +115,7 @@ class StatsCounter;
V(address_of_runtime_stats_flag, "TracingFlags::runtime_stats") \
V(address_of_the_hole_nan, "the_hole_nan") \
V(address_of_uint32_bias, "uint32_bias") \
V(address_of_wasm_i8x16_popcnt_mask, "wasm_i8x16_popcnt_mask") \
V(bytecode_size_table_address, "Bytecodes::bytecode_size_table_address") \
V(check_object_type, "check_object_type") \
V(compute_integer_hash, "ComputeSeededHash") \
......
......@@ -3808,16 +3808,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpshufb(kScratchDoubleReg, tmp, kScratchDoubleReg);
__ vpaddb(dst, dst, kScratchDoubleReg);
} else {
Operand mask = __ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_i8x16_popcnt_mask());
__ shufps(tmp, tmp, 0);
__ Move(kScratchDoubleReg, src);
__ andps(kScratchDoubleReg, tmp);
__ andnps(tmp, src);
__ Move(dst, 0x04030302'03020201, 0x03020201'02010100);
__ psrlw(tmp, 4);
__ pshufb(dst, kScratchDoubleReg);
__ Move(kScratchDoubleReg, 0x04030302'03020201, 0x03020201'02010100);
__ pshufb(kScratchDoubleReg, tmp);
__ paddb(dst, kScratchDoubleReg);
__ Move(kScratchDoubleReg, tmp);
__ andps(tmp, src);
__ andnps(kScratchDoubleReg, src);
__ psrlw(kScratchDoubleReg, 4);
__ movups(dst, mask);
__ pshufb(dst, tmp);
__ movups(tmp, mask);
__ pshufb(tmp, kScratchDoubleReg);
__ paddb(dst, tmp);
}
break;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment