Commit b6a4f494 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][liftoff][arm64] Implement s8x16shuffle

Bug: v8:9909
Change-Id: Ica96c2f373b4d90209c8d144486f423f1d8f0859
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2235548
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68372}
parent 80ce96e1
......@@ -1988,7 +1988,39 @@ void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs,
                                          const uint8_t shuffle[16]) {
  // Emits a byte shuffle as a Tbl table lookup: lhs and rhs form the table
  // and the 16 entries of |shuffle| are the per-lane indices into it. The
  // indices are materialized as a 128-bit immediate in a vector register.
  VRegister src1 = lhs.fp();
  VRegister src2 = rhs.fp();

  // By default the index vector is built directly in dst.
  VRegister temp = dst.fp();
  if (dst == lhs || dst == rhs) {
    // dst overlaps with lhs or rhs, so we need a temporary: loading the
    // indices into dst would overwrite an input.
    temp = GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(lhs, rhs)).fp();
  }

  UseScratchRegisterScope scope(this);

  if (src1 != src2 && !AreConsecutive(src1, src2)) {
    // Tbl needs consecutive registers, which our scratch registers are.
    src1 = scope.AcquireV(kFormat16B);
    src2 = scope.AcquireV(kFormat16B);
    DCHECK(AreConsecutive(src1, src2));
    Mov(src1.Q(), lhs.fp().Q());
    Mov(src2.Q(), rhs.fp().Q());
  }

  // With a single source register the table only has entries 0..15, so the
  // indices are reduced to 4 bits; with two registers 5 bits are kept.
  uint8_t mask = lhs == rhs ? 0x0F : 0x1F;

  // Pack the indices into two 64-bit halves: imms[0] holds shuffle[0..7] and
  // imms[1] holds shuffle[8..15], with the lowest index in the lowest byte.
  int64_t imms[2] = {0, 0};
  for (int i = 7; i >= 0; i--) {
    imms[0] = (imms[0] << 8) | (shuffle[i] & mask);
    imms[1] = (imms[1] << 8) | (shuffle[i + 8] & mask);
  }

  Movi(temp.V16B(), imms[1], imms[0]);

  if (src1 == src2) {
    // Both operands are the same register: use the one-register table form.
    Tbl(dst.fp().V16B(), src1.V16B(), temp.V16B());
  } else {
    Tbl(dst.fp().V16B(), src1.V16B(), src2.V16B(), temp.V16B());
  }
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
......
......@@ -10,6 +10,7 @@
// implementation in Liftoff.
#include "src/codegen/assembler-inl.h"
#include "src/wasm/wasm-opcodes.h"
#include "test/cctest/cctest.h"
#include "test/cctest/wasm/wasm-run-utils.h"
#include "test/common/wasm/test-signatures.h"
......@@ -104,6 +105,52 @@ WASM_SIMD_LIFTOFF_TEST(REGRESS_1088273) {
CHECK_EQ(18688, r.Call());
}
// Exercises the register handling in Liftoff's shuffle implementation, which
// is trickier than usual because the shuffle requires its inputs to be in
// adjacent registers on ARM/ARM64.
WASM_SIMD_LIFTOFF_TEST(S8x16Shuffle) {
  WasmRunner<int32_t> runner(ExecutionTier::kLiftoff, kNoLowerSimd);
  // Two S128 locals that exist only to occupy registers, so that the shuffle
  // operands end up in non-adjacent registers.
  const byte first_local = runner.AllocateLocal(kWasmS128);
  const byte second_local = runner.AllocateLocal(kWasmS128);

  // Shuffle inputs: global 0 holds the bytes [0, 1, ... 15] and global 1
  // holds [16, 17, ... 31]. The function never writes them; only the output
  // global is written.
  byte* const low_input = runner.builder().AddGlobal<byte>(kWasmS128);
  byte* const high_input = runner.builder().AddGlobal<byte>(kWasmS128);
  for (int lane = 0; lane < 16; ++lane) {
    WriteLittleEndianValue<byte>(&low_input[lane], lane);
    WriteLittleEndianValue<byte>(&high_input[lane], lane + 16);
  }

  // Global 2: the S128 that receives the shuffle result.
  byte* const result = runner.builder().AddGlobal<byte>(kWasmS128);

  // Lanes 0..14 select the matching lane of lhs; lane 15 selects index 31,
  // i.e. the last byte of rhs.
  std::array<byte, 16> lane_select;
  for (int lane = 0; lane < 15; ++lane) {
    lane_select[lane] = static_cast<byte>(lane);
  }
  lane_select[15] = 31;

  // The trailing comments name the register holding each value; the shuffle
  // is thereby called with the non-adjacent registers v2 and v0.
  BUILD(runner,
        WASM_SET_LOCAL(first_local, WASM_GET_GLOBAL(1)),   // first_local: v0
        WASM_SET_LOCAL(second_local, WASM_GET_GLOBAL(0)),  // second_local: v1
        WASM_GET_GLOBAL(0),                                // global 0: v2
        WASM_GET_LOCAL(first_local),                       // first_local: v0
        WASM_SET_GLOBAL(2, WASM_SIMD_S8x16_SHUFFLE_OP(kExprS8x16Shuffle,
                                                      lane_select, WASM_NOP,
                                                      WASM_NOP)),
        WASM_ONE);

  runner.Call();

  // Everything except the last lane must be unchanged from global 0.
  for (int lane = 0; lane < 15; ++lane) {
    CHECK_EQ(lane, ReadLittleEndianValue<byte>(&result[lane]));
  }
  // The last lane carries the last byte of global 1.
  CHECK_EQ(31, ReadLittleEndianValue<byte>(&result[15]));
}
#undef WASM_SIMD_LIFTOFF_TEST
} // namespace test_run_wasm_simd_liftoff
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment