Commit 839e9695 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][liftoff] Implement s8x16.swizzle

Implemented for ia32, x64, arm, arm64, all in one patch (phew). The
code for each platform is simple enough (a short paragraph) and is the
same as the TurboFan codegen.

Bug: v8:9909
Change-Id: Idbc1cbd58c16e455b1656c2367c8d9db10308e35
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2208610
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68022}
parent 7271e7c5
...@@ -2180,6 +2180,23 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, ...@@ -2180,6 +2180,23 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
} }
} }
// Emits s8x16.swizzle for arm: the 128-bit value in lhs is used as a byte
// table and the bytes of rhs are the lookup indices. The result is produced
// with two vtbl instructions, one per 64-bit half of dst.
void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);

  // By default the table is read straight out of lhs.
  QwNeonRegister table_reg = liftoff::GetSimd128Register(lhs);
  if (dst == lhs) {
    // Writing the low half of dst would destroy the table before the high
    // half has been computed, so look it up from a scratch copy instead.
    QwNeonRegister scratch = temps.AcquireQ();
    TurboAssembler::Move(scratch, table_reg);
    table_reg = scratch;
  }
  NeonListOperand table(table_reg);

  // Low half of the result uses the low half of the indices, then likewise
  // for the high half; both lookups share the same full-width table.
  vtbl(dst.low_fp(), table, rhs.low_fp());
  vtbl(dst.high_fp(), table, rhs.high_fp());
}
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
TurboAssembler::Move(dst.low_fp(), src.fp()); TurboAssembler::Move(dst.low_fp(), src.fp());
......
...@@ -1200,6 +1200,12 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, ...@@ -1200,6 +1200,12 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
} }
} }
// Emits s8x16.swizzle for arm64 as a single Tbl instruction operating on all
// three registers viewed as 16 byte lanes.
void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs) {
  const auto result = dst.fp().V16B();
  const auto table = lhs.fp().V16B();
  const auto indices = rhs.fp().V16B();
  Tbl(result, table, indices);
}
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Dup(dst.fp().V2D(), src.fp().D(), 0); Dup(dst.fp().V2D(), src.fp().D(), 0);
......
...@@ -2170,6 +2170,22 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, ...@@ -2170,6 +2170,22 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
} }
} }
// Emits s8x16.swizzle for ia32: dst receives the bytes of lhs selected by the
// byte indices in rhs, with out-of-range indices producing 0.
//
// dst may alias lhs or rhs. A temporary XMM register is allocated to hold the
// adjusted index mask.
void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs) {
  // Pin dst in addition to the inputs: when lhs != dst, dst is overwritten
  // with lhs before pshufb reads the mask, so a mask register that aliased
  // dst would be clobbered and the shuffle would use lhs as its own indices.
  XMMRegister mask =
      GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst, lhs, rhs)).fp();
  // Out-of-range indices should return 0. Add 112 (0x70) to every index byte
  // with unsigned saturation so that any value > 15 ends up >= 128 (top bit
  // set), which makes pshufb zero that lane; values 0..15 keep their low
  // four bits and the top bit stays clear.
  TurboAssembler::Move(mask, uint32_t{0x70707070});
  // Broadcast the low 32 bits (0x70707070) to all four dwords of the mask.
  Pshufd(mask, mask, uint8_t{0x0});
  Paddusb(mask, rhs.fp());
  // pshufb shuffles its destination in place, so the table must be in dst.
  // rhs has already been consumed above, so this is safe even if dst == rhs.
  if (lhs != dst) {
    Movaps(dst.fp(), lhs.fp());
  }
  Pshufb(dst.fp(), mask);
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Movd(dst.fp(), src.gp()); Movd(dst.fp(), src.gp());
......
...@@ -746,7 +746,8 @@ class LiftoffAssembler : public TurboAssembler { ...@@ -746,7 +746,8 @@ class LiftoffAssembler : public TurboAssembler {
Register offset_reg, uint32_t offset_imm, Register offset_reg, uint32_t offset_imm,
LoadType type, LoadTransformationKind transform, LoadType type, LoadTransformationKind transform,
uint32_t* protected_load_pc); uint32_t* protected_load_pc);
// Emits code for s8x16.swizzle: dst receives bytes of lhs selected by the
// byte indices held in rhs. Implemented per architecture.
inline void emit_s8x16_swizzle(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src); inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src); inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src); inline void emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src);
......
...@@ -2453,6 +2453,8 @@ class LiftoffCompiler { ...@@ -2453,6 +2453,8 @@ class LiftoffCompiler {
return unsupported(decoder, kSimd, "simd"); return unsupported(decoder, kSimd, "simd");
} }
switch (opcode) { switch (opcode) {
case wasm::kExprS8x16Swizzle:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_s8x16_swizzle);
case wasm::kExprI8x16Splat: case wasm::kExprI8x16Splat:
return EmitUnOp<kI32, kS128>(&LiftoffAssembler::emit_i8x16_splat); return EmitUnOp<kI32, kS128>(&LiftoffAssembler::emit_i8x16_splat);
case wasm::kExprI16x8Splat: case wasm::kExprI16x8Splat:
......
...@@ -2222,6 +2222,22 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr, ...@@ -2222,6 +2222,22 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
} }
} }
// Emits s8x16.swizzle for x64: dst receives the bytes of lhs selected by the
// byte indices in rhs, with out-of-range indices producing 0.
//
// dst may alias lhs or rhs. A temporary XMM register is allocated to hold the
// adjusted index mask.
void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs) {
  // Pin dst in addition to the inputs: when lhs != dst, dst is overwritten
  // with lhs before pshufb reads the mask, so a mask register that aliased
  // dst would be clobbered and the shuffle would use lhs as its own indices.
  XMMRegister mask =
      GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst, lhs, rhs)).fp();
  // Out-of-range indices should return 0. Add 112 (0x70) to every index byte
  // with unsigned saturation so that any value > 15 ends up >= 128 (top bit
  // set), which makes pshufb zero that lane; values 0..15 keep their low
  // four bits and the top bit stays clear.
  TurboAssembler::Move(mask, uint32_t{0x70707070});
  // Broadcast the low 32 bits (0x70707070) to all four dwords of the mask.
  Pshufd(mask, mask, uint8_t{0x0});
  Paddusb(mask, rhs.fp());
  // pshufb shuffles its destination in place, so the table must be in dst.
  // rhs has already been consumed above, so this is safe even if dst == rhs.
  if (lhs != dst) {
    Movaps(dst.fp(), lhs.fp());
  }
  Pshufb(dst.fp(), mask);
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Movd(dst.fp(), src.gp()); Movd(dst.fp(), src.gp());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment