Commit bd22e3be authored by Thibaud Michaud, committed by V8 LUCI CQ

[wasm-relaxed-simd] Add relaxed laneselect in Liftoff on ia32/x64

R=clemensb@chromium.org
CC=gdeepti@chromium.org

Bug: v8:12284
Change-Id: Ied63b2c7b57210f2cf2e1580520405ce1be5dc33
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3625979
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Thibaud Michaud <thibaudm@chromium.org>
Cr-Commit-Position: refs/heads/main@{#80400}
parent 9ae5bfa5
......@@ -2466,6 +2466,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle");
}
// Relaxed SIMD lane select is not implemented in this backend: none of the
// register arguments are used, and the only action is a bailout(kSimd, ...)
// tagged with this function's name.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
TurboAssembler::Move(dst.low_fp(), src.fp());
......
......@@ -1781,6 +1781,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle");
}
// Relaxed SIMD lane select is not implemented in this backend: none of the
// register arguments are used, and the only action is a bailout(kSimd, ...)
// tagged with this function's name.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V2D(), src.fp().D(), 0);
......
......@@ -2915,6 +2915,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
true);
}
// Relaxed SIMD lane select, emitted as a single Pblendvb on the FP registers
// of the four operands.
// Note the operand order: src2 is passed ahead of src1, followed by the mask.
// NOTE(review): presumably Pblendvb takes bytes from its last source operand
// where the mask selects them, which is why src1 comes second -- confirm
// against the macro-assembler / instruction reference.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
Pblendvb(dst.fp(), src2.fp(), src1.fp(), mask.fp());
}
void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
LiftoffRegister src) {
Register scratch = GetUnusedRegister(RegClass::kGpReg, {}).gp();
......
......@@ -626,6 +626,11 @@ LiftoffRegister LiftoffAssembler::LoadToRegister(VarState slot,
LiftoffRegList pinned) {
if (slot.is_reg()) return slot.reg();
LiftoffRegister reg = GetUnusedRegister(reg_class_for(slot.kind()), pinned);
return LoadToRegister(slot, reg);
}
LiftoffRegister LiftoffAssembler::LoadToRegister(VarState slot,
LiftoffRegister reg) {
if (slot.is_const()) {
LoadConstant(reg, slot.constant());
} else {
......
......@@ -450,6 +450,8 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister LoadToRegister(VarState slot, LiftoffRegList pinned);
LiftoffRegister LoadToRegister(VarState slot, LiftoffRegister dst);
LiftoffRegister PopToRegister(LiftoffRegList pinned = {}) {
DCHECK(!cache_state_.stack_state.empty());
VarState slot = cache_state_.stack_state.back();
......@@ -461,6 +463,25 @@ class LiftoffAssembler : public TurboAssembler {
return LoadToRegister(slot, pinned);
}
// Pops the topmost value-stack slot and places its value in the specific
// register {reg} chosen by the caller. If {reg} is still in use after the pop,
// it is spilled before being overwritten.
// NOTE(review): SpillOneRegister's definition is not visible here; confirm
// that calling it with a single register frees exactly {reg}.
void PopToFixedRegister(LiftoffRegister reg) {
DCHECK(!cache_state_.stack_state.empty());
VarState slot = cache_state_.stack_state.back();
cache_state_.stack_state.pop_back();
if (slot.is_reg()) {
// The popped slot no longer counts as a use of its register. Do this before
// the is_used() check below so a slot that was the only user of {reg} does
// not trigger a spill.
cache_state_.dec_used(slot.reg());
// Value already lives in the requested register: nothing to emit.
if (slot.reg() == reg) return;
if (cache_state_.is_used(reg)) {
SpillOneRegister(reg);
}
Move(reg, slot.reg(), slot.kind());
return;
}
// Slot is not held in a register: make {reg} available, then load into it.
if (cache_state_.is_used(reg)) {
SpillOneRegister(reg);
}
LoadToRegister(slot, reg);
}
// Use this to pop a value into a register that has no other uses, so it
// can be modified.
LiftoffRegister PopToModifiableRegister(LiftoffRegList pinned = {}) {
......@@ -1054,6 +1075,10 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask);
inline void emit_i8x16_popcnt(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src);
......
......@@ -3434,18 +3434,8 @@ class LiftoffCompiler {
template <ValueKind src_kind, ValueKind result_kind,
ValueKind result_lane_kind = kVoid, typename EmitFn>
void EmitTerOp(EmitFn fn) {
static constexpr RegClass src_rc = reg_class_for(src_kind);
static constexpr RegClass result_rc = reg_class_for(result_kind);
LiftoffRegister src3 = __ PopToRegister();
LiftoffRegister src2 = __ PopToRegister(LiftoffRegList{src3});
LiftoffRegister src1 = __ PopToRegister(LiftoffRegList{src3, src2});
// Reusing src1 and src2 will complicate codegen for select for some
// backend, so we allow only reusing src3 (the mask), and pin src1 and src2.
LiftoffRegister dst = src_rc == result_rc
? __ GetUnusedRegister(result_rc, {src3},
LiftoffRegList{src1, src2})
: __ GetUnusedRegister(result_rc, {});
void EmitTerOp(EmitFn fn, LiftoffRegister dst, LiftoffRegister src1,
LiftoffRegister src2, LiftoffRegister src3) {
CallEmitFn(fn, dst, src1, src2, src3);
if (V8_UNLIKELY(nondeterminism_)) {
LiftoffRegList pinned = {dst};
......@@ -3460,6 +3450,45 @@ class LiftoffCompiler {
__ PushRegister(result_kind, dst);
}
// Convenience overload of EmitTerOp for three-operand operations: pops the
// three inputs from the value stack, selects a destination register, and
// forwards everything to the overload that takes explicit registers.
// Operands are popped last-to-first (src3, then src2, then src1); each pop
// pins the registers already popped.
template <ValueKind src_kind, ValueKind result_kind,
ValueKind result_lane_kind = kVoid, typename EmitFn>
void EmitTerOp(EmitFn fn) {
LiftoffRegister src3 = __ PopToRegister();
LiftoffRegister src2 = __ PopToRegister(LiftoffRegList{src3});
LiftoffRegister src1 = __ PopToRegister(LiftoffRegList{src3, src2});
static constexpr RegClass src_rc = reg_class_for(src_kind);
static constexpr RegClass result_rc = reg_class_for(result_kind);
// Reusing src1 and src2 will complicate codegen for select for some
// backend, so we allow only reusing src3 (the mask), and pin src1 and src2.
// When source and result register classes differ, no input is offered for
// reuse and nothing is pinned.
LiftoffRegister dst = src_rc == result_rc
? __ GetUnusedRegister(result_rc, {src3},
LiftoffRegList{src1, src2})
: __ GetUnusedRegister(result_rc, {});
EmitTerOp<src_kind, result_kind, result_lane_kind, EmitFn>(fn, dst, src1,
src2, src3);
}
// Compiles a relaxed lane select: pops mask, src2 and src1 (in that order)
// from the value stack and hands them to EmitTerOp together with
// LiftoffAssembler::emit_s128_relaxed_laneselect.
void EmitRelaxedLaneSelect() {
#if defined(V8_TARGET_ARCH_IA32) || defined(V8_TARGET_ARCH_X64)
// ia32/x64 without AVX: the mask is forced into xmm0, and src2 is popped into
// a modifiable register that is also passed as the destination.
// NOTE(review): presumably because the non-AVX pblendvb encoding reads its
// mask implicitly from xmm0 and overwrites its first operand -- confirm.
if (!CpuFeatures::IsSupported(AVX)) {
LiftoffRegister mask(xmm0);
__ PopToFixedRegister(mask);
LiftoffRegister src2 = __ PopToModifiableRegister(LiftoffRegList{mask});
LiftoffRegister src1 = __ PopToRegister(LiftoffRegList{src2, mask});
EmitTerOp<kS128, kS128>(&LiftoffAssembler::emit_s128_relaxed_laneselect,
src2, src1, src2, mask);
return;
}
#endif
// Generic path: each popped operand is added to {pinned}, and the destination
// FP register is requested with all three inputs pinned.
LiftoffRegList pinned;
LiftoffRegister mask = pinned.set(__ PopToRegister(pinned));
LiftoffRegister src2 = pinned.set(__ PopToRegister(pinned));
LiftoffRegister src1 = pinned.set(__ PopToRegister(pinned));
LiftoffRegister dst = __ GetUnusedRegister(RegClass::kFpReg, {}, pinned);
EmitTerOp<kS128, kS128>(&LiftoffAssembler::emit_s128_relaxed_laneselect,
dst, src1, src2, mask);
}
template <typename EmitFn, typename EmitFnImm>
void EmitSimdShiftOp(EmitFn fn, EmitFnImm fnImm) {
static constexpr RegClass result_rc = reg_class_for(kS128);
......@@ -4033,6 +4062,11 @@ class LiftoffCompiler {
return EmitSimdFmaOp(&LiftoffAssembler::emit_f64x2_qfma);
case wasm::kExprF64x2Qfms:
return EmitSimdFmaOp(&LiftoffAssembler::emit_f64x2_qfms);
case wasm::kExprI16x8RelaxedLaneSelect:
case wasm::kExprI8x16RelaxedLaneSelect:
case wasm::kExprI32x4RelaxedLaneSelect:
case wasm::kExprI64x2RelaxedLaneSelect:
return EmitRelaxedLaneSelect();
default:
unsupported(decoder, kSimd, "simd");
}
......
......@@ -1840,6 +1840,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle");
}
// Relaxed SIMD lane select is not implemented in this backend: none of the
// register arguments are used, and the only action is a bailout(kSimd, ...)
// tagged with this function's name.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "emit_i8x16_splat");
......
......@@ -2155,6 +2155,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle");
}
// Relaxed SIMD lane select is not implemented in this backend: none of the
// register arguments are used, and the only action is a bailout(kSimd, ...)
// tagged with this function's name.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
fill_b(dst.fp().toW(), src.gp());
......
......@@ -1775,6 +1775,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle");
}
// Relaxed SIMD lane select is not implemented in this backend: none of the
// register arguments are used, and the only action is a bailout(kSimd, ...)
// tagged with this function's name.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kUnsupportedArchitecture, "emit_f64x2splat");
......
......@@ -1943,6 +1943,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle");
}
// Relaxed SIMD lane select is not implemented in this backend: none of the
// register arguments are used, and the only action is a bailout(kSimd, ...)
// tagged with this function's name.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
VU.set(kScratchReg, E8, m1);
......
......@@ -2731,6 +2731,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle");
}
// Relaxed SIMD lane select is not implemented in this backend: none of the
// register arguments are used, and the only action is a bailout(kSimd, ...)
// tagged with this function's name.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src) {
F64x2ConvertLowI32x4S(dst.fp(), src.fp());
......
......@@ -2518,6 +2518,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
kScratchRegister, true);
}
// Relaxed SIMD lane select, emitted as a single Pblendvb on the FP registers
// of the four operands.
// Note the operand order: src2 is passed ahead of src1, followed by the mask.
// NOTE(review): presumably Pblendvb takes bytes from its last source operand
// where the mask selects them, which is why src1 comes second -- confirm
// against the macro-assembler / instruction reference.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
Pblendvb(dst.fp(), src2.fp(), src1.fp(), mask.fp());
}
void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
LiftoffRegister src) {
I8x16Popcnt(dst.fp(), src.fp(), kScratchDoubleReg,
......
......@@ -140,10 +140,6 @@
'test-intl/StringLocaleCompareFastPath': [['mode != release', SKIP], SLOW, NO_VARIANTS],
# TODO(12284): Implement relaxed SIMD in Liftoff.
'test-run-wasm-relaxed-simd/RunWasm_I8x16RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I16x8RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I32x4RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I64x2RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F32x4RelaxedMin_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F32x4RelaxedMax_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F64x2RelaxedMin_liftoff': [SKIP],
......@@ -1206,6 +1202,10 @@
['arch != ia32 and arch != x64', {
# TODO(12284): Implement relaxed SIMD in Liftoff on missing architectures.
'test-run-wasm-relaxed-simd/RunWasm_I8x16RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I16x8RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I32x4RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I64x2RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F32x4Qfma_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F32x4Qfms_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F64x2Qfma_liftoff': [SKIP],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment