Commit bd22e3be authored by Thibaud Michaud, committed by V8 LUCI CQ

[wasm-relaxed-simd] Add relaxed laneselect in Liftoff on ia32/x64

R=clemensb@chromium.org
CC=gdeepti@chromium.org

Bug: v8:12284
Change-Id: Ied63b2c7b57210f2cf2e1580520405ce1be5dc33
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3625979
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Thibaud Michaud <thibaudm@chromium.org>
Cr-Commit-Position: refs/heads/main@{#80400}
parent 9ae5bfa5
...@@ -2466,6 +2466,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst, ...@@ -2466,6 +2466,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle"); bailout(kSimd, "emit_i8x16_relaxed_swizzle");
} }
// Relaxed lane-select is not implemented for this backend: the only action is
// a bailout with reason kSimd, tagged with this function's name. None of the
// register arguments are read.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
TurboAssembler::Move(dst.low_fp(), src.fp()); TurboAssembler::Move(dst.low_fp(), src.fp());
......
...@@ -1781,6 +1781,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst, ...@@ -1781,6 +1781,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle"); bailout(kSimd, "emit_i8x16_relaxed_swizzle");
} }
// Relaxed lane-select is not implemented for this backend: the only action is
// a bailout with reason kSimd, tagged with this function's name. None of the
// register arguments are read.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Dup(dst.fp().V2D(), src.fp().D(), 0); Dup(dst.fp().V2D(), src.fp().D(), 0);
......
...@@ -2915,6 +2915,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst, ...@@ -2915,6 +2915,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
true); true);
} }
// Implements relaxed lane-select with a single Pblendvb macro-instruction.
// Note the operand order: {src2} is passed before {src1}, followed by {mask}.
// NOTE(review): presumably Pblendvb takes bytes from its second source
// ({src1}) where the mask byte's top bit is set and from the first ({src2})
// otherwise -- confirm against the macro-assembler's Pblendvb.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
Pblendvb(dst.fp(), src2.fp(), src1.fp(), mask.fp());
}
void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst, void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Register scratch = GetUnusedRegister(RegClass::kGpReg, {}).gp(); Register scratch = GetUnusedRegister(RegClass::kGpReg, {}).gp();
......
...@@ -626,6 +626,11 @@ LiftoffRegister LiftoffAssembler::LoadToRegister(VarState slot, ...@@ -626,6 +626,11 @@ LiftoffRegister LiftoffAssembler::LoadToRegister(VarState slot,
LiftoffRegList pinned) { LiftoffRegList pinned) {
if (slot.is_reg()) return slot.reg(); if (slot.is_reg()) return slot.reg();
LiftoffRegister reg = GetUnusedRegister(reg_class_for(slot.kind()), pinned); LiftoffRegister reg = GetUnusedRegister(reg_class_for(slot.kind()), pinned);
return LoadToRegister(slot, reg);
}
LiftoffRegister LiftoffAssembler::LoadToRegister(VarState slot,
LiftoffRegister reg) {
if (slot.is_const()) { if (slot.is_const()) {
LoadConstant(reg, slot.constant()); LoadConstant(reg, slot.constant());
} else { } else {
......
...@@ -450,6 +450,8 @@ class LiftoffAssembler : public TurboAssembler { ...@@ -450,6 +450,8 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister LoadToRegister(VarState slot, LiftoffRegList pinned); LiftoffRegister LoadToRegister(VarState slot, LiftoffRegList pinned);
LiftoffRegister LoadToRegister(VarState slot, LiftoffRegister dst);
LiftoffRegister PopToRegister(LiftoffRegList pinned = {}) { LiftoffRegister PopToRegister(LiftoffRegList pinned = {}) {
DCHECK(!cache_state_.stack_state.empty()); DCHECK(!cache_state_.stack_state.empty());
VarState slot = cache_state_.stack_state.back(); VarState slot = cache_state_.stack_state.back();
...@@ -461,6 +463,25 @@ class LiftoffAssembler : public TurboAssembler { ...@@ -461,6 +463,25 @@ class LiftoffAssembler : public TurboAssembler {
return LoadToRegister(slot, pinned); return LoadToRegister(slot, pinned);
} }
// Pops the topmost value-stack slot and materializes its value in the
// caller-chosen register {reg}. If {reg} is still marked as used in the cache
// state, it is spilled through SpillOneRegister before being overwritten.
void PopToFixedRegister(LiftoffRegister reg) {
  DCHECK(!cache_state_.stack_state.empty());
  VarState top = cache_state_.stack_state.back();
  cache_state_.stack_state.pop_back();

  const bool was_in_register = top.is_reg();
  if (was_in_register) {
    // The popped slot no longer counts as a use of its register.
    cache_state_.dec_used(top.reg());
    // The value already lives in the requested register: nothing to emit.
    if (top.reg() == reg) return;
  }

  // Free up {reg} if something else is still using it.
  if (cache_state_.is_used(reg)) SpillOneRegister(reg);

  if (was_in_register) {
    Move(reg, top.reg(), top.kind());
  } else {
    // Not register-backed: let LoadToRegister materialize the slot in {reg}.
    LoadToRegister(top, reg);
  }
}
// Use this to pop a value into a register that has no other uses, so it // Use this to pop a value into a register that has no other uses, so it
// can be modified. // can be modified.
LiftoffRegister PopToModifiableRegister(LiftoffRegList pinned = {}) { LiftoffRegister PopToModifiableRegister(LiftoffRegList pinned = {}) {
...@@ -1054,6 +1075,10 @@ class LiftoffAssembler : public TurboAssembler { ...@@ -1054,6 +1075,10 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_i8x16_relaxed_swizzle(LiftoffRegister dst, inline void emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
LiftoffRegister lhs, LiftoffRegister lhs,
LiftoffRegister rhs); LiftoffRegister rhs);
// Relaxed lane-select on 128-bit SIMD values: combines {src1} and {src2} into
// {dst} under control of {mask}. Defined separately for each architecture;
// backends without an implementation bail out instead of emitting code.
inline void emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask);
inline void emit_i8x16_popcnt(LiftoffRegister dst, LiftoffRegister src); inline void emit_i8x16_popcnt(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src); inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src); inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src);
......
...@@ -3434,18 +3434,8 @@ class LiftoffCompiler { ...@@ -3434,18 +3434,8 @@ class LiftoffCompiler {
template <ValueKind src_kind, ValueKind result_kind, template <ValueKind src_kind, ValueKind result_kind,
ValueKind result_lane_kind = kVoid, typename EmitFn> ValueKind result_lane_kind = kVoid, typename EmitFn>
void EmitTerOp(EmitFn fn) { void EmitTerOp(EmitFn fn, LiftoffRegister dst, LiftoffRegister src1,
static constexpr RegClass src_rc = reg_class_for(src_kind); LiftoffRegister src2, LiftoffRegister src3) {
static constexpr RegClass result_rc = reg_class_for(result_kind);
LiftoffRegister src3 = __ PopToRegister();
LiftoffRegister src2 = __ PopToRegister(LiftoffRegList{src3});
LiftoffRegister src1 = __ PopToRegister(LiftoffRegList{src3, src2});
// Reusing src1 and src2 will complicate codegen for select for some
// backend, so we allow only reusing src3 (the mask), and pin src1 and src2.
LiftoffRegister dst = src_rc == result_rc
? __ GetUnusedRegister(result_rc, {src3},
LiftoffRegList{src1, src2})
: __ GetUnusedRegister(result_rc, {});
CallEmitFn(fn, dst, src1, src2, src3); CallEmitFn(fn, dst, src1, src2, src3);
if (V8_UNLIKELY(nondeterminism_)) { if (V8_UNLIKELY(nondeterminism_)) {
LiftoffRegList pinned = {dst}; LiftoffRegList pinned = {dst};
...@@ -3460,6 +3450,45 @@ class LiftoffCompiler { ...@@ -3460,6 +3450,45 @@ class LiftoffCompiler {
__ PushRegister(result_kind, dst); __ PushRegister(result_kind, dst);
} }
// Convenience overload of EmitTerOp that does its own operand and result
// register allocation, then forwards to the overload taking explicit
// registers.
//
// The three operands are popped from the value stack last-pushed first
// (src3, then src2, then src1); each pop passes the registers popped so far
// as the pinned list.
template <ValueKind src_kind, ValueKind result_kind,
ValueKind result_lane_kind = kVoid, typename EmitFn>
void EmitTerOp(EmitFn fn) {
LiftoffRegister src3 = __ PopToRegister();
LiftoffRegister src2 = __ PopToRegister(LiftoffRegList{src3});
LiftoffRegister src1 = __ PopToRegister(LiftoffRegList{src3, src2});
static constexpr RegClass src_rc = reg_class_for(src_kind);
static constexpr RegClass result_rc = reg_class_for(result_kind);
// Reusing src1 and src2 will complicate codegen for select for some
// backend, so we allow only reusing src3 (the mask), and pin src1 and src2.
// When the source and result register classes differ, no source register is
// offered for reuse and nothing is pinned.
LiftoffRegister dst = src_rc == result_rc
? __ GetUnusedRegister(result_rc, {src3},
LiftoffRegList{src1, src2})
: __ GetUnusedRegister(result_rc, {});
// Template arguments are spelled out explicitly for the forwarded call.
EmitTerOp<src_kind, result_kind, result_lane_kind, EmitFn>(fn, dst, src1,
src2, src3);
}
// Emits code for the relaxed lane-select opcodes by allocating registers for
// the three stack operands (mask on top, then src2, then src1) and delegating
// to EmitTerOp with LiftoffAssembler::emit_s128_relaxed_laneselect.
void EmitRelaxedLaneSelect() {
#if defined(V8_TARGET_ARCH_IA32) || defined(V8_TARGET_ARCH_X64)
// Without AVX the mask is forced into xmm0 and src2 doubles as the
// destination, so src2 is popped into a modifiable register.
// NOTE(review): presumably this is because the non-AVX pblendvb encoding uses
// xmm0 as an implicit mask operand and overwrites its first operand --
// confirm against the instruction reference.
if (!CpuFeatures::IsSupported(AVX)) {
LiftoffRegister mask(xmm0);
__ PopToFixedRegister(mask);
LiftoffRegister src2 = __ PopToModifiableRegister(LiftoffRegList{mask});
LiftoffRegister src1 = __ PopToRegister(LiftoffRegList{src2, mask});
// dst == src2 here: the result is written over src2.
EmitTerOp<kS128, kS128>(&LiftoffAssembler::emit_s128_relaxed_laneselect,
src2, src1, src2, mask);
return;
}
#endif
// Generic path: pop all three operands, pinning each as it is popped, and
// request an FP-class destination register with all three operands pinned.
LiftoffRegList pinned;
LiftoffRegister mask = pinned.set(__ PopToRegister(pinned));
LiftoffRegister src2 = pinned.set(__ PopToRegister(pinned));
LiftoffRegister src1 = pinned.set(__ PopToRegister(pinned));
LiftoffRegister dst = __ GetUnusedRegister(RegClass::kFpReg, {}, pinned);
EmitTerOp<kS128, kS128>(&LiftoffAssembler::emit_s128_relaxed_laneselect,
dst, src1, src2, mask);
}
template <typename EmitFn, typename EmitFnImm> template <typename EmitFn, typename EmitFnImm>
void EmitSimdShiftOp(EmitFn fn, EmitFnImm fnImm) { void EmitSimdShiftOp(EmitFn fn, EmitFnImm fnImm) {
static constexpr RegClass result_rc = reg_class_for(kS128); static constexpr RegClass result_rc = reg_class_for(kS128);
...@@ -4033,6 +4062,11 @@ class LiftoffCompiler { ...@@ -4033,6 +4062,11 @@ class LiftoffCompiler {
return EmitSimdFmaOp(&LiftoffAssembler::emit_f64x2_qfma); return EmitSimdFmaOp(&LiftoffAssembler::emit_f64x2_qfma);
case wasm::kExprF64x2Qfms: case wasm::kExprF64x2Qfms:
return EmitSimdFmaOp(&LiftoffAssembler::emit_f64x2_qfms); return EmitSimdFmaOp(&LiftoffAssembler::emit_f64x2_qfms);
case wasm::kExprI16x8RelaxedLaneSelect:
case wasm::kExprI8x16RelaxedLaneSelect:
case wasm::kExprI32x4RelaxedLaneSelect:
case wasm::kExprI64x2RelaxedLaneSelect:
return EmitRelaxedLaneSelect();
default: default:
unsupported(decoder, kSimd, "simd"); unsupported(decoder, kSimd, "simd");
} }
......
...@@ -1840,6 +1840,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst, ...@@ -1840,6 +1840,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle"); bailout(kSimd, "emit_i8x16_relaxed_swizzle");
} }
// Relaxed lane-select is not implemented for this backend: the only action is
// a bailout with reason kSimd, tagged with this function's name. None of the
// register arguments are read.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
bailout(kSimd, "emit_i8x16_splat"); bailout(kSimd, "emit_i8x16_splat");
......
...@@ -2155,6 +2155,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst, ...@@ -2155,6 +2155,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle"); bailout(kSimd, "emit_i8x16_relaxed_swizzle");
} }
// Relaxed lane-select is not implemented for this backend: the only action is
// a bailout with reason kSimd, tagged with this function's name. None of the
// register arguments are read.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
fill_b(dst.fp().toW(), src.gp()); fill_b(dst.fp().toW(), src.gp());
......
...@@ -1775,6 +1775,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst, ...@@ -1775,6 +1775,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle"); bailout(kSimd, "emit_i8x16_relaxed_swizzle");
} }
// Relaxed lane-select is not implemented for this backend: the only action is
// a bailout with reason kSimd, tagged with this function's name. None of the
// register arguments are read.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
bailout(kUnsupportedArchitecture, "emit_f64x2splat"); bailout(kUnsupportedArchitecture, "emit_f64x2splat");
......
...@@ -1943,6 +1943,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst, ...@@ -1943,6 +1943,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle"); bailout(kSimd, "emit_i8x16_relaxed_swizzle");
} }
// Relaxed lane-select is not implemented for this backend: the only action is
// a bailout with reason kSimd, tagged with this function's name. None of the
// register arguments are read.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
VU.set(kScratchReg, E8, m1); VU.set(kScratchReg, E8, m1);
......
...@@ -2731,6 +2731,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst, ...@@ -2731,6 +2731,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
bailout(kSimd, "emit_i8x16_relaxed_swizzle"); bailout(kSimd, "emit_i8x16_relaxed_swizzle");
} }
// Relaxed lane-select is not implemented for this backend: the only action is
// a bailout with reason kSimd, tagged with this function's name. None of the
// register arguments are read.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
bailout(kSimd, "emit_s128_relaxed_laneselect");
}
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
F64x2ConvertLowI32x4S(dst.fp(), src.fp()); F64x2ConvertLowI32x4S(dst.fp(), src.fp());
......
...@@ -2518,6 +2518,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst, ...@@ -2518,6 +2518,13 @@ void LiftoffAssembler::emit_i8x16_relaxed_swizzle(LiftoffRegister dst,
kScratchRegister, true); kScratchRegister, true);
} }
// Implements relaxed lane-select with a single Pblendvb macro-instruction.
// Note the operand order: {src2} is passed before {src1}, followed by {mask}.
// NOTE(review): presumably Pblendvb takes bytes from its second source
// ({src1}) where the mask byte's top bit is set and from the first ({src2})
// otherwise -- confirm against the macro-assembler's Pblendvb.
void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
Pblendvb(dst.fp(), src2.fp(), src1.fp(), mask.fp());
}
void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst, void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
I8x16Popcnt(dst.fp(), src.fp(), kScratchDoubleReg, I8x16Popcnt(dst.fp(), src.fp(), kScratchDoubleReg,
......
...@@ -140,10 +140,6 @@ ...@@ -140,10 +140,6 @@
'test-intl/StringLocaleCompareFastPath': [['mode != release', SKIP], SLOW, NO_VARIANTS], 'test-intl/StringLocaleCompareFastPath': [['mode != release', SKIP], SLOW, NO_VARIANTS],
# TODO(12284): Implement relaxed SIMD in Liftoff. # TODO(12284): Implement relaxed SIMD in Liftoff.
'test-run-wasm-relaxed-simd/RunWasm_I8x16RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I16x8RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I32x4RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I64x2RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F32x4RelaxedMin_liftoff': [SKIP], 'test-run-wasm-relaxed-simd/RunWasm_F32x4RelaxedMin_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F32x4RelaxedMax_liftoff': [SKIP], 'test-run-wasm-relaxed-simd/RunWasm_F32x4RelaxedMax_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F64x2RelaxedMin_liftoff': [SKIP], 'test-run-wasm-relaxed-simd/RunWasm_F64x2RelaxedMin_liftoff': [SKIP],
...@@ -1206,6 +1202,10 @@ ...@@ -1206,6 +1202,10 @@
['arch != ia32 and arch != x64', { ['arch != ia32 and arch != x64', {
# TODO(12284): Implement relaxed SIMD in Liftoff on missing architectures. # TODO(12284): Implement relaxed SIMD in Liftoff on missing architectures.
'test-run-wasm-relaxed-simd/RunWasm_I8x16RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I16x8RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I32x4RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_I64x2RelaxedLaneSelect_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F32x4Qfma_liftoff': [SKIP], 'test-run-wasm-relaxed-simd/RunWasm_F32x4Qfma_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F32x4Qfms_liftoff': [SKIP], 'test-run-wasm-relaxed-simd/RunWasm_F32x4Qfms_liftoff': [SKIP],
'test-run-wasm-relaxed-simd/RunWasm_F64x2Qfma_liftoff': [SKIP], 'test-run-wasm-relaxed-simd/RunWasm_F64x2Qfma_liftoff': [SKIP],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment