Commit 236aff1b, authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][liftoff][ia32] Fix operands on non-AVX

The main fix here is that, on non-AVX builds, the shifts require dst == src.
Calling macro-assembler functions like Psrlw assumes that we have already
met that requirement (which we usually specify in TurboFan's
instruction-selector). On Liftoff, we need to do that check manually.
This is done by using the helper EmitSimdShiftOpImm, which performs this
check: it dispatches to the AVX instruction if supported, and otherwise
emits a mov from src to dst first if dst != src.

Also fix a couple of places where we forgot to annotate the required SSE
extension for the instruction.

Bug: v8:11078
Change-Id: Icc31df9ab80c041f4bb3bd48444658368eaa2c91
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2505251
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70889}
parent d4f7ea80
...@@ -2896,7 +2896,7 @@ void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs, ...@@ -2896,7 +2896,7 @@ void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
ref = liftoff::kScratchDoubleReg; ref = liftoff::kScratchDoubleReg;
} }
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>( liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
this, dst, lhs, rhs); this, dst, lhs, rhs, SSE4_1);
Pcmpeqw(dst.fp(), ref); Pcmpeqw(dst.fp(), ref);
Pcmpeqw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); Pcmpeqw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
Pxor(dst.fp(), liftoff::kScratchDoubleReg); Pxor(dst.fp(), liftoff::kScratchDoubleReg);
...@@ -2955,7 +2955,7 @@ void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs, ...@@ -2955,7 +2955,7 @@ void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
ref = liftoff::kScratchDoubleReg; ref = liftoff::kScratchDoubleReg;
} }
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>( liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
this, dst, lhs, rhs); this, dst, lhs, rhs, SSE4_1);
Pcmpeqd(dst.fp(), ref); Pcmpeqd(dst.fp(), ref);
Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg); Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
Pxor(dst.fp(), liftoff::kScratchDoubleReg); Pxor(dst.fp(), liftoff::kScratchDoubleReg);
...@@ -3200,7 +3200,8 @@ void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, ...@@ -3200,7 +3200,8 @@ void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
Register tmp = GetUnusedRegister(kGpReg, {}).gp(); Register tmp = GetUnusedRegister(kGpReg, {}).gp();
// Perform 16-bit shift, then mask away high bits. // Perform 16-bit shift, then mask away high bits.
uint8_t shift = rhs & 7; uint8_t shift = rhs & 7;
Psrlw(dst.fp(), lhs.fp(), byte{shift}); liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 3>(
this, dst, lhs, rhs);
uint8_t bmask = 0xff >> shift; uint8_t bmask = 0xff >> shift;
uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask; uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
...@@ -3643,7 +3644,15 @@ void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst, ...@@ -3643,7 +3644,15 @@ void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
Psllq(tmp, tmp, 63); Psllq(tmp, tmp, 63);
Psrlq(tmp, tmp, shift); Psrlq(tmp, tmp, shift);
Psrlq(dst.fp(), lhs.fp(), shift); if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpsrlq(dst.fp(), lhs.fp(), shift);
} else {
if (dst != lhs) {
movaps(dst.fp(), lhs.fp());
}
psrlq(dst.fp(), shift);
}
Pxor(dst.fp(), tmp); Pxor(dst.fp(), tmp);
Psubq(dst.fp(), tmp); Psubq(dst.fp(), tmp);
} }
...@@ -3658,7 +3667,8 @@ void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst, ...@@ -3658,7 +3667,8 @@ void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
Psllq(tmp, tmp, 63); Psllq(tmp, tmp, 63);
Psrlq(tmp, tmp, shift); Psrlq(tmp, tmp, shift);
Psrlq(dst.fp(), lhs.fp(), shift); liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
this, dst, lhs, rhs);
Pxor(dst.fp(), tmp); Pxor(dst.fp(), tmp);
Psubq(dst.fp(), tmp); Psubq(dst.fp(), tmp);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment