Commit d0aa5c03, authored by Ng Zhi An, committed by V8 LUCI CQ

[wasm-simd] Share I32x4SConvertF32x4 implementation

Move I32x4SConvertF32x4 into the shared implementation, which takes care of
both the AVX and no-AVX code paths. When AVX is not supported, the instruction
selector still requires dst == src to save a move in codegen.

Bug: v8:11589
Change-Id: Ie982682b3002192ab27700bf73f8c1e66aeba492
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3086732
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#76243}
parent 339dde1c
......@@ -378,6 +378,31 @@ void SharedTurboAssembler::I32x4ExtMul(XMMRegister dst, XMMRegister src1,
}
}
// Emits code that converts the four single-precision lanes of |src| into
// signed 32-bit integers in |dst|. As the step comments below describe, NaN
// lanes become 0 and positive-overflow lanes become 0x7FFFFFFF.
// |scratch| is overwritten. |dst| may be the same register as |src|; when it
// is not and AVX is unavailable, an extra movaps copies |src| into |dst|.
void SharedTurboAssembler::I32x4SConvertF32x4(XMMRegister dst, XMMRegister src,
XMMRegister scratch) {
// Convert NAN to 0.
// Comparing src with itself produces an all-ones mask in every non-NaN lane,
// so ANDing src with that mask zeroes exactly the NaN lanes.
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
// Three-operand AVX forms write dst directly, so no copy of src is needed.
vcmpeqps(scratch, src, src);
vpand(dst, src, scratch);
} else {
// Two-operand SSE forms overwrite their first operand: build the mask in
// scratch, and copy src into dst only if they are different registers.
movaps(scratch, src);
cmpeqps(scratch, src);
if (dst != src) movaps(dst, src);
andps(dst, scratch);
}
// Set top bit if >= 0 (but not -0.0!).
// scratch still holds the non-NaN mask; XORing it with dst inverts dst's bits
// in those lanes, so scratch's sign bit is set where dst's sign bit is clear.
Pxor(scratch, dst);
// Convert to packed 32-bit integers (truncating conversion).
Cvttps2dq(dst, dst);
// Set top bit if >=0 is now < 0.
// Only lanes that were non-negative before and are negative after the
// conversion keep the top bit, i.e. the positive-overflow lanes.
Pand(scratch, dst);
// The arithmetic shift replicates that top bit across the whole lane, giving
// an all-ones mask in positive-overflow lanes and zero elsewhere.
Psrad(scratch, scratch, byte{31});
// Set positive overflow lanes to 0x7FFFFFFF.
// NOTE(review): this relies on Cvttps2dq yielding 0x80000000 for
// out-of-range lanes, so flipping every bit gives 0x7FFFFFFF - see the x86
// ISA reference for CVTTPS2DQ.
Pxor(dst, scratch);
}
void SharedTurboAssembler::I32x4SConvertI16x8High(XMMRegister dst,
XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
......
......@@ -310,6 +310,9 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
// Requires that dst == src1 if AVX is not supported.
void I32x4ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch, bool low, bool is_signed);
// Converts the four single-precision lanes of src to signed 32-bit integers
// in dst. Per the definition's comments, NaN lanes become 0 and
// positive-overflow lanes become 0x7FFFFFFF. scratch is clobbered.
// Requires dst == src if AVX is not supported.
// NOTE(review): the definition emits an extra movaps when dst != src without
// AVX, so this requirement looks like a codegen optimization (saving a move)
// rather than a hard constraint - confirm against callers.
void I32x4SConvertF32x4(XMMRegister dst, XMMRegister src,
XMMRegister scratch);
void I32x4SConvertI16x8High(XMMRegister dst, XMMRegister src);
void I32x4UConvertI16x8High(XMMRegister dst, XMMRegister src,
XMMRegister scratch);
......
......@@ -2445,20 +2445,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32I32x4SConvertF32x4: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
// NAN->0
__ Cmpeqps(kScratchDoubleReg, src, src);
__ Pand(dst, src, kScratchDoubleReg);
// Set top bit if >= 0 (but not -0.0!)
__ Pxor(kScratchDoubleReg, dst);
// Convert
__ Cvttps2dq(dst, dst);
// Set top bit if >=0 is now < 0
__ Pand(kScratchDoubleReg, dst);
__ Psrad(kScratchDoubleReg, kScratchDoubleReg, byte{31});
// Set positive overflow lanes to 0x7FFFFFFF
__ Pxor(dst, kScratchDoubleReg);
__ I32x4SConvertF32x4(i.OutputSimd128Register(),
i.InputSimd128Register(0), kScratchDoubleReg);
break;
}
case kIA32I32x4SConvertI16x8Low: {
......
......@@ -3084,21 +3084,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I32x4SConvertF32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister dst = i.OutputSimd128Register();
// NAN->0
__ Movaps(kScratchDoubleReg, dst);
__ Cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
__ Pand(dst, kScratchDoubleReg);
// Set top bit if >= 0 (but not -0.0!)
__ Pxor(kScratchDoubleReg, dst);
// Convert
__ Cvttps2dq(dst, dst);
// Set top bit if >=0 is now < 0
__ Pand(kScratchDoubleReg, dst);
__ Psrad(kScratchDoubleReg, byte{31});
// Set positive overflow lanes to 0x7FFFFFFF
__ Pxor(dst, kScratchDoubleReg);
__ I32x4SConvertF32x4(i.OutputSimd128Register(),
i.InputSimd128Register(0), kScratchDoubleReg);
break;
}
case kX64I32x4SConvertI16x8Low: {
......
......@@ -3321,7 +3321,8 @@ void InstructionSelector::VisitI64x2Mul(Node* node) {
// Selects the x64 instruction for the signed f32x4 -> i32x4 conversion.
//
// The input is always placed in a register. The output constraint depends on
// AVX support:
//  - with AVX, the output may be any register, because the shared assembler
//    helper uses three-operand instruction forms;
//  - without AVX, the output is tied to the first input, which saves a
//    register move in code generation.
void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
  X64OperandGenerator g(this);
  Emit(kX64I32x4SConvertF32x4,
       IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node),
       g.UseRegister(node->InputAt(0)));
}
......
......@@ -4300,26 +4300,7 @@ void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
// Emits the signed f32x4 -> i32x4 conversion of |src| into |dst|.
//
// All of the work is delegated to the shared macro-assembler helper, which
// selects the AVX or non-AVX instruction forms, maps NaN lanes to 0 and
// saturates positive-overflow lanes to 0x7FFFFFFF.
// liftoff::kScratchDoubleReg is passed as the temporary and is clobbered.
//
// The sequence must be emitted exactly once: running it a second time on an
// already-converted register would reinterpret integer bits as floats.
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  I32x4SConvertF32x4(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}
void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
......
......@@ -3852,26 +3852,7 @@ void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
// Emits the signed f32x4 -> i32x4 conversion of |src| into |dst|.
//
// All of the work is delegated to the shared macro-assembler helper, which
// selects the AVX or non-AVX instruction forms, maps NaN lanes to 0 and
// saturates positive-overflow lanes to 0x7FFFFFFF.
// kScratchDoubleReg is passed as the temporary and is clobbered.
//
// The sequence must be emitted exactly once: running it a second time on an
// already-converted register would reinterpret integer bits as floats.
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  I32x4SConvertF32x4(dst.fp(), src.fp(), kScratchDoubleReg);
}
void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment