Commit 66964c51 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64][liftoff] Implement double precision conversions

Extract codegen into macro-assembler functions for reuse in Liftoff.

Some minor tweaks in I32x4TruncSatF64x2SZero and I32x4TruncSatF64x2UZero
check whether dst and src overlap and move to scratch/dst accordingly. In
TurboFan we can impose these restrictions in the instruction selector, but
not in Liftoff. This doesn't make TurboFan codegen any worse, since
those restrictions are still in place.

Bug: v8:11265
Change-Id: Ib6b3ebeb5fed99eddd0700fb4aba91d4168c3213
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2683206
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72638}
parent a16add80
@@ -2357,6 +2357,90 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src,
}
}
void TurboAssembler::F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src) {
// dst = [ src_low, 0x43300000, src_high, 0x43300000 ];
// 0x43300000'00000000 is a special double whose significand bits
// precisely represent all uint32 numbers.
Unpcklps(dst, src,
ExternalReferenceAsOperand(
ExternalReference::
address_of_wasm_f64x2_convert_low_i32x4_u_int_mask()));
Subpd(dst, ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
}
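For reference, the conversion above is the classic 2^52 bit trick. A minimal
scalar sketch in C++ (illustrative only, not part of this change; U32ToDouble
is a hypothetical name):

#include <cstdint>
#include <cstring>

// OR the uint32 into the low 32 significand bits of 2^52 (bit pattern
// 0x43300000'00000000); the result is exactly 2^52 + x. Subtracting 2^52
// then yields x as a double.
double U32ToDouble(uint32_t x) {
  uint64_t bits = 0x4330000000000000ULL | x;
  double d;
  std::memcpy(&d, &bits, sizeof d);
  return d - 4503599627370496.0;  // 2^52
}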
void TurboAssembler::I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
XMMRegister original_dst = dst;
// Make sure we don't overwrite src.
if (dst == src) {
DCHECK_NE(src, kScratchDoubleReg);
dst = kScratchDoubleReg;
}
// dst = 0 if src == NaN, else all ones.
vcmpeqpd(dst, src, src);
// dst = 0 if src == NaN, else INT32_MAX as double.
vandpd(dst, dst,
ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_int32_max_as_double()));
// dst = 0 if src == NaN, src is saturated to INT32_MAX as double.
vminpd(dst, src, dst);
// Values > INT32_MAX are already saturated; values < INT32_MIN raise
// an exception, which is masked and returns 0x80000000 (INT32_MIN).
vcvttpd2dq(dst, dst);
if (original_dst != dst) {
Move(original_dst, dst);
}
} else {
if (dst != src) {
Move(dst, src);
}
Move(kScratchDoubleReg, dst);
cmpeqpd(kScratchDoubleReg, dst);
andps(kScratchDoubleReg,
ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_int32_max_as_double()));
minpd(dst, kScratchDoubleReg);
cvttpd2dq(dst, dst);
}
}
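Per lane, the sequence above implements the saturation rules of wasm's
i32x4.trunc_sat_f64x2_s_zero. A scalar sketch (illustrative only; TruncSatS
is a hypothetical name):

#include <cstdint>

int32_t TruncSatS(double d) {
  if (d != d) return 0;                     // NaN -> 0 (the cmpeqpd mask)
  if (d >= 2147483648.0) return INT32_MAX;  // minpd clamps the high side
  if (d < -2147483648.0) return INT32_MIN;  // cvttpd2dq yields 0x80000000
  return static_cast<int32_t>(d);           // truncate toward zero
}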
void TurboAssembler::I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vxorpd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
// Saturate to 0.
vmaxpd(dst, src, kScratchDoubleReg);
// Saturate to UINT32_MAX.
vminpd(dst, dst,
ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_uint32_max_as_double()));
// Truncate.
vroundpd(dst, dst, kRoundToZero);
// Add 2^52, the special double whose significand bits hold the uint32.
vaddpd(dst, dst,
ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
// Extract the low 32 bits of each double's significand and zero the top
// lanes: dst = [dst[0], dst[2], 0, 0] (imm 0x88 selects lanes {0,2} of
// dst and {0,2} of the zeroed scratch register).
vshufps(dst, dst, kScratchDoubleReg, 0x88);
} else {
CpuFeatureScope scope(this, SSE4_1);
if (dst != src) {
Move(dst, src);
}
xorps(kScratchDoubleReg, kScratchDoubleReg);
maxpd(dst, kScratchDoubleReg);
minpd(dst, ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_uint32_max_as_double()));
roundpd(dst, dst, kRoundToZero);
addpd(dst, ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
shufps(dst, kScratchDoubleReg, 0x88);
}
}
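The unsigned variant clamps to [0, UINT32_MAX], truncates toward zero, and
then runs the 2^52 trick in reverse: addpd places each integer in the low
significand bits and shufps 0x88 gathers those 32-bit lanes while zeroing the
top half. A per-lane scalar sketch (illustrative only; TruncSatU is a
hypothetical name):

#include <cstdint>

uint32_t TruncSatU(double d) {
  if (!(d > 0.0)) return 0;                  // NaN and negatives -> 0 (maxpd)
  if (d >= 4294967296.0) return UINT32_MAX;  // minpd clamps the high side
  return static_cast<uint32_t>(d);           // truncate toward zero
}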
void TurboAssembler::Abspd(XMMRegister dst) {
Andps(dst, ExternalReferenceAsOperand(
ExternalReference::address_of_double_abs_constant()));
......
@@ -610,6 +610,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp);
void F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src);
void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src);
void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src);
void Abspd(XMMRegister dst);
void Negpd(XMMRegister dst);
......
@@ -2478,19 +2478,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64F64x2ConvertLowI32x4U: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
// dst = [ src_low, 0x43300000, src_high, 0x43300000 ];
// 0x43300000'00000000 is a special double whose significand bits
// precisely represent all uint32 numbers.
__ Unpcklps(
dst, src,
__ ExternalReferenceAsOperand(
ExternalReference::
address_of_wasm_f64x2_convert_low_i32x4_u_int_mask()));
__ Subpd(dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
__ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
i.InputSimd128Register(0));
break;
}
case kX64F64x2PromoteLowF32x4: {
@@ -2502,76 +2491,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I32x4TruncSatF64x2SZero: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
DCHECK_NE(dst, src);
// dst = 0 if src == NaN, else all ones.
__ vcmpeqpd(dst, src, src);
// dst = 0 if src == NaN, else INT32_MAX as double.
__ vandpd(
dst, dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_int32_max_as_double()));
// dst = 0 if src == NaN, src is saturated to INT32_MAX as double.
__ vminpd(dst, src, dst);
// Values > INT32_MAX are already saturated; values < INT32_MIN raise
// an exception, which is masked and returns 0x80000000 (INT32_MIN).
__ vcvttpd2dq(dst, dst);
} else {
DCHECK_EQ(dst, src);
__ Move(kScratchDoubleReg, src);
__ cmpeqpd(kScratchDoubleReg, src);
__ andps(kScratchDoubleReg,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_int32_max_as_double()));
__ minpd(dst, kScratchDoubleReg);
__ cvttpd2dq(dst, dst);
}
__ I32x4TruncSatF64x2SZero(i.OutputSimd128Register(),
i.InputSimd128Register(0));
break;
}
case kX64I32x4TruncSatF64x2UZero: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vxorpd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
// Saturate to 0.
__ vmaxpd(dst, src, kScratchDoubleReg);
// Saturate to UINT32_MAX.
__ vminpd(
dst, dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_uint32_max_as_double()));
// Truncate.
__ vroundpd(dst, dst, kRoundToZero);
// Add 2^52, the special double whose significand bits hold the uint32.
__ vaddpd(dst, dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
// Extract low 32 bits of each double's significand, zero top lanes.
// dst = [dst[0], dst[2], 0, 0]
__ vshufps(dst, dst, kScratchDoubleReg, 0x88);
break;
} else {
CpuFeatureScope scope(tasm(), SSE4_1);
DCHECK_EQ(dst, src);
__ xorps(kScratchDoubleReg, kScratchDoubleReg);
__ maxpd(dst, kScratchDoubleReg);
__ minpd(
dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_uint32_max_as_double()));
__ roundpd(dst, dst, kRoundToZero);
__ addpd(dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
__ shufps(dst, kScratchDoubleReg, 0x88);
break;
}
__ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
i.InputSimd128Register(0));
break;
}
case kX64F32x4Splat: {
......
@@ -2583,6 +2583,21 @@ void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
vmov(dest.high(), right.high(), gt);
}
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f64x2.convert_low_i32x4_s");
}
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f64x2.convert_low_i32x4_u");
}
void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f64x2.promote_low_f32x4");
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
vdup(Neon32, liftoff::GetSimd128Register(dst), src.fp(), 0);
@@ -3818,6 +3833,11 @@ void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
liftoff::GetSimd128Register(src));
}
void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f32x4.demote_f64x2_zero");
}
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
@@ -3882,6 +3902,16 @@ void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.high_fp());
}
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i32x4.trunc_sat_f64x2_s_zero");
}
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i32x4.trunc_sat_f64x2_u_zero");
}
void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
......
@@ -1826,6 +1826,21 @@ void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
}
}
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f64x2.convert_low_i32x4_s");
}
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f64x2.convert_low_i32x4_u");
}
void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f64x2.promote_low_f32x4");
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V4S(), src.fp().S(), 0);
@@ -2815,6 +2830,11 @@ void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
Ucvtf(dst.fp().V4S(), src.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f32x4.demote_f64x2_zero");
}
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
@@ -2911,6 +2931,16 @@ void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
Uxtl2(dst.fp().V4S(), src.fp().V8H());
}
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i32x4.trunc_sat_f64x2_s_zero");
}
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i32x4.trunc_sat_f64x2_u_zero");
}
void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
......
@@ -4294,6 +4294,21 @@ void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, rhs, lhs);
}
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f64x2.convert_low_i32x4_s");
}
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f64x2.convert_low_i32x4_u");
}
void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f64x2.promote_low_f32x4");
}
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
LiftoffRegister src) {
// NAN->0
@@ -4385,6 +4400,11 @@ void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
liftoff::kScratchDoubleReg); // Add hi and lo, may round.
}
void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f32x4.demote_f64x2_zero");
}
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
@@ -4461,6 +4481,16 @@ void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
Pmovzxwd(dst.fp(), dst.fp());
}
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i32x4.trunc_sat_f64x2_s_zero");
}
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i32x4.trunc_sat_f64x2_u_zero");
}
void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
......
@@ -1177,6 +1177,18 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister rhs);
inline void emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src);
inline void emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src);
inline void emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
LiftoffRegister src);
inline void emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
LiftoffRegister src);
inline void emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
LiftoffRegister src);
inline void emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
LiftoffRegister src);
inline void emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
LiftoffRegister src);
inline void emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
......
@@ -3831,6 +3831,21 @@ void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, rhs, lhs);
}
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src) {
Cvtdq2pd(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src) {
F64x2ConvertLowI32x4U(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
LiftoffRegister src) {
Cvtps2pd(dst.fp(), src.fp());
}
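Cvtdq2pd and Cvtps2pd need no fixup sequence here: every int32 and every
float is exactly representable as a double, so the per-lane behavior is just
a widening cast (scalar sketch, illustrative only; the function names are
hypothetical):

#include <cstdint>

double ConvertLowS(int32_t x) { return static_cast<double>(x); }  // exact
double PromoteLow(float f) { return static_cast<double>(f); }     // exact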
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
LiftoffRegister src) {
// NAN->0
@@ -3914,6 +3929,11 @@ void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
Addps(dst.fp(), kScratchDoubleReg); // Add hi and lo, may round.
}
void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
LiftoffRegister src) {
Cvtpd2ps(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
@@ -3986,6 +4006,16 @@ void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
I32x4UConvertI16x8High(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
LiftoffRegister src) {
I32x4TruncSatF64x2SZero(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
LiftoffRegister src) {
I32x4TruncSatF64x2UZero(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
......