Commit 5300b26d authored by Ng Zhi An, committed by Commit Bot

Reland "[wasm-simd][liftoff] Compile double precision conversions"

This is a reland of fe00fbd9

Original change's description:
> [wasm-simd][liftoff] Compile double precision conversions
>
> I missed actually handling these instructions in liftoff-compiler, so
> even though the assembler functions were implemented for all archs, we
> weren't running them.
>
> This properly handles the instructions and includes a couple of fixes:
>
> - for arm64, typos in using signed instructions for unsigned Wasm ops
> - for arm, handle the case where dst == src, which leads to us
> overwriting src and then reading junk from the overwritten portions to
> convert
>
> Bug: v8:11265
> Change-Id: I7919280bdf395137e95075deb30ed815100df222
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2728382
> Reviewed-by: Clemens Backes <clemensb@chromium.org>
> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#73178}

Bug: v8:11265
Change-Id: Ib854b526e74710f03e83d5007e3a3f501363ce86
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2733661
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73206}
parent 645631f2
@@ -2711,6 +2711,41 @@ void TurboAssembler::I64x2Abs(QwNeonRegister dst, QwNeonRegister src) {
   vsub(Neon64, dst, dst, tmp);
 }
+
+namespace {
+using AssemblerFunc = void (Assembler::*)(DwVfpRegister, SwVfpRegister,
+                                          VFPConversionMode, const Condition);
+// Helper function for f64x2 convert low instructions.
+// This ensures that we do not overwrite src, if dst == src.
+void F64x2ConvertLowHelper(Assembler* assm, QwNeonRegister dst,
+                           QwNeonRegister src, AssemblerFunc convert_fn) {
+  LowDwVfpRegister src_d = LowDwVfpRegister::from_code(src.low().code());
+  UseScratchRegisterScope temps(assm);
+  if (dst == src) {
+    LowDwVfpRegister tmp = temps.AcquireLowD();
+    assm->vmov(tmp, src_d);
+    src_d = tmp;
+  }
+  // Default arguments are not part of the function type
+  (assm->*convert_fn)(dst.low(), src_d.low(), kDefaultRoundToZero, al);
+  (assm->*convert_fn)(dst.high(), src_d.high(), kDefaultRoundToZero, al);
+}
+}  // namespace
+
+void TurboAssembler::F64x2ConvertLowI32x4S(QwNeonRegister dst,
+                                           QwNeonRegister src) {
+  F64x2ConvertLowHelper(this, dst, src, &Assembler::vcvt_f64_s32);
+}
+
+void TurboAssembler::F64x2ConvertLowI32x4U(QwNeonRegister dst,
+                                           QwNeonRegister src) {
+  F64x2ConvertLowHelper(this, dst, src, &Assembler::vcvt_f64_u32);
+}
+
+void TurboAssembler::F64x2PromoteLowF32x4(QwNeonRegister dst,
+                                          QwNeonRegister src) {
+  F64x2ConvertLowHelper(this, dst, src, &Assembler::vcvt_f64_f32);
+}
 }  // namespace internal
 }  // namespace v8
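An illustrative aside (not part of the commit): the helper above copies src to a scratch D register before converting when dst == src. The scalar C++ sketch below is not V8 code and its struct and function names are made up; it only models why an in-place, lane-by-lane f64x2 conversion reads junk once the first written double lane overlaps an unread source lane, and how copying the source first avoids that.

#include <cstdint>
#include <cstdio>
#include <cstring>

// A toy 128-bit "register": four i32 lanes reinterpreted in place as two f64
// lanes, standing in for a Q register used as both source and destination.
struct Simd128 {
  unsigned char bytes[16];
};

// Buggy in-place convert_low: writing the first double lane (bytes 0..7)
// clobbers the second int32 lane (bytes 4..7) before it has been read.
void ConvertLowInPlaceBuggy(Simd128* reg) {
  for (int lane = 0; lane < 2; ++lane) {
    int32_t in;
    std::memcpy(&in, reg->bytes + 4 * lane, sizeof(in));
    double out = static_cast<double>(in);
    std::memcpy(reg->bytes + 8 * lane, &out, sizeof(out));
  }
}

// Fixed version, mirroring the idea of F64x2ConvertLowHelper: copy the source
// lanes to scratch storage first, then convert from the copy.
void ConvertLowInPlaceFixed(Simd128* reg) {
  int32_t tmp[2];
  std::memcpy(tmp, reg->bytes, sizeof(tmp));  // like the vmov to a scratch D reg
  for (int lane = 0; lane < 2; ++lane) {
    double out = static_cast<double>(tmp[lane]);
    std::memcpy(reg->bytes + 8 * lane, &out, sizeof(out));
  }
}

int main() {
  Simd128 reg;
  int32_t lanes[4] = {7, 9, 0, 0};
  std::memcpy(reg.bytes, lanes, sizeof(lanes));
  ConvertLowInPlaceFixed(&reg);  // the buggy variant would produce 7.0 and junk
  double d[2];
  std::memcpy(d, reg.bytes, sizeof(d));
  std::printf("%f %f\n", d[0], d[1]);  // 7.000000 9.000000
}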
@@ -576,6 +576,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   void I64x2GeS(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
   void V64x2AllTrue(Register dst, QwNeonRegister src);
   void I64x2Abs(QwNeonRegister dst, QwNeonRegister src);
+  void F64x2ConvertLowI32x4S(QwNeonRegister dst, QwNeonRegister src);
+  void F64x2ConvertLowI32x4U(QwNeonRegister dst, QwNeonRegister src);
+  void F64x2PromoteLowF32x4(QwNeonRegister dst, QwNeonRegister src);

  private:
   // Compare single values and then load the fpscr flags to a register.
@@ -963,6 +963,10 @@ void TurboAssembler::F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src,
   // dst = [ src_low, 0x43300000, src_high, 0x4330000 ];
   // 0x43300000'00000000 is a special double where the significand bits
   // precisely represents all uint32 numbers.
+  if (!CpuFeatures::IsSupported(AVX) && dst != src) {
+    movaps(dst, src);
+    src = dst;
+  }
   Unpcklps(dst, src,
            ExternalReferenceAsOperand(
                ExternalReference::
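For context (an illustrative aside, not part of the diff): the 0x43300000 trick that the comment above relies on can be checked with a small scalar sketch. The code below is plain C++ and the helper name is made up; it assumes only standard IEEE-754 double layout.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Placing a uint32 x in the low 32 bits of a double whose high 32 bits are
// 0x43300000 produces the bit pattern of the double 2^52 + x exactly, because
// the 52-bit significand holds x verbatim. Subtracting 2^52 then recovers x
// as an exact double, which is how an unsigned i32 -> f64 conversion can be
// built without a dedicated unsigned convert instruction.
double Uint32ToDoubleViaBits(uint32_t x) {
  uint64_t bits = (uint64_t{0x43300000} << 32) | x;
  double biased;
  std::memcpy(&biased, &bits, sizeof(biased));
  return biased - 4503599627370496.0;  // 2^52
}

int main() {
  std::printf("%.1f\n", Uint32ToDoubleViaBits(0xFFFFFFFFu));  // 4294967295.0
}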
@@ -2407,6 +2407,10 @@ void TurboAssembler::F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src) {
   // dst = [ src_low, 0x43300000, src_high, 0x4330000 ];
   // 0x43300000'00000000 is a special double where the significand bits
   // precisely represents all uint32 numbers.
+  if (!CpuFeatures::IsSupported(AVX) && dst != src) {
+    movaps(dst, src);
+    src = dst;
+  }
   Unpcklps(dst, src,
            ExternalReferenceAsOperand(
                ExternalReference::
@@ -2067,24 +2067,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmF64x2ConvertLowI32x4S: {
-      Simd128Register dst = i.OutputSimd128Register();
-      Simd128Register src = i.InputSimd128Register(0);
-      __ vcvt_f64_s32(dst.low(), SwVfpRegister::from_code(src.code() * 4));
-      __ vcvt_f64_s32(dst.high(), SwVfpRegister::from_code(src.code() * 4 + 1));
+      __ F64x2ConvertLowI32x4S(i.OutputSimd128Register(),
+                               i.InputSimd128Register(0));
       break;
     }
     case kArmF64x2ConvertLowI32x4U: {
-      Simd128Register dst = i.OutputSimd128Register();
-      Simd128Register src = i.InputSimd128Register(0);
-      __ vcvt_f64_u32(dst.low(), SwVfpRegister::from_code(src.code() * 4));
-      __ vcvt_f64_u32(dst.high(), SwVfpRegister::from_code(src.code() * 4 + 1));
+      __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
+                               i.InputSimd128Register(0));
       break;
     }
     case kArmF64x2PromoteLowF32x4: {
-      Simd128Register dst = i.OutputSimd128Register();
-      Simd128Register src = i.InputSimd128Register(0);
-      __ vcvt_f64_f32(dst.low(), SwVfpRegister::from_code(src.code() * 4));
-      __ vcvt_f64_f32(dst.high(), SwVfpRegister::from_code(src.code() * 4 + 1));
+      __ F64x2PromoteLowF32x4(i.OutputSimd128Register(),
+                              i.InputSimd128Register(0));
       break;
     }
     case kArmI64x2SplatI32Pair: {
@@ -2599,23 +2599,20 @@ void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
                                                       LiftoffRegister src) {
-  LowDwVfpRegister src_d = LowDwVfpRegister::from_code(src.low_fp().code());
-  vcvt_f64_s32(dst.low_fp(), src_d.low());
-  vcvt_f64_s32(dst.high_fp(), src_d.high());
+  F64x2ConvertLowI32x4S(liftoff::GetSimd128Register(dst),
+                        liftoff::GetSimd128Register(src));
 }

 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
                                                       LiftoffRegister src) {
-  LowDwVfpRegister src_d = LowDwVfpRegister::from_code(src.low_fp().code());
-  vcvt_f64_u32(dst.low_fp(), src_d.low());
-  vcvt_f64_u32(dst.high_fp(), src_d.high());
+  F64x2ConvertLowI32x4U(liftoff::GetSimd128Register(dst),
+                        liftoff::GetSimd128Register(src));
 }

 void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
                                                     LiftoffRegister src) {
-  LowDwVfpRegister src_d = LowDwVfpRegister::from_code(src.low_fp().code());
-  vcvt_f64_f32(dst.low_fp(), src_d.low());
-  vcvt_f64_f32(dst.high_fp(), src_d.high());
+  F64x2PromoteLowF32x4(liftoff::GetSimd128Register(dst),
+                       liftoff::GetSimd128Register(src));
 }

 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
@@ -1845,14 +1845,14 @@ void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
                                                       LiftoffRegister src) {
-  Sxtl(dst.fp(), src.fp().V2S());
-  Scvtf(dst.fp(), dst.fp());
+  Sxtl(dst.fp().V2D(), src.fp().V2S());
+  Scvtf(dst.fp().V2D(), dst.fp().V2D());
 }

 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
                                                       LiftoffRegister src) {
-  Uxtl(dst.fp(), src.fp().V2S());
-  Ucvtf(dst.fp(), dst.fp());
+  Uxtl(dst.fp().V2D(), src.fp().V2S());
+  Ucvtf(dst.fp().V2D(), dst.fp().V2D());
 }

 void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
@@ -2984,7 +2984,7 @@ void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
 void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
                                                          LiftoffRegister src) {
-  Fcvtzs(dst.fp().V2D(), src.fp().V2D());
+  Fcvtzu(dst.fp().V2D(), src.fp().V2D());
   Uqxtn(dst.fp().V2S(), dst.fp().V2D());
 }
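To make the Fcvtzs to Fcvtzu fix above concrete, here is a scalar sketch (plain C++, not V8 code; the helper names are made up) of the lane semantics involved: i32x4.trunc_sat_f64x2_u_zero must saturate to the uint32 range, while a signed truncation saturates to the int32 range, so any lane value in [2^31, 2^32) comes out wrong with the signed instruction.

#include <cstdint>
#include <cstdio>

// Scalar model of the unsigned truncate-with-saturation lane semantics.
uint32_t TruncSatU32(double x) {
  if (!(x > 0.0)) return 0;                    // NaN and non-positive -> 0
  if (x >= 4294967295.0) return 4294967295u;   // saturate at uint32 max
  return static_cast<uint32_t>(x);
}

// Scalar model of the signed truncate-with-saturation lane semantics.
int32_t TruncSatS32(double x) {
  if (x != x) return 0;                        // NaN -> 0
  if (x <= -2147483648.0) return INT32_MIN;
  if (x >= 2147483647.0) return INT32_MAX;
  return static_cast<int32_t>(x);
}

int main() {
  double v = 3000000000.0;  // representable as uint32 but not as int32
  std::printf("unsigned: %u\n", TruncSatU32(v));  // 3000000000
  std::printf("signed:   %d\n", TruncSatS32(v));  // 2147483647, the wrong lane value
}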
@@ -3481,6 +3481,24 @@ class LiftoffCompiler {
         return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_abs);
       case wasm::kExprI64x2Abs:
         return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_abs);
+      case wasm::kExprF64x2ConvertLowI32x4S:
+        return EmitUnOp<kS128, kS128>(
+            &LiftoffAssembler::emit_f64x2_convert_low_i32x4_s);
+      case wasm::kExprF64x2ConvertLowI32x4U:
+        return EmitUnOp<kS128, kS128>(
+            &LiftoffAssembler::emit_f64x2_convert_low_i32x4_u);
+      case wasm::kExprF64x2PromoteLowF32x4:
+        return EmitUnOp<kS128, kS128>(
+            &LiftoffAssembler::emit_f64x2_promote_low_f32x4);
+      case wasm::kExprF32x4DemoteF64x2Zero:
+        return EmitUnOp<kS128, kS128>(
+            &LiftoffAssembler::emit_f32x4_demote_f64x2_zero);
+      case wasm::kExprI32x4TruncSatF64x2SZero:
+        return EmitUnOp<kS128, kS128>(
+            &LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero);
+      case wasm::kExprI32x4TruncSatF64x2UZero:
+        return EmitUnOp<kS128, kS128>(
+            &LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero);
       default:
         unsupported(decoder, kSimd, "simd");
     }