Commit fe00fbd9 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][liftoff] Compile double precision conversions

I missed actually handling these instructions in liftoff-compiler, so
even though the assembler functions were implemented for all archs, we
weren't running them.

This properly handles the instructions and includes a couple of fixes:

- for arm64, typos in using signed instructions for unsigned Wasm ops
- for arm, handle the case where dst == src, which leads to us
overwriting src and then reading junk from the overwritten portions to
convert

Bug: v8:11265
Change-Id: I7919280bdf395137e95075deb30ed815100df222
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2728382
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73178}
parent d2948ce9
......@@ -2704,6 +2704,41 @@ void TurboAssembler::I64x2Abs(QwNeonRegister dst, QwNeonRegister src) {
vsub(Neon64, dst, dst, tmp);
}
namespace {
// Signature shared by the Assembler vcvt_f64_* entry points.
using AssemblerFunc = void (Assembler::*)(DwVfpRegister, SwVfpRegister,
                                          VFPConversionMode, const Condition);
// Shared implementation for the f64x2 "convert low" family of operations.
// Guards against dst aliasing src: writing the first converted double into
// dst.low() would otherwise clobber src's low lanes before the second lane
// has been read.
void F64x2ConvertLowHelper(Assembler* assm, QwNeonRegister dst,
                           QwNeonRegister src, AssemblerFunc convert_fn) {
  LowDwVfpRegister src_low = LowDwVfpRegister::from_code(src.low().code());
  // The scratch scope must span the conversions below, because src_low may
  // be repointed at the scratch register.
  UseScratchRegisterScope temps(assm);
  if (dst == src) {
    LowDwVfpRegister scratch = temps.AcquireLowD();
    assm->vmov(scratch, src_low);
    src_low = scratch;
  }
  // Calls through a member-function pointer cannot use default arguments,
  // so the defaults (kDefaultRoundToZero, al) are spelled out explicitly.
  (assm->*convert_fn)(dst.low(), src_low.low(), kDefaultRoundToZero, al);
  (assm->*convert_fn)(dst.high(), src_low.high(), kDefaultRoundToZero, al);
}
}  // namespace
// Converts the two low int32 lanes of src (signed) to two float64 lanes in
// dst, via the alias-safe shared helper above.
void TurboAssembler::F64x2ConvertLowI32x4S(QwNeonRegister dst,
QwNeonRegister src) {
F64x2ConvertLowHelper(this, dst, src, &Assembler::vcvt_f64_s32);
}
// Converts the two low int32 lanes of src (unsigned) to two float64 lanes in
// dst, via the alias-safe shared helper above.
void TurboAssembler::F64x2ConvertLowI32x4U(QwNeonRegister dst,
QwNeonRegister src) {
F64x2ConvertLowHelper(this, dst, src, &Assembler::vcvt_f64_u32);
}
// Promotes the two low float32 lanes of src to two float64 lanes in dst,
// via the alias-safe shared helper above.
void TurboAssembler::F64x2PromoteLowF32x4(QwNeonRegister dst,
QwNeonRegister src) {
F64x2ConvertLowHelper(this, dst, src, &Assembler::vcvt_f64_f32);
}
} // namespace internal
} // namespace v8
......
......@@ -576,6 +576,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I64x2GeS(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void V64x2AllTrue(Register dst, QwNeonRegister src);
void I64x2Abs(QwNeonRegister dst, QwNeonRegister src);
// f64x2 "convert low" operations: convert/promote the two low lanes of src
// into two doubles in dst. Implementations handle dst aliasing src.
void F64x2ConvertLowI32x4S(QwNeonRegister dst, QwNeonRegister src);
void F64x2ConvertLowI32x4U(QwNeonRegister dst, QwNeonRegister src);
void F64x2PromoteLowF32x4(QwNeonRegister dst, QwNeonRegister src);
private:
// Compare single values and then load the fpscr flags to a register.
......
......@@ -2068,24 +2068,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmF64x2ConvertLowI32x4S: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register src = i.InputSimd128Register(0);
__ vcvt_f64_s32(dst.low(), SwVfpRegister::from_code(src.code() * 4));
__ vcvt_f64_s32(dst.high(), SwVfpRegister::from_code(src.code() * 4 + 1));
__ F64x2ConvertLowI32x4S(i.OutputSimd128Register(),
i.InputSimd128Register(0));
break;
}
case kArmF64x2ConvertLowI32x4U: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register src = i.InputSimd128Register(0);
__ vcvt_f64_u32(dst.low(), SwVfpRegister::from_code(src.code() * 4));
__ vcvt_f64_u32(dst.high(), SwVfpRegister::from_code(src.code() * 4 + 1));
__ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
i.InputSimd128Register(0));
break;
}
case kArmF64x2PromoteLowF32x4: {
Simd128Register dst = i.OutputSimd128Register();
Simd128Register src = i.InputSimd128Register(0);
__ vcvt_f64_f32(dst.low(), SwVfpRegister::from_code(src.code() * 4));
__ vcvt_f64_f32(dst.high(), SwVfpRegister::from_code(src.code() * 4 + 1));
__ F64x2PromoteLowF32x4(i.OutputSimd128Register(),
i.InputSimd128Register(0));
break;
}
case kArmI64x2SplatI32Pair: {
......
......@@ -2599,23 +2599,20 @@ void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
// Converts the two low i32 lanes of src (signed) to two f64 lanes in dst.
// Delegates to the macro assembler, which handles dst == src safely; the
// previous open-coded vcvt sequence (removed here) clobbered src in that case.
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  F64x2ConvertLowI32x4S(liftoff::GetSimd128Register(dst),
                        liftoff::GetSimd128Register(src));
}
// Converts the two low i32 lanes of src (unsigned) to two f64 lanes in dst.
// Delegates to the macro assembler, which handles dst == src safely; the
// previous open-coded vcvt sequence (removed here) clobbered src in that case.
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  F64x2ConvertLowI32x4U(liftoff::GetSimd128Register(dst),
                        liftoff::GetSimd128Register(src));
}
// Promotes the two low f32 lanes of src to two f64 lanes in dst.
// Delegates to the macro assembler, which handles dst == src safely; the
// previous open-coded vcvt sequence (removed here) clobbered src in that case.
void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  F64x2PromoteLowF32x4(liftoff::GetSimd128Register(dst),
                       liftoff::GetSimd128Register(src));
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
......
......@@ -1845,14 +1845,14 @@ void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
// arm64: sign-extend the two low i32 lanes of src to i64, then convert the
// 2x64-bit lanes to f64 in place. Destination arrangements are written
// explicitly as V2D (the pre-fix lines, removed here, omitted them).
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Sxtl(dst.fp().V2D(), src.fp().V2S());
  Scvtf(dst.fp().V2D(), dst.fp().V2D());
}
// arm64: zero-extend the two low i32 lanes of src to i64, then convert the
// 2x64-bit lanes to f64 in place (unsigned Uxtl/Ucvtf — the commit fixes
// typos where signed instructions were used for unsigned wasm ops).
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Uxtl(dst.fp().V2D(), src.fp().V2S());
  Ucvtf(dst.fp().V2D(), dst.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
......@@ -2984,7 +2984,7 @@ void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
// arm64: unsigned saturating truncation of two f64 lanes to i32, zeroing the
// high lanes. Uses Fcvtzu — the removed Fcvtzs line was a typo (signed
// convert for an unsigned wasm op). Uqxtn narrows 2xi64 -> 2xi32 with
// unsigned saturation and writes the low half, zero-filling the rest.
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  Fcvtzu(dst.fp().V2D(), src.fp().V2D());
  Uqxtn(dst.fp().V2S(), dst.fp().V2D());
}
......
......@@ -3444,6 +3444,24 @@ class LiftoffCompiler {
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_abs);
case wasm::kExprI64x2Abs:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_abs);
case wasm::kExprF64x2ConvertLowI32x4S:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_f64x2_convert_low_i32x4_s);
case wasm::kExprF64x2ConvertLowI32x4U:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_f64x2_convert_low_i32x4_u);
case wasm::kExprF64x2PromoteLowF32x4:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_f64x2_promote_low_f32x4);
case wasm::kExprF32x4DemoteF64x2Zero:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_f32x4_demote_f64x2_zero);
case wasm::kExprI32x4TruncSatF64x2SZero:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero);
case wasm::kExprI32x4TruncSatF64x2UZero:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero);
default:
unsupported(decoder, kSimd, "simd");
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment