Commit c604dcb5 authored by Ng Zhi An, committed by V8 LUCI CQ

[wasm-simd] Share f64x2.convert_low_i32x4_u implementation

We create an ExternalReferenceAsOperand helper in
SharedTurboAssemblerBase that delegates to the arch-specific
implementation in TurboAssembler, because the ia32 and x64
implementations of ExternalReferenceAsOperand differ slightly.

Bug: v8:11589
Change-Id: I378ea6b72fb2bba1a37482cc31cd58db0ba35721
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3114604
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76494}
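The delegation described in the message above is the CRTP pattern used by
SharedTurboAssemblerBase: the shared base implements the SIMD helper once and
reaches the arch-specific ExternalReferenceAsOperand through impl(). A minimal
standalone sketch of the idea (hypothetical, simplified types and names such as
Operand, SharedAssemblerBase, X64LikeAssembler, and SharedSimdHelper; not the
actual V8 classes or signatures):

```cpp
#include <cstdio>

struct Operand {
  int displacement;  // stand-in for a real memory operand
};

template <typename Impl>
class SharedAssemblerBase {
 public:
  // Stand-in for a shared helper such as F64x2ConvertLowI32x4U: it only needs
  // an operand for an external reference, however the architecture builds it.
  void SharedSimdHelper(int reference) {
    Operand op = ExternalReferenceAsOperand(reference);
    std::printf("using operand with displacement %d\n", op.displacement);
  }

 private:
  // All implementation-specific methods are reached through this.
  Impl* impl() { return static_cast<Impl*>(this); }

  Operand ExternalReferenceAsOperand(int reference) {
    return impl()->ExternalReferenceAsOperand(reference);
  }
};

// Each architecture provides its own way of turning an external reference
// into an operand; ia32 and x64 differ slightly here.
class X64LikeAssembler : public SharedAssemblerBase<X64LikeAssembler> {
 public:
  Operand ExternalReferenceAsOperand(int reference) {
    return Operand{reference * 8};  // placeholder for the real address math
  }
};

int main() {
  X64LikeAssembler masm;
  masm.SharedSimdHelper(42);  // the shared helper uses the x64-style operand
  return 0;
}
```

The same shared helper body can then serve both ia32 and x64 without
duplicating the SIMD instruction sequence.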
parent 3161d678
@@ -701,25 +701,6 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src,
  }
}
void TurboAssembler::F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src,
Register tmp) {
// dst = [ src_low, 0x43300000, src_high, 0x4330000 ];
// 0x43300000'00000000 is a special double where the significand bits
// precisely represents all uint32 numbers.
if (!CpuFeatures::IsSupported(AVX) && dst != src) {
movaps(dst, src);
src = dst;
}
Unpcklps(dst, src,
ExternalReferenceAsOperand(
ExternalReference::
address_of_wasm_f64x2_convert_low_i32x4_u_int_mask(),
tmp));
Subpd(dst, dst,
ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52(), tmp));
}
void TurboAssembler::I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src,
                                             XMMRegister scratch,
                                             Register tmp) {
......
@@ -397,7 +397,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
  // Defined here to allow usage on both TurboFan and Liftoff.
  void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp1,
                   XMMRegister tmp2, Register scratch);
  void F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src, Register tmp);
  void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src,
                               XMMRegister scratch, Register tmp);
  void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src,
......
@@ -418,8 +418,36 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
template <typename Impl>
class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler {
  using SharedTurboAssembler::SharedTurboAssembler;
  // TODO(zhin): intentionally empty for now, will move polymorphic functions
  // here in future changes.
 public:
void F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src,
Register scratch) {
ASM_CODE_COMMENT(this);
// dst = [ src_low, 0x43300000, src_high, 0x43300000 ];
// 0x43300000'00000000 is a special double where the significand bits
// precisely represent all uint32 numbers.
if (!CpuFeatures::IsSupported(AVX) && dst != src) {
movaps(dst, src);
src = dst;
}
Unpcklps(dst, src,
ExternalReferenceAsOperand(
ExternalReference::
address_of_wasm_f64x2_convert_low_i32x4_u_int_mask(),
scratch));
Subpd(dst,
ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52(), scratch));
}
private:
// All implementation-specific methods must be called through this.
Impl* impl() { return static_cast<Impl*>(this); }
Operand ExternalReferenceAsOperand(ExternalReference reference,
Register scratch) {
return impl()->ExternalReferenceAsOperand(reference, scratch);
}
};
}  // namespace internal
......
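The Unpcklps/Subpd pair in the shared F64x2ConvertLowI32x4U above uses the
standard 2^52 bias trick: interleaving each uint32 lane with 0x43300000 builds
the double 2^52 + u, and subtracting 2^52 leaves exactly double(u). A minimal
scalar sketch of why this is exact (hypothetical helper ConvertU32ViaBitTrick,
not V8 code):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// Place u in the low 32 bits of the double with bit pattern
// 0x43300000'00000000 (which is 2^52). All integers in [2^52, 2^53) are
// exactly representable as doubles, so the result is exactly 2^52 + u, and
// subtracting 2^52 recovers double(u) with no rounding.
double ConvertU32ViaBitTrick(uint32_t u) {
  const uint64_t bits = (uint64_t{0x43300000} << 32) | u;
  double biased;
  std::memcpy(&biased, &bits, sizeof(biased));   // biased == 2^52 + u
  const double two_pow_52 = 4503599627370496.0;  // 2^52
  return biased - two_pow_52;                    // exactly double(u)
}

int main() {
  assert(ConvertU32ViaBitTrick(0) == 0.0);
  assert(ConvertU32ViaBitTrick(1) == 1.0);
  assert(ConvertU32ViaBitTrick(0xFFFFFFFFu) == 4294967295.0);
  return 0;
}
```

In the SIMD version, Unpcklps builds that biased bit pattern for both lanes at
once and Subpd subtracts 2^52 from each lane; the scratch register is only
needed to address the two external-reference constants.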
...@@ -2302,22 +2302,6 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src, ...@@ -2302,22 +2302,6 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src,
} }
} }
void TurboAssembler::F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src) {
// dst = [ src_low, 0x43300000, src_high, 0x4330000 ];
// 0x43300000'00000000 is a special double where the significand bits
// precisely represents all uint32 numbers.
if (!CpuFeatures::IsSupported(AVX) && dst != src) {
movaps(dst, src);
src = dst;
}
Unpcklps(dst, src,
ExternalReferenceAsOperand(
ExternalReference::
address_of_wasm_f64x2_convert_low_i32x4_u_int_mask()));
Subpd(dst, ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
}
void TurboAssembler::I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
......
...@@ -481,7 +481,6 @@ class V8_EXPORT_PRIVATE TurboAssembler ...@@ -481,7 +481,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
// Defined here to allow usage on both TurboFan and Liftoff. // Defined here to allow usage on both TurboFan and Liftoff.
void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp); void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp);
void F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src);
void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src); void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src);
void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src); void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src);
......
...@@ -2622,7 +2622,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2622,7 +2622,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64F64x2ConvertLowI32x4U: { case kX64F64x2ConvertLowI32x4U: {
__ F64x2ConvertLowI32x4U(i.OutputSimd128Register(), __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
i.InputSimd128Register(0)); i.InputSimd128Register(0), kScratchRegister);
break; break;
} }
case kX64F64x2PromoteLowF32x4: { case kX64F64x2PromoteLowF32x4: {
......
...@@ -3753,7 +3753,7 @@ void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst, ...@@ -3753,7 +3753,7 @@ void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
F64x2ConvertLowI32x4U(dst.fp(), src.fp()); F64x2ConvertLowI32x4U(dst.fp(), src.fp(), kScratchRegister);
} }
void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
......