Commit 593ab78f authored by Ng Zhi An, committed by V8 LUCI CQ

[wasm-simd] Share i32x4.trunc_sat_f64x2 s,u zero implementation

Bug: v8:11589
Change-Id: I7b55efa76f60eacf31700a544f54042eec963f57
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3115545
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76495}
parent c604dcb5
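For reference, both implementations below must preserve the Wasm semantics of i32x4.trunc_sat_f64x2_{s,u}_zero: each f64 lane is truncated toward zero and saturated to the target integer range, NaN lanes become 0, and the upper two i32 lanes of the result are zeroed. A minimal scalar sketch of the signed per-lane conversion (illustrative only, not V8 code):

```cpp
#include <cmath>
#include <cstdint>
#include <limits>

// One lane of i32x4.trunc_sat_f64x2_s_zero: truncate toward zero,
// saturate to the int32 range, and map NaN to 0.
int32_t TruncSatF64ToI32(double x) {
  if (std::isnan(x)) return 0;
  if (x <= static_cast<double>(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min();
  if (x >= static_cast<double>(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(std::trunc(x));
}
```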
@@ -1545,6 +1545,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void vmovdqa(XMMRegister dst, Operand src) {
     vinstr(0x6F, dst, xmm0, src, k66, k0F, kWIG);
   }
+  void vmovdqa(XMMRegister dst, XMMRegister src) {
+    vinstr(0x6F, dst, xmm0, src, k66, k0F, kWIG);
+  }
   void vmovdqu(XMMRegister dst, Operand src) {
     vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG);
   }
......
@@ -701,88 +701,6 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src,
   }
 }

-void TurboAssembler::I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src,
-                                             XMMRegister scratch,
-                                             Register tmp) {
-  ASM_CODE_COMMENT(this);
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope avx_scope(this, AVX);
-    XMMRegister original_dst = dst;
-    // Make sure we don't overwrite src.
-    if (dst == src) {
-      DCHECK_NE(scratch, src);
-      dst = scratch;
-    }
-    // dst = 0 if src == NaN, else all ones.
-    vcmpeqpd(dst, src, src);
-    // dst = 0 if src == NaN, else INT32_MAX as double.
-    vandpd(dst, dst,
-           ExternalReferenceAsOperand(
-               ExternalReference::address_of_wasm_int32_max_as_double(), tmp));
-    // dst = 0 if src == NaN, src is saturated to INT32_MAX as double.
-    vminpd(dst, src, dst);
-    // Values > INT32_MAX already saturated, values < INT32_MIN raise an
-    // exception, which is masked and returns 0x80000000.
-    vcvttpd2dq(dst, dst);
-    if (original_dst != dst) {
-      vmovaps(original_dst, dst);
-    }
-  } else {
-    if (dst != src) {
-      movaps(dst, src);
-    }
-    movaps(scratch, dst);
-    cmpeqpd(scratch, dst);
-    andps(scratch,
-          ExternalReferenceAsOperand(
-              ExternalReference::address_of_wasm_int32_max_as_double(), tmp));
-    minpd(dst, scratch);
-    cvttpd2dq(dst, dst);
-  }
-}
-
-void TurboAssembler::I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src,
-                                             XMMRegister scratch,
-                                             Register tmp) {
-  ASM_CODE_COMMENT(this);
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope avx_scope(this, AVX);
-    vxorpd(scratch, scratch, scratch);
-    // Saturate to 0.
-    vmaxpd(dst, src, scratch);
-    // Saturate to UINT32_MAX.
-    vminpd(dst, dst,
-           ExternalReferenceAsOperand(
-               ExternalReference::address_of_wasm_uint32_max_as_double(), tmp));
-    // Truncate.
-    vroundpd(dst, dst, kRoundToZero);
-    // Add the special double 2^52, whose significand holds the uint32 value.
-    vaddpd(dst, dst,
-           ExternalReferenceAsOperand(
-               ExternalReference::address_of_wasm_double_2_power_52(), tmp));
-    // Extract low 32 bits of each double's significand, zero top lanes.
-    // dst = [dst[0], dst[2], 0, 0]
-    vshufps(dst, dst, scratch, 0x88);
-  } else {
-    CpuFeatureScope scope(this, SSE4_1);
-    if (dst != src) {
-      movaps(dst, src);
-    }
-    xorps(scratch, scratch);
-    maxpd(dst, scratch);
-    minpd(dst,
-          ExternalReferenceAsOperand(
-              ExternalReference::address_of_wasm_uint32_max_as_double(), tmp));
-    roundpd(dst, dst, kRoundToZero);
-    addpd(dst,
-          ExternalReferenceAsOperand(
-              ExternalReference::address_of_wasm_double_2_power_52(), tmp));
-    shufps(dst, scratch, 0x88);
-  }
-}
-
 void TurboAssembler::I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src,
                                                XMMRegister tmp,
                                                Register scratch) {
......
@@ -397,10 +397,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
   // Defined here to allow usage on both TurboFan and Liftoff.
   void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp1,
                    XMMRegister tmp2, Register scratch);
-  void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src,
-                               XMMRegister scratch, Register tmp);
-  void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src,
-                               XMMRegister scratch, Register tmp);
   void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src,
                                  XMMRegister tmp, Register scratch);
   void I16x8ExtAddPairwiseI8x16U(XMMRegister dst, XMMRegister src,
......
@@ -440,6 +440,81 @@ class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler {
             ExternalReference::address_of_wasm_double_2_power_52(), scratch));
   }

+  void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src,
+                               XMMRegister scratch, Register tmp) {
+    if (CpuFeatures::IsSupported(AVX)) {
+      CpuFeatureScope avx_scope(this, AVX);
+      XMMRegister original_dst = dst;
+      // Make sure we don't overwrite src.
+      if (dst == src) {
+        DCHECK_NE(src, scratch);
+        dst = scratch;
+      }
+      // dst = 0 if src == NaN, else all ones.
+      vcmpeqpd(dst, src, src);
+      // dst = 0 if src == NaN, else INT32_MAX as double.
+      vandpd(
+          dst, dst,
+          ExternalReferenceAsOperand(
+              ExternalReference::address_of_wasm_int32_max_as_double(), tmp));
+      // dst = 0 if src == NaN, src is saturated to INT32_MAX as double.
+      vminpd(dst, src, dst);
+      // Values > INT32_MAX already saturated, values < INT32_MIN raise an
+      // exception, which is masked and returns 0x80000000.
+      vcvttpd2dq(original_dst, dst);
+    } else {
+      if (dst != src) {
+        movaps(dst, src);
+      }
+      movaps(scratch, dst);
+      cmpeqpd(scratch, dst);
+      andps(scratch,
+            ExternalReferenceAsOperand(
+                ExternalReference::address_of_wasm_int32_max_as_double(), tmp));
+      minpd(dst, scratch);
+      cvttpd2dq(dst, dst);
+    }
+  }
+
+  void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src,
+                               XMMRegister scratch, Register tmp) {
+    if (CpuFeatures::IsSupported(AVX)) {
+      CpuFeatureScope avx_scope(this, AVX);
+      vxorpd(scratch, scratch, scratch);
+      // Saturate to 0.
+      vmaxpd(dst, src, scratch);
+      // Saturate to UINT32_MAX.
+      vminpd(
+          dst, dst,
+          ExternalReferenceAsOperand(
+              ExternalReference::address_of_wasm_uint32_max_as_double(), tmp));
+      // Truncate.
+      vroundpd(dst, dst, kRoundToZero);
+      // Add the special double 2^52, whose significand holds the uint32 value.
+      vaddpd(dst, dst,
+             ExternalReferenceAsOperand(
+                 ExternalReference::address_of_wasm_double_2_power_52(), tmp));
+      // Extract low 32 bits of each double's significand, zero top lanes.
+      // dst = [dst[0], dst[2], 0, 0]
+      vshufps(dst, dst, scratch, 0x88);
+    } else {
+      CpuFeatureScope scope(this, SSE4_1);
+      if (dst != src) {
+        movaps(dst, src);
+      }
+      xorps(scratch, scratch);
+      maxpd(dst, scratch);
+      minpd(dst, ExternalReferenceAsOperand(
+                     ExternalReference::address_of_wasm_uint32_max_as_double(),
+                     tmp));
+      roundpd(dst, dst, kRoundToZero);
+      addpd(dst,
+            ExternalReferenceAsOperand(
+                ExternalReference::address_of_wasm_double_2_power_52(), tmp));
+      shufps(dst, scratch, 0x88);
+    }
+  }
+
  private:
   // All implementation-specific methods must be called through this.
   Impl* impl() { return static_cast<Impl*>(this); }
......
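The AVX path above leans on two x86 details worth spelling out: vcmpeqpd produces an all-zeros lane exactly when the input is NaN (since NaN != NaN), and minpd returns its second operand when the first is NaN. A scalar model of the vcmpeqpd/vandpd/vminpd sequence, assuming address_of_wasm_int32_max_as_double() points at 2147483647.0 (illustrative only, not V8 code):

```cpp
#include <cstdint>
#include <cstring>

// Scalar model of one lane of the vcmpeqpd/vandpd/vminpd sequence.
double ClampForCvttpd2dq(double src) {
  const double kInt32MaxAsDouble = 2147483647.0;
  // cmpeqpd: all ones iff src == src, i.e. all zeros exactly for NaN.
  uint64_t mask = (src == src) ? ~uint64_t{0} : uint64_t{0};
  uint64_t bits;
  std::memcpy(&bits, &kInt32MaxAsDouble, sizeof bits);
  bits &= mask;  // andpd: 0.0 for a NaN lane, INT32_MAX as double otherwise.
  double limit;
  std::memcpy(&limit, &bits, sizeof limit);
  // minpd(src, limit) yields its second operand when the first is NaN, so
  // NaN lanes collapse to 0.0 and large lanes saturate to INT32_MAX.
  return src < limit ? src : limit;
}
```

No lower clamp is needed: cvttpd2dq's masked invalid result for out-of-range inputs is 0x80000000, which is exactly INT32_MIN.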
@@ -2302,78 +2302,6 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src,
   }
 }

-void TurboAssembler::I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src) {
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope avx_scope(this, AVX);
-    XMMRegister original_dst = dst;
-    // Make sure we don't overwrite src.
-    if (dst == src) {
-      DCHECK_NE(src, kScratchDoubleReg);
-      dst = kScratchDoubleReg;
-    }
-    // dst = 0 if src == NaN, else all ones.
-    vcmpeqpd(dst, src, src);
-    // dst = 0 if src == NaN, else INT32_MAX as double.
-    vandpd(dst, dst,
-           ExternalReferenceAsOperand(
-               ExternalReference::address_of_wasm_int32_max_as_double()));
-    // dst = 0 if src == NaN, src is saturated to INT32_MAX as double.
-    vminpd(dst, src, dst);
-    // Values > INT32_MAX already saturated, values < INT32_MIN raise an
-    // exception, which is masked and returns 0x80000000.
-    vcvttpd2dq(dst, dst);
-    if (original_dst != dst) {
-      Move(original_dst, dst);
-    }
-  } else {
-    if (dst != src) {
-      Move(dst, src);
-    }
-    Move(kScratchDoubleReg, dst);
-    cmpeqpd(kScratchDoubleReg, dst);
-    andps(kScratchDoubleReg,
-          ExternalReferenceAsOperand(
-              ExternalReference::address_of_wasm_int32_max_as_double()));
-    minpd(dst, kScratchDoubleReg);
-    cvttpd2dq(dst, dst);
-  }
-}
-
-void TurboAssembler::I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src) {
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope avx_scope(this, AVX);
-    vxorpd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
-    // Saturate to 0.
-    vmaxpd(dst, src, kScratchDoubleReg);
-    // Saturate to UINT32_MAX.
-    vminpd(dst, dst,
-           ExternalReferenceAsOperand(
-               ExternalReference::address_of_wasm_uint32_max_as_double()));
-    // Truncate.
-    vroundpd(dst, dst, kRoundToZero);
-    // Add the special double 2^52, whose significand holds the uint32 value.
-    vaddpd(dst, dst,
-           ExternalReferenceAsOperand(
-               ExternalReference::address_of_wasm_double_2_power_52()));
-    // Extract low 32 bits of each double's significand, zero top lanes.
-    // dst = [dst[0], dst[2], 0, 0]
-    vshufps(dst, dst, kScratchDoubleReg, 0x88);
-  } else {
-    CpuFeatureScope scope(this, SSE4_1);
-    if (dst != src) {
-      Move(dst, src);
-    }
-    xorps(kScratchDoubleReg, kScratchDoubleReg);
-    maxpd(dst, kScratchDoubleReg);
-    minpd(dst, ExternalReferenceAsOperand(
-                   ExternalReference::address_of_wasm_uint32_max_as_double()));
-    roundpd(dst, dst, kRoundToZero);
-    addpd(dst, ExternalReferenceAsOperand(
-                   ExternalReference::address_of_wasm_double_2_power_52()));
-    shufps(dst, kScratchDoubleReg, 0x88);
-  }
-}
-
 void TurboAssembler::I16x8ExtAddPairwiseI8x16S(XMMRegister dst,
                                                XMMRegister src) {
   // pmaddubsw treats the first operand as unsigned, so the external reference
......
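The unsigned variant avoids a packed double-to-unsigned conversion (which x86 lacks before AVX-512) via the 2^52 trick: once a lane is clamped to [0, UINT32_MAX] and rounded toward zero, adding 2^52 forces the double's exponent to 52, so the integer value occupies the low bits of the significand and shufps 0x88 can gather the low 32 bits of each lane. A scalar model (illustrative only, not V8 code):

```cpp
#include <cstdint>
#include <cstring>

// Scalar model of the addpd(2^52) / shufps extraction. Precondition: x is
// an integral double in [0, 4294967295], as maxpd/minpd/roundpd guarantee.
uint32_t ExtractUint32Via2Pow52(double x) {
  double shifted = x + 4503599627370496.0;  // x + 2^52, exact since x < 2^32
  // shifted encodes 2^52 * (1 + x / 2^52): the exponent field is pinned at
  // 52, the 52-bit significand stores x, and so the low 32 bits of the bit
  // pattern are x itself.
  uint64_t bits;
  std::memcpy(&bits, &shifted, sizeof bits);
  return static_cast<uint32_t>(bits);  // the lane that shufps 0x88 keeps
}
```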
@@ -481,9 +481,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
   // Defined here to allow usage on both TurboFan and Liftoff.
   void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp);
-  void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src);
-  void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src);
   void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src);
   void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src);
......
@@ -2635,12 +2635,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kX64I32x4TruncSatF64x2SZero: {
       __ I32x4TruncSatF64x2SZero(i.OutputSimd128Register(),
-                                 i.InputSimd128Register(0));
+                                 i.InputSimd128Register(0), kScratchDoubleReg,
+                                 kScratchRegister);
       break;
     }
     case kX64I32x4TruncSatF64x2UZero: {
       __ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
-                                 i.InputSimd128Register(0));
+                                 i.InputSimd128Register(0), kScratchDoubleReg,
+                                 kScratchRegister);
       break;
     }
     case kX64F32x4Splat: {
......
@@ -3904,12 +3904,14 @@ void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
 void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
                                                          LiftoffRegister src) {
-  I32x4TruncSatF64x2SZero(dst.fp(), src.fp());
+  I32x4TruncSatF64x2SZero(dst.fp(), src.fp(), kScratchDoubleReg,
+                          kScratchRegister);
 }

 void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
                                                          LiftoffRegister src) {
-  I32x4TruncSatF64x2UZero(dst.fp(), src.fp());
+  I32x4TruncSatF64x2UZero(dst.fp(), src.fp(), kScratchDoubleReg,
+                          kScratchRegister);
 }

 void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
......
@@ -825,6 +825,7 @@ TEST(DisasmIa320) {
   __ vmovshdup(xmm1, xmm2);
   __ vbroadcastss(xmm1, Operand(ebx, ecx, times_4, 10000));
   __ vmovdqa(xmm0, Operand(ebx, ecx, times_4, 10000));
+  __ vmovdqa(xmm0, xmm7);
   __ vmovdqu(xmm0, Operand(ebx, ecx, times_4, 10000));
   __ vmovdqu(Operand(ebx, ecx, times_4, 10000), xmm0);
   __ vmovd(xmm0, edi);
......