Commit 9edfb35a authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Move i64x2 abs gts ges into SharedTurboAssembler

Also clean up some comments in liftoff-assembler-x64.h.

Bug: v8:11589
Change-Id: I47fe5c2c794c863be1afde86d289ea197219a4f8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2787591
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73692}
parent bda08490
......@@ -857,85 +857,6 @@ void TurboAssembler::I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src,
}
}
// Emits dst = |src| per signed i64x2 lane. scratch may be clobbered.
void TurboAssembler::I64x2Abs(XMMRegister dst, XMMRegister src,
XMMRegister scratch) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
// Use a temporary that does not alias src, so the negation below does not
// destroy the original value needed by the blend.
XMMRegister tmp = dst == src ? scratch : dst;
vpxor(tmp, tmp, tmp);
vpsubq(tmp, tmp, src);  // tmp = 0 - src (per-lane negation).
// vblendvpd selects per 64-bit lane on the sign (top) bit of the mask
// operand (src): negative lanes take the negated value, others keep src.
vblendvpd(dst, src, tmp, src);
} else {
CpuFeatureScope sse_scope(this, SSE3);
// Copy each lane's high dword into both dwords, then arithmetic-shift by
// 31 to build an all-ones/all-zeros sign mask per 64-bit lane.
movshdup(scratch, src);
if (dst != src) {
movaps(dst, src);
}
psrad(scratch, 31);
// abs(x) = (x ^ sign_mask) - sign_mask.
xorps(dst, scratch);
psubq(dst, scratch);
}
}
// Emits dst = (src0 > src1) per signed i64x2 lane (all-ones on true).
// Register-aliasing requirements differ per CPU feature level; callers must
// satisfy the DCHECKs of the path that will be taken.
void TurboAssembler::I64x2GtS(XMMRegister dst, XMMRegister src0,
XMMRegister src1, XMMRegister scratch) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpcmpgtq(dst, src0, src1);
} else if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope(this, SSE4_2);
// Non-AVX pcmpgtq is destructive on its first operand.
DCHECK_EQ(dst, src0);
pcmpgtq(dst, src1);
} else {
// No 64-bit compare available: synthesize it from 32-bit operations.
CpuFeatureScope sse_scope(this, SSE3);
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
movaps(dst, src1);
movaps(scratch, src0);
psubq(dst, src0);  // dst = src1 - src0; borrow propagates into high dword.
pcmpeqd(scratch, src1);  // Per-dword equality mask of src0 vs src1.
// Keep the subtraction's high-dword sign only where the high dwords are
// equal (the low dwords then break the tie via the borrow).
andps(dst, scratch);
movaps(scratch, src0);
// Where the high dwords differ, the 32-bit signed compare of the high
// dwords decides the 64-bit result.
pcmpgtd(scratch, src1);
orps(dst, scratch);
// Replicate each lane's high dword into both dwords to form the mask.
movshdup(dst, dst);
}
}
// Emits dst = (src0 >= src1) per signed i64x2 lane, computed as
// NOT(src1 > src0). scratch is clobbered on every path.
void TurboAssembler::I64x2GeS(XMMRegister dst, XMMRegister src0,
XMMRegister src1, XMMRegister scratch) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpcmpgtq(dst, src1, src0);  // dst = src1 > src0.
vpcmpeqd(scratch, scratch, scratch);  // scratch = all-ones.
vpxor(dst, dst, scratch);  // Invert to get src0 >= src1.
} else if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope(this, SSE4_2);
// pcmpgtq is destructive, so dst must hold src1 and must not alias src0.
DCHECK_NE(dst, src0);
if (dst != src1) {
movaps(dst, src1);
}
pcmpgtq(dst, src0);  // dst = src1 > src0.
pcmpeqd(scratch, scratch);
xorps(dst, scratch);  // Invert.
} else {
// No 64-bit compare: synthesize src1 > src0 from 32-bit operations (same
// sequence as I64x2GtS with operands swapped), then invert.
CpuFeatureScope sse_scope(this, SSE3);
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
movaps(dst, src0);
movaps(scratch, src1);
psubq(dst, src1);  // dst = src0 - src1; borrow propagates into high dword.
pcmpeqd(scratch, src0);  // Per-dword equality mask.
andps(dst, scratch);  // Use borrow sign only where high dwords are equal.
movaps(scratch, src1);
pcmpgtd(scratch, src0);  // High-dword signed compare decides otherwise.
orps(dst, scratch);
movshdup(dst, dst);  // Broadcast each lane's high dword into the lane.
pcmpeqd(scratch, scratch);
xorps(dst, scratch);  // Invert (src1 > src0) into (src0 >= src1).
}
}
void TurboAssembler::I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src,
XMMRegister tmp,
Register scratch) {
......
......@@ -727,11 +727,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
XMMRegister scratch, Register tmp);
void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src,
XMMRegister scratch, Register tmp);
void I64x2Abs(XMMRegister dst, XMMRegister src, XMMRegister scratch);
void I64x2GtS(XMMRegister dst, XMMRegister src0, XMMRegister src1,
XMMRegister scratch);
void I64x2GeS(XMMRegister dst, XMMRegister src0, XMMRegister src1,
XMMRegister scratch);
void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src,
XMMRegister tmp, Register scratch);
void I16x8ExtAddPairwiseI8x16U(XMMRegister dst, XMMRegister src,
......
......@@ -204,6 +204,85 @@ void SharedTurboAssembler::I32x4UConvertI16x8High(XMMRegister dst,
}
}
// Emits dst = |src| per signed i64x2 lane. scratch may be clobbered.
// Shared between the ia32 and x64 backends.
void SharedTurboAssembler::I64x2Abs(XMMRegister dst, XMMRegister src,
XMMRegister scratch) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
// Use a temporary that does not alias src, so the negation below does not
// destroy the original value needed by the blend.
XMMRegister tmp = dst == src ? scratch : dst;
vpxor(tmp, tmp, tmp);
vpsubq(tmp, tmp, src);  // tmp = 0 - src (per-lane negation).
// vblendvpd selects per 64-bit lane on the sign (top) bit of the mask
// operand (src): negative lanes take the negated value, others keep src.
vblendvpd(dst, src, tmp, src);
} else {
CpuFeatureScope sse_scope(this, SSE3);
// Copy each lane's high dword into both dwords, then arithmetic-shift by
// 31 to build an all-ones/all-zeros sign mask per 64-bit lane.
movshdup(scratch, src);
if (dst != src) {
movaps(dst, src);
}
psrad(scratch, 31);
// abs(x) = (x ^ sign_mask) - sign_mask.
xorps(dst, scratch);
psubq(dst, scratch);
}
}
// Emits dst = (src0 > src1) per signed i64x2 lane (all-ones on true).
// Register-aliasing requirements differ per CPU feature level; callers must
// satisfy the DCHECKs of the path that will be taken.
void SharedTurboAssembler::I64x2GtS(XMMRegister dst, XMMRegister src0,
XMMRegister src1, XMMRegister scratch) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpcmpgtq(dst, src0, src1);
} else if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope(this, SSE4_2);
// Non-AVX pcmpgtq is destructive on its first operand.
DCHECK_EQ(dst, src0);
pcmpgtq(dst, src1);
} else {
// No 64-bit compare available: synthesize it from 32-bit operations.
CpuFeatureScope sse_scope(this, SSE3);
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
movaps(dst, src1);
movaps(scratch, src0);
psubq(dst, src0);  // dst = src1 - src0; borrow propagates into high dword.
pcmpeqd(scratch, src1);  // Per-dword equality mask of src0 vs src1.
// Keep the subtraction's high-dword sign only where the high dwords are
// equal (the low dwords then break the tie via the borrow).
andps(dst, scratch);
movaps(scratch, src0);
// Where the high dwords differ, the 32-bit signed compare of the high
// dwords decides the 64-bit result.
pcmpgtd(scratch, src1);
orps(dst, scratch);
// Replicate each lane's high dword into both dwords to form the mask.
movshdup(dst, dst);
}
}
// Emits dst = (src0 >= src1) per signed i64x2 lane, computed as
// NOT(src1 > src0). scratch is clobbered on every path.
void SharedTurboAssembler::I64x2GeS(XMMRegister dst, XMMRegister src0,
XMMRegister src1, XMMRegister scratch) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpcmpgtq(dst, src1, src0);  // dst = src1 > src0.
vpcmpeqd(scratch, scratch, scratch);  // scratch = all-ones.
vpxor(dst, dst, scratch);  // Invert to get src0 >= src1.
} else if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope(this, SSE4_2);
// pcmpgtq is destructive, so dst must hold src1 and must not alias src0.
DCHECK_NE(dst, src0);
if (dst != src1) {
movaps(dst, src1);
}
pcmpgtq(dst, src0);  // dst = src1 > src0.
pcmpeqd(scratch, scratch);
xorps(dst, scratch);  // Invert.
} else {
// No 64-bit compare: synthesize src1 > src0 from 32-bit operations (same
// sequence as I64x2GtS with operands swapped), then invert.
CpuFeatureScope sse_scope(this, SSE3);
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
movaps(dst, src0);
movaps(scratch, src1);
psubq(dst, src1);  // dst = src0 - src1; borrow propagates into high dword.
pcmpeqd(scratch, src0);  // Per-dword equality mask.
andps(dst, scratch);  // Use borrow sign only where high dwords are equal.
movaps(scratch, src1);
pcmpgtd(scratch, src0);  // High-dword signed compare decides otherwise.
orps(dst, scratch);
movshdup(dst, dst);  // Broadcast each lane's high dword into the lane.
pcmpeqd(scratch, scratch);
xorps(dst, scratch);  // Invert (src1 > src0) into (src0 >= src1).
}
}
// 1. Unpack src0, src1 into even-number elements of scratch.
// 2. Unpack src1, src0 into even-number elements of dst.
// 3. Multiply 1. with 2.
......
......@@ -36,6 +36,11 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
void I32x4SConvertI16x8High(XMMRegister dst, XMMRegister src);
void I32x4UConvertI16x8High(XMMRegister dst, XMMRegister src,
XMMRegister scratch);
void I64x2Abs(XMMRegister dst, XMMRegister src, XMMRegister scratch);
void I64x2GtS(XMMRegister dst, XMMRegister src0, XMMRegister src1,
XMMRegister scratch);
void I64x2GeS(XMMRegister dst, XMMRegister src0, XMMRegister src1,
XMMRegister scratch);
void I64x2ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch, bool low, bool is_signed);
void I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src);
......
......@@ -2318,84 +2318,6 @@ void TurboAssembler::I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src) {
}
}
// Emits dst = |src| per signed i64x2 lane. Clobbers kScratchDoubleReg.
void TurboAssembler::I64x2Abs(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
// Use a temporary that does not alias src, so the negation below does not
// destroy the original value needed by the blend.
XMMRegister tmp = dst == src ? kScratchDoubleReg : dst;
CpuFeatureScope avx_scope(this, AVX);
vpxor(tmp, tmp, tmp);
vpsubq(tmp, tmp, src);  // tmp = 0 - src (per-lane negation).
// vblendvpd selects per 64-bit lane on the sign (top) bit of the mask
// operand (src): negative lanes take the negated value, others keep src.
vblendvpd(dst, src, tmp, src);
} else {
CpuFeatureScope sse_scope(this, SSE3);
// Copy each lane's high dword into both dwords, then arithmetic-shift by
// 31 to build an all-ones/all-zeros sign mask per 64-bit lane.
movshdup(kScratchDoubleReg, src);
if (dst != src) {
movaps(dst, src);
}
psrad(kScratchDoubleReg, 31);
// abs(x) = (x ^ sign_mask) - sign_mask.
xorps(dst, kScratchDoubleReg);
psubq(dst, kScratchDoubleReg);
}
}
// Emits dst = (src0 > src1) per signed i64x2 lane (all-ones on true).
// Clobbers kScratchDoubleReg on the SSE3 path; register-aliasing
// requirements differ per CPU feature level (see DCHECKs).
void TurboAssembler::I64x2GtS(XMMRegister dst, XMMRegister src0,
XMMRegister src1) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpcmpgtq(dst, src0, src1);
} else if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope(this, SSE4_2);
// Non-AVX pcmpgtq is destructive on its first operand.
DCHECK_EQ(dst, src0);
pcmpgtq(dst, src1);
} else {
// No 64-bit compare available: synthesize it from 32-bit operations.
CpuFeatureScope sse_scope(this, SSE3);
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
movaps(dst, src1);
movaps(kScratchDoubleReg, src0);
psubq(dst, src0);  // dst = src1 - src0; borrow propagates into high dword.
pcmpeqd(kScratchDoubleReg, src1);  // Per-dword equality mask.
// Keep the subtraction's high-dword sign only where the high dwords are
// equal (the low dwords then break the tie via the borrow).
andps(dst, kScratchDoubleReg);
movaps(kScratchDoubleReg, src0);
// Where the high dwords differ, the 32-bit signed compare of the high
// dwords decides the 64-bit result.
pcmpgtd(kScratchDoubleReg, src1);
orps(dst, kScratchDoubleReg);
// Replicate each lane's high dword into both dwords to form the mask.
movshdup(dst, dst);
}
}
// Emits dst = (src0 >= src1) per signed i64x2 lane, computed as
// NOT(src1 > src0). Clobbers kScratchDoubleReg on every path.
void TurboAssembler::I64x2GeS(XMMRegister dst, XMMRegister src0,
XMMRegister src1) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpcmpgtq(dst, src1, src0);  // dst = src1 > src0.
vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
vpxor(dst, dst, kScratchDoubleReg);  // Invert to get src0 >= src1.
} else if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope(this, SSE4_2);
// pcmpgtq is destructive, so dst must hold src1 and must not alias src0.
DCHECK_NE(dst, src0);
if (dst != src1) {
movaps(dst, src1);
}
pcmpgtq(dst, src0);  // dst = src1 > src0.
pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
xorps(dst, kScratchDoubleReg);  // Invert.
} else {
// No 64-bit compare: synthesize src1 > src0 from 32-bit operations (same
// sequence as I64x2GtS with operands swapped), then invert.
CpuFeatureScope sse_scope(this, SSE3);
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
movaps(dst, src0);
movaps(kScratchDoubleReg, src1);
psubq(dst, src1);  // dst = src0 - src1; borrow propagates into high dword.
pcmpeqd(kScratchDoubleReg, src0);  // Per-dword equality mask.
andps(dst, kScratchDoubleReg);  // Borrow sign counts only on equal highs.
movaps(kScratchDoubleReg, src1);
pcmpgtd(kScratchDoubleReg, src0);  // High-dword compare decides otherwise.
orps(dst, kScratchDoubleReg);
movshdup(dst, dst);  // Broadcast each lane's high dword into the lane.
pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
xorps(dst, kScratchDoubleReg);  // Invert (src1 > src0) into (src0 >= src1).
}
}
void TurboAssembler::I16x8ExtAddPairwiseI8x16S(XMMRegister dst,
XMMRegister src) {
// pmaddubsw treats the first operand as unsigned, so the external reference
......
......@@ -627,10 +627,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src);
void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src);
void I64x2Abs(XMMRegister dst, XMMRegister src);
void I64x2GtS(XMMRegister dst, XMMRegister src0, XMMRegister src1);
void I64x2GeS(XMMRegister dst, XMMRegister src0, XMMRegister src1);
void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src);
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src);
......
......@@ -2780,7 +2780,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I64x2Abs: {
__ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0),
kScratchDoubleReg);
break;
}
case kX64I64x2Neg: {
......@@ -2869,12 +2870,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I64x2GtS: {
__ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
i.InputSimd128Register(1), kScratchDoubleReg);
break;
}
case kX64I64x2GeS: {
__ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
i.InputSimd128Register(1), kScratchDoubleReg);
break;
}
case kX64I64x2ShrU: {
......
......@@ -2738,21 +2738,21 @@ void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
// Different register alias requirements depending on CpuFeatures supported:
if (CpuFeatures::IsSupported(AVX)) {
// 1. AVX, no requirements.
I64x2GtS(dst.fp(), lhs.fp(), rhs.fp());
I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
} else if (CpuFeatures::IsSupported(SSE4_2)) {
// 2. SSE4_2, dst == lhs.
if (dst != lhs) {
movaps(dst.fp(), lhs.fp());
}
I64x2GtS(dst.fp(), dst.fp(), rhs.fp());
I64x2GtS(dst.fp(), dst.fp(), rhs.fp(), kScratchDoubleReg);
} else {
// 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
if (dst == lhs || dst == rhs) {
// macro-assembler uses kScratchDoubleReg, so don't use it.
I64x2GtS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp());
I64x2GtS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
kScratchDoubleReg);
movaps(dst.fp(), liftoff::kScratchDoubleReg2);
} else {
I64x2GtS(dst.fp(), lhs.fp(), rhs.fp());
I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}
}
}
......@@ -2762,24 +2762,24 @@ void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
// Different register alias requirements depending on CpuFeatures supported:
if (CpuFeatures::IsSupported(AVX)) {
// 1. AVX, no requirements.
I64x2GeS(dst.fp(), lhs.fp(), rhs.fp());
I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
} else if (CpuFeatures::IsSupported(SSE4_2)) {
// 2. SSE4_2, dst != lhs.
if (dst == lhs) {
// macro-assembler uses kScratchDoubleReg, so don't use it.
I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp());
I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
kScratchDoubleReg);
movaps(dst.fp(), liftoff::kScratchDoubleReg2);
} else {
I64x2GeS(dst.fp(), lhs.fp(), rhs.fp());
I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}
} else {
// 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
if (dst == lhs || dst == rhs) {
// macro-assembler uses kScratchDoubleReg, so don't use it.
I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp());
I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
kScratchDoubleReg);
movaps(dst.fp(), liftoff::kScratchDoubleReg2);
} else {
I64x2GeS(dst.fp(), lhs.fp(), rhs.fp());
I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}
}
}
......@@ -4107,7 +4107,7 @@ void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
LiftoffRegister src) {
I64x2Abs(dst.fp(), src.fp());
I64x2Abs(dst.fp(), src.fp(), kScratchDoubleReg);
}
void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment