Commit 884968c0 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Optimize signed compares codegen

Improve SSE codegen similar to suggestions in
https://crrev.com/c/2698066:

- s/movdqa/movaps/
- s/por/orps/
- s/pxor/xorps/
- s/pand/andps/
- use movshdup (SSE3) instead of pshufd

Bug: v8:11415
Change-Id: I2e76cbbe16267c055e24f258354b77994aed47b8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2713131
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73057}
parent 1cc8772a
......@@ -1083,7 +1083,7 @@ void TurboAssembler::I64x2GtS(XMMRegister dst, XMMRegister src0,
DCHECK_EQ(dst, src0);
pcmpgtq(dst, src1);
} else {
CpuFeatureScope sse_scope(this, SSSE3);
CpuFeatureScope sse_scope(this, SSE3);
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
movaps(dst, src1);
......@@ -1115,7 +1115,7 @@ void TurboAssembler::I64x2GeS(XMMRegister dst, XMMRegister src0,
pcmpeqd(scratch, scratch);
xorps(dst, scratch);
} else {
CpuFeatureScope sse_scope(this, SSSE3);
CpuFeatureScope sse_scope(this, SSE3);
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
movaps(dst, src0);
......
......@@ -2514,17 +2514,18 @@ void TurboAssembler::I64x2GtS(XMMRegister dst, XMMRegister src0,
DCHECK_EQ(dst, src0);
pcmpgtq(dst, src1);
} else {
CpuFeatureScope sse_scope(this, SSE3);
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
movdqa(dst, src1);
movdqa(kScratchDoubleReg, src0);
movaps(dst, src1);
movaps(kScratchDoubleReg, src0);
psubq(dst, src0);
pcmpeqd(kScratchDoubleReg, src1);
pand(dst, kScratchDoubleReg);
movdqa(kScratchDoubleReg, src0);
andps(dst, kScratchDoubleReg);
movaps(kScratchDoubleReg, src0);
pcmpgtd(kScratchDoubleReg, src1);
por(dst, kScratchDoubleReg);
pshufd(dst, dst, 0xF5);
orps(dst, kScratchDoubleReg);
movshdup(dst, dst);
}
}
......@@ -2539,25 +2540,26 @@ void TurboAssembler::I64x2GeS(XMMRegister dst, XMMRegister src0,
CpuFeatureScope sse_scope(this, SSE4_2);
DCHECK_NE(dst, src0);
if (dst != src1) {
movdqa(dst, src1);
movaps(dst, src1);
}
pcmpgtq(dst, src0);
pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
pxor(dst, kScratchDoubleReg);
xorps(dst, kScratchDoubleReg);
} else {
CpuFeatureScope sse_scope(this, SSE3);
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
movdqa(dst, src0);
movdqa(kScratchDoubleReg, src1);
movaps(dst, src0);
movaps(kScratchDoubleReg, src1);
psubq(dst, src1);
pcmpeqd(kScratchDoubleReg, src0);
pand(dst, kScratchDoubleReg);
movdqa(kScratchDoubleReg, src1);
andps(dst, kScratchDoubleReg);
movaps(kScratchDoubleReg, src1);
pcmpgtd(kScratchDoubleReg, src0);
por(dst, kScratchDoubleReg);
pshufd(dst, dst, 0xF5);
orps(dst, kScratchDoubleReg);
movshdup(dst, dst);
pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
pxor(dst, kScratchDoubleReg);
xorps(dst, kScratchDoubleReg);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment