Commit add293e8 authored by Ng Zhi An, committed by Commit Bot

[x64][ia32] Move more AVX_OP into SharedTurboAssembler

Bug: v8:11589
Change-Id: I30dbdbc6266d703ce697352780da1d543afbb457
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2826711
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73965}
parent d338a86b
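
For context: each AVX_OP / AVX_OP_SSE* entry expands to a macro-assembler method that emits the VEX-encoded three-operand instruction when AVX is available and otherwise falls back to the destructive two-operand SSE form (which requires dst == src1). That is the dispatch SharedTurboAssembler's AvxHelper centralizes, so per-opcode wrappers in the x64 and ia32 TurboAssemblers can be deleted. A minimal standalone sketch of the pattern follows; CpuInfo, Reg, and the printing emitters are illustrative stand-ins, not V8's actual CpuFeatures/Assembler API.

    // Standalone illustration (not V8 code): how an AVX_OP-style wrapper picks
    // between the VEX three-operand form and the destructive SSE two-operand form.
    #include <cstdio>
    #include <cstdlib>

    struct CpuInfo {   // hypothetical stand-in for CpuFeatures
      bool has_avx = false;
    };

    using Reg = int;   // XMM register number, purely illustrative

    // The emitters here just print the instruction they would assemble.
    void vpaddd(Reg dst, Reg src1, Reg src2) {
      std::printf("vpaddd xmm%d, xmm%d, xmm%d\n", dst, src1, src2);
    }
    void paddd(Reg dst, Reg src) {
      std::printf("paddd xmm%d, xmm%d\n", dst, src);
    }

    // Shape of the method an AVX_OP(Paddd, paddd) entry would generate:
    // non-destructive when AVX is available, dst must alias src1 for SSE.
    void Paddd(const CpuInfo& cpu, Reg dst, Reg src1, Reg src2) {
      if (cpu.has_avx) {
        vpaddd(dst, src1, src2);
      } else {
        if (dst != src1) std::abort();  // SSE form overwrites its first operand
        paddd(dst, src2);
      }
    }

    int main() {
      CpuInfo avx{true}, sse{false};
      Paddd(avx, /*dst=*/0, /*src1=*/1, /*src2=*/2);  // vpaddd xmm0, xmm1, xmm2
      Paddd(sse, /*dst=*/3, /*src1=*/3, /*src2=*/4);  // paddd xmm3, xmm4
      return 0;
    }

With the check centralized in one helper, adding a new opcode only needs a one-line AVX_OP(...) entry in the shared header rather than a hand-written pair of branches per platform, which is what the hunks below remove.
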
......@@ -1852,34 +1852,6 @@ void TurboAssembler::Pshufb(XMMRegister dst, XMMRegister src, Operand mask) {
pshufb(dst, mask);
}
void TurboAssembler::Pblendw(XMMRegister dst, Operand src, uint8_t imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpblendw(dst, dst, src, imm8);
return;
}
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
pblendw(dst, src, imm8);
return;
}
FATAL("no AVX or SSE4.1 support");
}
void TurboAssembler::Palignr(XMMRegister dst, Operand src, uint8_t imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpalignr(dst, dst, src, imm8);
return;
}
if (CpuFeatures::IsSupported(SSSE3)) {
CpuFeatureScope sse_scope(this, SSSE3);
palignr(dst, src, imm8);
return;
}
FATAL("no AVX or SSE3 support");
}
void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
if (imm8 == 0) {
Movd(dst, src);
......
......@@ -305,96 +305,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP(Pcmpeqb, pcmpeqb)
AVX_OP(Pcmpeqw, pcmpeqw)
AVX_OP(Pcmpeqd, pcmpeqd)
// Same as AVX_OP3_WITH_TYPE but supports a CpuFeatureScope
#define AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, dst_type, src_type, \
sse_scope) \
void macro_name(dst_type dst, src_type src) { \
if (CpuFeatures::IsSupported(AVX)) { \
CpuFeatureScope scope(this, AVX); \
v##name(dst, dst, src); \
} else if (CpuFeatures::IsSupported(sse_scope)) { \
CpuFeatureScope scope(this, sse_scope); \
name(dst, src); \
} \
}
#define AVX_OP2_XO(macro_name, name, sse_scope) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, XMMRegister, \
sse_scope) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, Operand, sse_scope)
AVX_OP2_XO(Psignb, psignb, SSSE3)
AVX_OP2_XO(Psignw, psignw, SSSE3)
AVX_OP2_XO(Psignd, psignd, SSSE3)
AVX_OP2_XO(Pcmpeqq, pcmpeqq, SSE4_1)
#undef AVX_OP2_XO
#undef AVX_OP2_WITH_TYPE_SCOPE
// Only use this macro when dst and src1 are the same in the SSE case.
#define AVX_PACKED_OP3_WITH_TYPE(macro_name, name, dst_type, src_type) \
void macro_name(dst_type dst, dst_type src1, src_type src2) { \
if (CpuFeatures::IsSupported(AVX)) { \
CpuFeatureScope scope(this, AVX); \
v##name(dst, src1, src2); \
} else { \
DCHECK_EQ(dst, src1); \
name(dst, src2); \
} \
}
#define AVX_PACKED_OP3(macro_name, name) \
AVX_PACKED_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
AVX_PACKED_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)
AVX_PACKED_OP3(Unpcklps, unpcklps)
AVX_PACKED_OP3(Andnps, andnps)
AVX_PACKED_OP3(Addps, addps)
AVX_PACKED_OP3(Addpd, addpd)
AVX_PACKED_OP3(Subps, subps)
AVX_PACKED_OP3(Subpd, subpd)
AVX_PACKED_OP3(Mulps, mulps)
AVX_PACKED_OP3(Mulpd, mulpd)
AVX_PACKED_OP3(Divps, divps)
AVX_PACKED_OP3(Divpd, divpd)
AVX_PACKED_OP3(Cmpeqpd, cmpeqpd)
AVX_PACKED_OP3(Cmpneqpd, cmpneqpd)
AVX_PACKED_OP3(Cmpltpd, cmpltpd)
AVX_PACKED_OP3(Cmpleps, cmpleps)
AVX_PACKED_OP3(Cmplepd, cmplepd)
AVX_PACKED_OP3(Minps, minps)
AVX_PACKED_OP3(Minpd, minpd)
AVX_PACKED_OP3(Maxps, maxps)
AVX_PACKED_OP3(Maxpd, maxpd)
AVX_PACKED_OP3(Cmpunordps, cmpunordps)
AVX_PACKED_OP3(Cmpunordpd, cmpunordpd)
AVX_PACKED_OP3(Psllw, psllw)
AVX_PACKED_OP3(Pslld, pslld)
AVX_PACKED_OP3(Psllq, psllq)
AVX_PACKED_OP3(Psrlw, psrlw)
AVX_PACKED_OP3(Psrld, psrld)
AVX_PACKED_OP3(Psrad, psrad)
AVX_PACKED_OP3(Paddd, paddd)
AVX_PACKED_OP3(Paddq, paddq)
AVX_PACKED_OP3(Pmuludq, pmuludq)
AVX_PACKED_OP3(Pavgb, pavgb)
AVX_PACKED_OP3(Pavgw, pavgw)
AVX_PACKED_OP3(Pminub, pminub)
AVX_PACKED_OP3(Pmaxub, pmaxub)
AVX_PACKED_OP3(Paddusb, paddusb)
AVX_PACKED_OP3(Psubusb, psubusb)
AVX_PACKED_OP3(Pcmpgtb, pcmpgtb)
AVX_PACKED_OP3(Paddb, paddb)
AVX_PACKED_OP3(Paddsb, paddsb)
AVX_PACKED_OP3(Psubsb, psubsb)
#undef AVX_PACKED_OP3
AVX_PACKED_OP3_WITH_TYPE(Psllw, psllw, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Pslld, pslld, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psllq, psllq, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psrlw, psrlw, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psrld, psrld, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psrad, psrad, XMMRegister, uint8_t)
#undef AVX_PACKED_OP3_WITH_TYPE
AVX_OP_SSE4_1(Pcmpeqq, pcmpeqq)
// Macro for instructions that take two operands in the AVX version and one operand in the
// SSE version. Moves src1 to dst if dst != src1.
......@@ -416,35 +327,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP3_WITH_MOVE(Pmaddwd, pmaddwd, XMMRegister, Operand)
#undef AVX_OP3_WITH_MOVE
#define AVX_OP3_WITH_TYPE_SCOPE(macro_name, name, dst_type, src_type, \
sse_scope) \
void macro_name(dst_type dst, dst_type src1, src_type src2) { \
if (CpuFeatures::IsSupported(AVX)) { \
CpuFeatureScope scope(this, AVX); \
v##name(dst, src1, src2); \
return; \
} \
if (CpuFeatures::IsSupported(sse_scope)) { \
CpuFeatureScope scope(this, sse_scope); \
DCHECK_EQ(dst, src1); \
name(dst, src2); \
return; \
} \
UNREACHABLE(); \
}
#define AVX_OP3_XO_SSE4(macro_name, name) \
AVX_OP3_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, XMMRegister, SSE4_1) \
AVX_OP3_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, Operand, SSE4_1)
AVX_OP3_WITH_TYPE_SCOPE(Haddps, haddps, XMMRegister, Operand, SSE3)
AVX_OP3_XO_SSE4(Pmaxsd, pmaxsd)
AVX_OP3_XO_SSE4(Pminsb, pminsb)
AVX_OP3_XO_SSE4(Pmaxsb, pmaxsb)
AVX_OP3_XO_SSE4(Pcmpeqq, pcmpeqq)
#undef AVX_OP3_XO_SSE4
#undef AVX_OP3_WITH_TYPE_SCOPE
// TODO(zhin): Remove after moving more definitions into SharedTurboAssembler.
void Movlps(Operand dst, XMMRegister src) {
SharedTurboAssembler::Movlps(dst, src);
......@@ -461,16 +343,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
}
void Pshufb(XMMRegister dst, XMMRegister src, Operand mask);
void Pblendw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
Pblendw(dst, Operand(src), imm8);
}
void Pblendw(XMMRegister dst, Operand src, uint8_t imm8);
void Palignr(XMMRegister dst, XMMRegister src, uint8_t imm8) {
Palignr(dst, Operand(src), imm8);
}
void Palignr(XMMRegister dst, Operand src, uint8_t imm8);
void Pextrd(Register dst, XMMRegister src, uint8_t imm8);
void Pinsrb(XMMRegister dst, Register src, int8_t imm8) {
Pinsrb(dst, Operand(src), imm8);
......
......@@ -39,7 +39,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
}
}
// Helper struct to implement functions that checks for AVX support and
// Helper struct to implement functions that check for AVX support and
// dispatch to the appropriate AVX/SSE instruction.
template <typename Dst, typename Arg, typename... Args>
struct AvxHelper {
......@@ -145,14 +145,30 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
}
// Keep this list sorted by required extension, then instruction name.
AVX_OP(Addpd, addpd)
AVX_OP(Addps, addps)
AVX_OP(Andnpd, andnpd)
AVX_OP(Andnps, andnps)
AVX_OP(Andpd, andpd)
AVX_OP(Andps, andps)
AVX_OP(Cmpeqpd, cmpeqpd)
AVX_OP(Cmplepd, cmplepd)
AVX_OP(Cmpleps, cmpleps)
AVX_OP(Cmpltpd, cmpltpd)
AVX_OP(Cmpneqpd, cmpneqpd)
AVX_OP(Cmpunordpd, cmpunordpd)
AVX_OP(Cmpunordps, cmpunordps)
AVX_OP(Cvtdq2pd, cvtdq2pd)
AVX_OP(Cvtdq2ps, cvtdq2ps)
AVX_OP(Cvtpd2ps, cvtpd2ps)
AVX_OP(Cvtps2pd, cvtps2pd)
AVX_OP(Cvttps2dq, cvttps2dq)
AVX_OP(Divpd, divpd)
AVX_OP(Divps, divps)
AVX_OP(Maxpd, maxpd)
AVX_OP(Maxps, maxps)
AVX_OP(Minpd, minpd)
AVX_OP(Minps, minps)
AVX_OP(Movaps, movaps)
AVX_OP(Movd, movd)
AVX_OP(Movhlps, movhlps)
......@@ -164,25 +180,46 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP(Movss, movss)
AVX_OP(Movupd, movupd)
AVX_OP(Movups, movups)
AVX_OP(Mulpd, mulpd)
AVX_OP(Mulps, mulps)
AVX_OP(Orpd, orpd)
AVX_OP(Orps, orps)
AVX_OP(Packssdw, packssdw)
AVX_OP(Packsswb, packsswb)
AVX_OP(Packuswb, packuswb)
AVX_OP(Paddb, paddb)
AVX_OP(Paddd, paddd)
AVX_OP(Paddq, paddq)
AVX_OP(Paddsb, paddsb)
AVX_OP(Paddusb, paddusb)
AVX_OP(Paddusw, paddusw)
AVX_OP(Paddw, paddw)
AVX_OP(Pand, pand)
AVX_OP(Pavgb, pavgb)
AVX_OP(Pavgw, pavgw)
AVX_OP(Pcmpgtb, pcmpgtb)
AVX_OP(Pmaxub, pmaxub)
AVX_OP(Pminub, pminub)
AVX_OP(Pmovmskb, pmovmskb)
AVX_OP(Pmullw, pmullw)
AVX_OP(Pmuludq, pmuludq)
AVX_OP(Por, por)
AVX_OP(Pshufd, pshufd)
AVX_OP(Pshufhw, pshufhw)
AVX_OP(Pshuflw, pshuflw)
AVX_OP(Pslld, pslld)
AVX_OP(Psllq, psllq)
AVX_OP(Psllw, psllw)
AVX_OP(Psrad, psrad)
AVX_OP(Psraw, psraw)
AVX_OP(Psrld, psrld)
AVX_OP(Psrlq, psrlq)
AVX_OP(Psrlw, psrlw)
AVX_OP(Psubb, psubb)
AVX_OP(Psubd, psubd)
AVX_OP(Psubq, psubq)
AVX_OP(Psubsb, psubsb)
AVX_OP(Psubusb, psubusb)
AVX_OP(Psubw, psubw)
AVX_OP(Punpckhbw, punpckhbw)
AVX_OP(Punpckhdq, punpckhdq)
......@@ -199,16 +236,31 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP(Sqrtps, sqrtps)
AVX_OP(Sqrtsd, sqrtsd)
AVX_OP(Sqrtss, sqrtss)
AVX_OP(Subpd, subpd)
AVX_OP(Subps, subps)
AVX_OP(Unpcklps, unpcklps)
AVX_OP(Xorpd, xorpd)
AVX_OP(Xorps, xorps)
AVX_OP_SSE3(Haddps, haddps)
AVX_OP_SSE3(Movddup, movddup)
AVX_OP_SSE3(Movshdup, movshdup)
AVX_OP_SSSE3(Pabsb, pabsb)
AVX_OP_SSSE3(Pabsd, pabsd)
AVX_OP_SSSE3(Pabsw, pabsw)
AVX_OP_SSSE3(Palignr, palignr)
AVX_OP_SSSE3(Psignb, psignb)
AVX_OP_SSSE3(Psignd, psignd)
AVX_OP_SSSE3(Psignw, psignw)
AVX_OP_SSE4_1(Extractps, extractps)
AVX_OP_SSE4_1(Pblendw, pblendw)
AVX_OP_SSE4_1(Pextrb, pextrb)
AVX_OP_SSE4_1(Pextrw, pextrw)
AVX_OP_SSE4_1(Pmaxsb, pmaxsb)
AVX_OP_SSE4_1(Pmaxsd, pmaxsd)
AVX_OP_SSE4_1(Pminsb, pminsb)
AVX_OP_SSE4_1(Pmovsxbw, pmovsxbw)
AVX_OP_SSE4_1(Pmovsxdq, pmovsxdq)
AVX_OP_SSE4_1(Pmovsxwd, pmovsxwd)
......
......@@ -1274,6 +1274,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
SSE_CMP_P(cmpeq, 0x0)
SSE_CMP_P(cmplt, 0x1)
SSE_CMP_P(cmple, 0x2)
SSE_CMP_P(cmpunord, 0x3)
SSE_CMP_P(cmpneq, 0x4)
SSE_CMP_P(cmpnlt, 0x5)
SSE_CMP_P(cmpnle, 0x6)
......@@ -1571,6 +1572,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
AVX_CMP_P(vcmpeq, 0x0)
AVX_CMP_P(vcmplt, 0x1)
AVX_CMP_P(vcmple, 0x2)
AVX_CMP_P(vcmpunord, 0x3)
AVX_CMP_P(vcmpneq, 0x4)
AVX_CMP_P(vcmpnlt, 0x5)
AVX_CMP_P(vcmpnle, 0x6)
......
......@@ -1885,16 +1885,6 @@ void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1,
}
}
void TurboAssembler::Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vunpcklps(dst, src1, src2);
} else {
DCHECK_EQ(dst, src1);
unpcklps(dst, src2);
}
}
void TurboAssembler::Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {
......@@ -2039,26 +2029,6 @@ void TurboAssembler::Pinsrq(XMMRegister dst, XMMRegister src1, Operand src2,
imm8, base::Optional<CpuFeature>(SSE4_1));
}
void TurboAssembler::Psllq(XMMRegister dst, byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpsllq(dst, dst, imm8);
} else {
DCHECK(!IsEnabled(AVX));
psllq(dst, imm8);
}
}
void TurboAssembler::Pslld(XMMRegister dst, byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpslld(dst, dst, imm8);
} else {
DCHECK(!IsEnabled(AVX));
pslld(dst, imm8);
}
}
void TurboAssembler::Pblendvb(XMMRegister dst, XMMRegister src1,
XMMRegister src2, XMMRegister mask) {
if (CpuFeatures::IsSupported(AVX)) {
......@@ -2396,21 +2366,6 @@ void TurboAssembler::Negpd(XMMRegister dst) {
ExternalReference::address_of_double_neg_constant()));
}
void TurboAssembler::Psrld(XMMRegister dst, byte imm8) {
Psrld(dst, dst, imm8);
}
void TurboAssembler::Psrld(XMMRegister dst, XMMRegister src, byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpsrld(dst, src, imm8);
} else {
DCHECK(!IsEnabled(AVX));
DCHECK_EQ(dst, src);
psrld(dst, imm8);
}
}
void TurboAssembler::Lzcntl(Register dst, Register src) {
if (CpuFeatures::IsSupported(LZCNT)) {
CpuFeatureScope scope(this, LZCNT);
......
......@@ -66,91 +66,43 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP(Subsd, subsd)
AVX_OP(Divss, divss)
AVX_OP(Divsd, divsd)
AVX_OP(Pcmpgtb, pcmpgtb)
AVX_OP(Pcmpgtw, pcmpgtw)
AVX_OP(Pmaxsw, pmaxsw)
AVX_OP(Pmaxub, pmaxub)
AVX_OP(Pminsw, pminsw)
AVX_OP(Pminub, pminub)
AVX_OP(Addss, addss)
AVX_OP(Addsd, addsd)
AVX_OP(Mulsd, mulsd)
AVX_OP(Andnps, andnps)
AVX_OP(Cmpeqps, cmpeqps)
AVX_OP(Cmpltps, cmpltps)
AVX_OP(Cmpleps, cmpleps)
AVX_OP(Cmpneqps, cmpneqps)
AVX_OP(Cmpnltps, cmpnltps)
AVX_OP(Cmpnleps, cmpnleps)
AVX_OP(Cmpeqpd, cmpeqpd)
AVX_OP(Cmpltpd, cmpltpd)
AVX_OP(Cmplepd, cmplepd)
AVX_OP(Cmpneqpd, cmpneqpd)
AVX_OP(Cmpnltpd, cmpnltpd)
AVX_OP(Cmpnlepd, cmpnlepd)
AVX_OP(Cvttpd2dq, cvttpd2dq)
AVX_OP(Ucomiss, ucomiss)
AVX_OP(Ucomisd, ucomisd)
AVX_OP(Psubsb, psubsb)
AVX_OP(Psubsw, psubsw)
AVX_OP(Psubusb, psubusb)
AVX_OP(Psubusw, psubusw)
AVX_OP(Pslld, pslld)
AVX_OP(Pavgb, pavgb)
AVX_OP(Pavgw, pavgw)
AVX_OP(Psrad, psrad)
AVX_OP(Psllw, psllw)
AVX_OP(Psllq, psllq)
AVX_OP(Psrlw, psrlw)
AVX_OP(Psrld, psrld)
AVX_OP(Paddb, paddb)
AVX_OP(Paddw, paddw)
AVX_OP(Paddd, paddd)
AVX_OP(Paddq, paddq)
AVX_OP(Paddsb, paddsb)
AVX_OP(Paddsw, paddsw)
AVX_OP(Pcmpgtd, pcmpgtd)
AVX_OP(Pmuludq, pmuludq)
AVX_OP(Addpd, addpd)
AVX_OP(Subpd, subpd)
AVX_OP(Mulpd, mulpd)
AVX_OP(Minps, minps)
AVX_OP(Minpd, minpd)
AVX_OP(Divpd, divpd)
AVX_OP(Maxps, maxps)
AVX_OP(Maxpd, maxpd)
AVX_OP(Addps, addps)
AVX_OP(Subps, subps)
AVX_OP(Mulps, mulps)
AVX_OP(Divps, divps)
AVX_OP(Pcmpeqb, pcmpeqb)
AVX_OP(Pcmpeqw, pcmpeqw)
AVX_OP(Pcmpeqd, pcmpeqd)
AVX_OP(Cmpps, cmpps)
AVX_OP(Cmppd, cmppd)
AVX_OP(Movlhps, movlhps)
AVX_OP_SSE3(Haddps, haddps)
AVX_OP_SSSE3(Phaddd, phaddd)
AVX_OP_SSSE3(Phaddw, phaddw)
AVX_OP_SSSE3(Pshufb, pshufb)
AVX_OP_SSSE3(Psignb, psignb)
AVX_OP_SSSE3(Psignw, psignw)
AVX_OP_SSSE3(Psignd, psignd)
AVX_OP_SSSE3(Palignr, palignr)
AVX_OP_SSE4_1(Pcmpeqq, pcmpeqq)
AVX_OP_SSE4_1(Packusdw, packusdw)
AVX_OP_SSE4_1(Pminsb, pminsb)
AVX_OP_SSE4_1(Pminsd, pminsd)
AVX_OP_SSE4_1(Pminuw, pminuw)
AVX_OP_SSE4_1(Pminud, pminud)
AVX_OP_SSE4_1(Pmaxsb, pmaxsb)
AVX_OP_SSE4_1(Pmaxsd, pmaxsd)
AVX_OP_SSE4_1(Pmaxuw, pmaxuw)
AVX_OP_SSE4_1(Pmaxud, pmaxud)
AVX_OP_SSE4_1(Pmulld, pmulld)
AVX_OP_SSE4_1(Insertps, insertps)
AVX_OP_SSE4_1(Pinsrq, pinsrq)
AVX_OP_SSE4_1(Pblendw, pblendw)
AVX_OP_SSE4_1(Pextrq, pextrq)
AVX_OP_SSE4_1(Roundss, roundss)
AVX_OP_SSE4_1(Roundsd, roundsd)
......@@ -427,7 +379,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
void Pmaddubsw(XMMRegister dst, XMMRegister src1, Operand src2);
void Pmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);
void Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2);
// Shufps that will mov src1 into dst if AVX is not supported.
void Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8);
......@@ -445,14 +396,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
void Pinsrq(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8);
void Pinsrq(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8);
void Psllq(XMMRegister dst, int imm8) { Psllq(dst, static_cast<byte>(imm8)); }
void Psllq(XMMRegister dst, byte imm8);
void Pslld(XMMRegister dst, byte imm8);
void Psrld(XMMRegister dst, byte imm8);
// Supports both AVX (dst != src1) and SSE (checks that dst == src1).
void Psrld(XMMRegister dst, XMMRegister src, byte imm8);
void Pblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask);
void Blendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
......
......@@ -2181,7 +2181,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Set up a mask [0x80000000,0,0x80000000,0].
__ Pcmpeqb(tmp2, tmp2);
__ Psllq(tmp2, tmp2, 63);
__ Psllq(tmp2, tmp2, byte{63});
__ Psrlq(tmp2, tmp2, tmp);
__ Psrlq(dst, src, tmp);
......@@ -2218,7 +2218,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pmuludq(tmp2, tmp2, left);
__ Paddq(tmp2, tmp2, tmp1);
__ Psllq(tmp2, tmp2, 32);
__ Psllq(tmp2, tmp2, byte{32});
__ Pmuludq(dst, left, right);
__ Paddq(dst, dst, tmp2);
......@@ -2357,10 +2357,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
__ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
__ Pblendw(kScratchDoubleReg, src, 0x55); // get lo 16 bits
__ Pblendw(kScratchDoubleReg, src, uint8_t{0x55}); // get lo 16 bits
__ Psubd(dst, src, kScratchDoubleReg); // get hi 16 bits
__ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
__ Psrld(dst, dst, 1); // divide by 2 to get in unsigned range
__ Psrld(dst, dst, byte{1}); // divide by 2 to get in unsigned range
__ Cvtdq2ps(dst, dst); // convert hi exactly
__ Addps(dst, dst, dst); // double hi, exactly
__ Addps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
......@@ -2371,11 +2371,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psrld(kScratchDoubleReg, kScratchDoubleReg, 1);
__ Psrld(kScratchDoubleReg, kScratchDoubleReg, byte{1});
__ Andps(dst, kScratchDoubleReg);
} else {
__ Pcmpeqd(dst, dst);
__ Psrld(dst, dst, 1);
__ Psrld(dst, dst, byte{1});
__ Andps(dst, src);
}
break;
......@@ -2385,11 +2385,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Pslld(kScratchDoubleReg, kScratchDoubleReg, 31);
__ Pslld(kScratchDoubleReg, kScratchDoubleReg, byte{31});
__ Xorps(dst, kScratchDoubleReg);
} else {
__ Pcmpeqd(dst, dst);
__ Pslld(dst, dst, 31);
__ Pslld(dst, dst, byte{31});
__ Xorps(dst, src);
}
break;
......@@ -2580,7 +2580,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Cvttps2dq(dst, dst);
// Set top bit if >=0 is now < 0
__ Pand(kScratchDoubleReg, dst);
__ Psrad(kScratchDoubleReg, kScratchDoubleReg, 31);
__ Psrad(kScratchDoubleReg, kScratchDoubleReg, byte{31});
// Set positive overflow lanes to 0x7FFFFFFF
__ Pxor(dst, kScratchDoubleReg);
break;
......@@ -3739,7 +3739,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
__ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle);
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle);
__ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
__ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
break;
}
case kIA32S16x8Blend:
......@@ -3757,7 +3757,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
__ Pshuflw(dst, i.InputOperand(0), i.InputUint8(2));
__ Pshufhw(dst, dst, i.InputUint8(3));
__ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
__ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
break;
}
case kIA32S8x16Alignr:
......
......@@ -1486,7 +1486,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
__ Pcmpeqd(tmp, tmp);
__ Psllq(tmp, 31);
__ Psllq(tmp, byte{31});
__ Xorps(i.OutputDoubleRegister(), tmp);
break;
}
......@@ -2439,7 +2439,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// propagate -0's and NaNs, which may be non-canonical.
__ Orpd(kScratchDoubleReg, dst);
// Canonicalize NaNs by quieting and clearing the payload.
__ Cmppd(dst, kScratchDoubleReg, int8_t{3});
__ Cmpunordpd(dst, kScratchDoubleReg);
__ Orpd(kScratchDoubleReg, dst);
__ Psrlq(dst, byte{13});
__ Andnpd(dst, kScratchDoubleReg);
......@@ -2461,7 +2461,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Propagate sign discrepancy and (subtle) quiet NaNs.
__ Subpd(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ Cmppd(dst, kScratchDoubleReg, int8_t{3});
__ Cmpunordpd(dst, kScratchDoubleReg);
__ Psrlq(dst, byte{13});
__ Andnpd(dst, kScratchDoubleReg);
break;
......@@ -2671,7 +2671,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// propagate -0's and NaNs, which may be non-canonical.
__ Orps(kScratchDoubleReg, dst);
// Canonicalize NaNs by quieting and clearing the payload.
__ Cmpps(dst, kScratchDoubleReg, int8_t{3});
__ Cmpunordps(dst, kScratchDoubleReg);
__ Orps(kScratchDoubleReg, dst);
__ Psrld(dst, byte{10});
__ Andnps(dst, kScratchDoubleReg);
......@@ -2693,7 +2693,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Propagate sign discrepancy and (subtle) quiet NaNs.
__ Subps(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ Cmpps(dst, kScratchDoubleReg, int8_t{3});
__ Cmpunordps(dst, kScratchDoubleReg);
__ Psrld(dst, byte{10});
__ Andnps(dst, kScratchDoubleReg);
break;
......@@ -2851,7 +2851,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pmuludq(tmp2, left);
__ Paddq(tmp2, tmp1);
__ Psllq(tmp2, 32);
__ Psllq(tmp2, byte{32});
__ Pmuludq(left, right);
__ Paddq(left, tmp2); // left == dst
......
......@@ -3896,7 +3896,7 @@ void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
// Set up a mask [0x80000000,0,0x80000000,0].
Pcmpeqb(tmp, tmp);
Psllq(tmp, tmp, 63);
Psllq(tmp, tmp, byte{63});
Psrlq(tmp, tmp, shift);
if (CpuFeatures::IsSupported(AVX)) {
......@@ -3919,7 +3919,7 @@ void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
// Set up a mask [0x80000000,0,0x80000000,0].
Pcmpeqb(tmp, tmp);
Psllq(tmp, tmp, 63);
Psllq(tmp, tmp, byte{63});
Psrlq(tmp, tmp, byte{shift});
liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
......@@ -3969,7 +3969,7 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
Psrlq(tmp2.fp(), byte{32});
Pmuludq(tmp2.fp(), tmp2.fp(), lhs.fp());
Paddq(tmp2.fp(), tmp2.fp(), tmp1.fp());
Psllq(tmp2.fp(), tmp2.fp(), 32);
Psllq(tmp2.fp(), tmp2.fp(), byte{32});
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmuludq, &Assembler::pmuludq>(
this, dst, lhs, rhs);
Paddq(dst.fp(), dst.fp(), tmp2.fp());
......@@ -4032,11 +4032,11 @@ void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
Psrld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 1);
Psrld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, byte{1});
Andps(dst.fp(), liftoff::kScratchDoubleReg);
} else {
Pcmpeqd(dst.fp(), dst.fp());
Psrld(dst.fp(), dst.fp(), 1);
Psrld(dst.fp(), dst.fp(), byte{1});
Andps(dst.fp(), src.fp());
}
}
......@@ -4045,11 +4045,11 @@ void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
Pslld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 31);
Pslld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, byte{31});
Xorps(dst.fp(), liftoff::kScratchDoubleReg);
} else {
Pcmpeqd(dst.fp(), dst.fp());
Pslld(dst.fp(), dst.fp(), 31);
Pslld(dst.fp(), dst.fp(), byte{31});
Xorps(dst.fp(), src.fp());
}
}
......@@ -4201,11 +4201,11 @@ void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
Psllq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 63);
Psllq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, byte{63});
Xorpd(dst.fp(), liftoff::kScratchDoubleReg);
} else {
Pcmpeqd(dst.fp(), dst.fp());
Psllq(dst.fp(), dst.fp(), 63);
Psllq(dst.fp(), dst.fp(), byte{63});
Xorpd(dst.fp(), src.fp());
}
}
......
......@@ -3510,7 +3510,7 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
Psrlq(tmp2.fp(), byte{32});
Pmuludq(tmp2.fp(), lhs.fp());
Paddq(tmp2.fp(), tmp1.fp());
Psllq(tmp2.fp(), 32);
Psllq(tmp2.fp(), byte{32});
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmuludq, &Assembler::pmuludq>(
this, dst, lhs, rhs);
Paddq(dst.fp(), tmp2.fp());
......@@ -3586,11 +3586,11 @@ void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
Pslld(kScratchDoubleReg, static_cast<byte>(31));
Pslld(kScratchDoubleReg, byte{31});
Xorps(dst.fp(), kScratchDoubleReg);
} else {
Pcmpeqd(dst.fp(), dst.fp());
Pslld(dst.fp(), static_cast<byte>(31));
Pslld(dst.fp(), byte{31});
Xorps(dst.fp(), src.fp());
}
}
......@@ -3674,7 +3674,7 @@ void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
// propagate -0's and NaNs, which may be non-canonical.
Orps(kScratchDoubleReg, dst.fp());
// Canonicalize NaNs by quieting and clearing the payload.
Cmpps(dst.fp(), kScratchDoubleReg, int8_t{3});
Cmpunordps(dst.fp(), kScratchDoubleReg);
Orps(kScratchDoubleReg, dst.fp());
Psrld(dst.fp(), byte{10});
Andnps(dst.fp(), kScratchDoubleReg);
......@@ -3706,7 +3706,7 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
// Propagate sign discrepancy and (subtle) quiet NaNs.
Subps(kScratchDoubleReg, dst.fp());
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
Cmpps(dst.fp(), kScratchDoubleReg, int8_t{3});
Cmpunordps(dst.fp(), kScratchDoubleReg);
Psrld(dst.fp(), byte{10});
Andnps(dst.fp(), kScratchDoubleReg);
}
......@@ -3830,7 +3830,7 @@ void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
// propagate -0's and NaNs, which may be non-canonical.
Orpd(kScratchDoubleReg, dst.fp());
// Canonicalize NaNs by quieting and clearing the payload.
Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
Cmpunordpd(dst.fp(), kScratchDoubleReg);
Orpd(kScratchDoubleReg, dst.fp());
Psrlq(dst.fp(), byte{13});
Andnpd(dst.fp(), kScratchDoubleReg);
......@@ -3862,7 +3862,7 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
// Propagate sign discrepancy and (subtle) quiet NaNs.
Subpd(kScratchDoubleReg, dst.fp());
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
Cmpunordpd(dst.fp(), kScratchDoubleReg);
Psrlq(dst.fp(), byte{13});
Andnpd(dst.fp(), kScratchDoubleReg);
}
......
......@@ -559,6 +559,8 @@ TEST(DisasmX64) {
__ cmpltps(xmm5, Operand(rbx, rcx, times_4, 10000));
__ cmpleps(xmm5, xmm1);
__ cmpleps(xmm5, Operand(rbx, rcx, times_4, 10000));
__ cmpunordps(xmm5, xmm1);
__ cmpunordps(xmm5, Operand(rbx, rcx, times_4, 10000));
__ cmpneqps(xmm5, xmm1);
__ cmpneqps(xmm5, Operand(rbx, rcx, times_4, 10000));
__ cmpnltps(xmm5, xmm1);
......@@ -573,6 +575,8 @@ TEST(DisasmX64) {
__ cmpltpd(xmm5, Operand(rbx, rcx, times_4, 10000));
__ cmplepd(xmm5, xmm1);
__ cmplepd(xmm5, Operand(rbx, rcx, times_4, 10000));
__ cmpunordpd(xmm5, xmm1);
__ cmpunordpd(xmm5, Operand(rbx, rcx, times_4, 10000));
__ cmpneqpd(xmm5, xmm1);
__ cmpneqpd(xmm5, Operand(rbx, rcx, times_4, 10000));
__ cmpnltpd(xmm5, xmm1);
......@@ -727,6 +731,8 @@ TEST(DisasmX64) {
__ vcmpltps(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
__ vcmpleps(xmm5, xmm4, xmm1);
__ vcmpleps(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
__ vcmpunordps(xmm5, xmm4, xmm1);
__ vcmpunordps(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
__ vcmpneqps(xmm5, xmm4, xmm1);
__ vcmpneqps(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
__ vcmpnltps(xmm5, xmm4, xmm1);
......@@ -741,6 +747,8 @@ TEST(DisasmX64) {
__ vcmpltpd(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
__ vcmplepd(xmm5, xmm4, xmm1);
__ vcmplepd(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
__ vcmpunordpd(xmm5, xmm4, xmm1);
__ vcmpunordpd(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
__ vcmpneqpd(xmm5, xmm4, xmm1);
__ vcmpneqpd(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
__ vcmpnltpd(xmm5, xmm4, xmm1);
......