Commit a9cd53c7 authored by Ng Zhi An, committed by Commit Bot

[x64][ia32] Move more AVX_OP into SharedTurboAssembler

We add one more member function template to AvxHelper, enabling a new
calling convention:

- Andps(x, y, z) -> vandps(x, y, z), or andps(x, z) with a check that
  x == y
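
For illustration, here is a self-contained sketch of the dispatch the two
calling conventions map onto. The types below are simplified stand-ins,
not the real V8 classes, and the plain bool replaces the runtime
CpuFeatures::IsSupported(AVX) check:

  #include <cassert>
  #include <cstdio>

  struct XMMRegister {
    int code;
  };
  inline bool operator==(XMMRegister a, XMMRegister b) {
    return a.code == b.code;
  }

  struct Assembler {
    // SSE form: destructive, dst doubles as the first source.
    void andps(XMMRegister dst, XMMRegister src) {
      std::printf("andps xmm%d, xmm%d\n", dst.code, src.code);
    }
    // AVX form: non-destructive, separate dst and two sources.
    void vandps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
      std::printf("vandps xmm%d, xmm%d, xmm%d\n", dst.code, src1.code,
                  src2.code);
    }
  };

  template <typename Dst, typename Arg>
  struct AvxHelper {
    Assembler* assm;
    bool have_avx;

    // Existing form: Andps(x, y) -> vandps(x, x, y) / andps(x, y).
    template <void (Assembler::*avx)(Dst, Dst, Arg),
              void (Assembler::*no_avx)(Dst, Arg)>
    void emit(Dst dst, Arg arg) {
      if (have_avx) {
        (assm->*avx)(dst, dst, arg);
      } else {
        (assm->*no_avx)(dst, arg);
      }
    }

    // New form: Andps(x, y, z) -> vandps(x, y, z), or andps(x, z) with a
    // check that x == y.
    template <void (Assembler::*avx)(Dst, Arg, Arg),
              void (Assembler::*no_avx)(Dst, Arg)>
    void emit(Dst dst, Arg arg1, Arg arg2) {
      if (have_avx) {
        (assm->*avx)(dst, arg1, arg2);
      } else {
        assert(dst == arg1);  // SSE has no separate first source operand.
        (assm->*no_avx)(dst, arg2);
      }
    }
  };

  int main() {
    Assembler assm;
    XMMRegister x{0}, y{0}, z{1};
    AvxHelper<XMMRegister, XMMRegister> helper{&assm, /*have_avx=*/false};
    helper.emit<&Assembler::vandps, &Assembler::andps>(x, z);     // andps xmm0, xmm1
    helper.emit<&Assembler::vandps, &Assembler::andps>(x, y, z);  // x == y, so andps xmm0, xmm1
    return 0;
  }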

Clean up a bunch of places where we need to pass an int literal as a
byte.
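
A couple of representative call sites from this change:

  __ Psrlq(dst, byte{13});            // was: __ Psrlq(dst, 13);
  Psrlq(kScratchDoubleReg, byte{1});  // was: Psrlq(kScratchDoubleReg, static_cast<byte>(1));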

Unfortunately we cannot define Movq using AVX_OP. Because movq is
defined in the assembler via function templates, a one-argument version
of movq exists. That is not a valid instruction for movq (it is for
`dec`). Template argument deduction then ends up selecting
vmovq(XMMRegister, Register) and movq(XMMRegister), which is not valid.
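
A minimal, hypothetical sketch of that situation (simplified declarations,
not the real assembler):

  struct XMMRegister {};
  struct Register {};

  struct Assembler {
    template <typename P1>
    void movq(P1) {}      // instantiable, but not a valid movq encoding
    template <typename P1, typename P2>
    void movq(P1, P2) {}  // the real two-operand forms come from here
    void vmovq(XMMRegister, Register) {}
  };

  int main() {
    // Both member-pointer types can be formed, which is what lets the
    // template-argument deduction for AvxHelper pair
    // vmovq(XMMRegister, Register) with the invalid movq(XMMRegister)
    // instead of movq(XMMRegister, Register). Movq is therefore written
    // out by hand (see the Movq definitions in the diff below).
    void (Assembler::*one_operand)(XMMRegister) = &Assembler::movq;
    void (Assembler::*two_operands)(XMMRegister, Register) = &Assembler::movq;
    (void)one_operand;
    (void)two_operands;
    return 0;
  }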

Bug: v8:11589
Change-Id: I45e3bc213d93ece7f65da8eb1e3fa185aec4c573
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2815560
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73944}
parent 9d3f3545
......@@ -301,53 +301,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
// may be bigger than 2^16 - 1. Requires a scratch register.
void Ret(int bytes_dropped, Register scratch);
// Only use these macros when the non-destructive source operand of the AVX
// version is not needed.
#define AVX_OP3_WITH_TYPE(macro_name, name, dst_type, src_type) \
void macro_name(dst_type dst, src_type src) { \
if (CpuFeatures::IsSupported(AVX)) { \
CpuFeatureScope scope(this, AVX); \
v##name(dst, dst, src); \
} else { \
name(dst, src); \
} \
}
#define AVX_OP3_XO(macro_name, name) \
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)
AVX_OP3_XO(Packsswb, packsswb)
AVX_OP3_XO(Packuswb, packuswb)
AVX_OP3_XO(Paddusb, paddusb)
AVX_OP3_XO(Pand, pand)
AVX_OP3_XO(Pcmpeqb, pcmpeqb)
AVX_OP3_XO(Pcmpeqw, pcmpeqw)
AVX_OP3_XO(Pcmpeqd, pcmpeqd)
AVX_OP3_XO(Por, por)
AVX_OP3_XO(Psubb, psubb)
AVX_OP3_XO(Psubw, psubw)
AVX_OP3_XO(Psubd, psubd)
AVX_OP3_XO(Psubq, psubq)
AVX_OP3_XO(Punpcklbw, punpcklbw)
AVX_OP3_XO(Punpckhbw, punpckhbw)
AVX_OP3_XO(Punpckldq, punpckldq)
AVX_OP3_XO(Punpcklqdq, punpcklqdq)
AVX_OP3_XO(Pxor, pxor)
AVX_OP3_XO(Andps, andps)
AVX_OP3_XO(Andpd, andpd)
AVX_OP3_XO(Xorps, xorps)
AVX_OP3_XO(Xorpd, xorpd)
AVX_OP3_XO(Sqrtss, sqrtss)
AVX_OP3_XO(Sqrtsd, sqrtsd)
AVX_OP3_XO(Orps, orps)
AVX_OP3_XO(Orpd, orpd)
AVX_OP3_XO(Andnpd, andnpd)
AVX_OP3_WITH_TYPE(Movhlps, movhlps, XMMRegister, XMMRegister)
AVX_OP3_WITH_TYPE(Psraw, psraw, XMMRegister, uint8_t)
AVX_OP3_WITH_TYPE(Psrlq, psrlq, XMMRegister, uint8_t)
#undef AVX_OP3_XO
#undef AVX_OP3_WITH_TYPE
// Defined here because some callers take pointers to member functions.
AVX_OP(Pcmpeqb, pcmpeqb)
AVX_OP(Pcmpeqw, pcmpeqw)
AVX_OP(Pcmpeqd, pcmpeqd)
// Same as AVX_OP3_WITH_TYPE but supports a CpuFeatureScope
#define AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, dst_type, src_type, \
......@@ -413,26 +370,19 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_PACKED_OP3(Psllq, psllq)
AVX_PACKED_OP3(Psrlw, psrlw)
AVX_PACKED_OP3(Psrld, psrld)
AVX_PACKED_OP3(Psrlq, psrlq)
AVX_PACKED_OP3(Psraw, psraw)
AVX_PACKED_OP3(Psrad, psrad)
AVX_PACKED_OP3(Paddd, paddd)
AVX_PACKED_OP3(Paddq, paddq)
AVX_PACKED_OP3(Psubd, psubd)
AVX_PACKED_OP3(Psubq, psubq)
AVX_PACKED_OP3(Pmuludq, pmuludq)
AVX_PACKED_OP3(Pavgb, pavgb)
AVX_PACKED_OP3(Pavgw, pavgw)
AVX_PACKED_OP3(Pand, pand)
AVX_PACKED_OP3(Pminub, pminub)
AVX_PACKED_OP3(Pmaxub, pmaxub)
AVX_PACKED_OP3(Paddusb, paddusb)
AVX_PACKED_OP3(Psubusb, psubusb)
AVX_PACKED_OP3(Pcmpgtb, pcmpgtb)
AVX_PACKED_OP3(Pcmpeqb, pcmpeqb)
AVX_PACKED_OP3(Paddb, paddb)
AVX_PACKED_OP3(Paddsb, paddsb)
AVX_PACKED_OP3(Psubb, psubb)
AVX_PACKED_OP3(Psubsb, psubsb)
#undef AVX_PACKED_OP3
......@@ -442,8 +392,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_PACKED_OP3_WITH_TYPE(Psllq, psllq, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psrlw, psrlw, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psrld, psrld, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psrlq, psrlq, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psraw, psraw, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psrad, psrad, XMMRegister, uint8_t)
#undef AVX_PACKED_OP3_WITH_TYPE
......
......@@ -39,120 +39,185 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
}
}
template <typename Dst, typename... Args>
// Helper struct to implement functions that check for AVX support and
// dispatch to the appropriate AVX/SSE instruction.
template <typename Dst, typename Arg, typename... Args>
struct AvxHelper {
Assembler* assm;
base::Optional<CpuFeature> feature = base::nullopt;
// Call a method where the AVX version expects the dst argument to be
// duplicated.
template <void (Assembler::*avx)(Dst, Dst, Args...),
// E.g. Andps(x, y) -> vandps(x, x, y)
// -> andps(x, y)
template <void (Assembler::*avx)(Dst, Dst, Arg, Args...),
void (Assembler::*no_avx)(Dst, Arg, Args...)>
void emit(Dst dst, Arg arg, Args... args) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx)(dst, dst, arg, args...);
} else if (feature.has_value()) {
DCHECK(CpuFeatures::IsSupported(*feature));
CpuFeatureScope scope(assm, *feature);
(assm->*no_avx)(dst, arg, args...);
} else {
(assm->*no_avx)(dst, arg, args...);
}
}
// Call a method in the AVX form (one more operand), but if unsupported will
// check that dst == first src.
// E.g. Andps(x, y, z) -> vandps(x, y, z)
// -> andps(x, z) and check that x == y
template <void (Assembler::*avx)(Dst, Arg, Args...),
void (Assembler::*no_avx)(Dst, Args...)>
void emit(Dst dst, Args... args) {
void emit(Dst dst, Arg arg, Args... args) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx)(dst, dst, args...);
(assm->*avx)(dst, arg, args...);
} else if (feature.has_value()) {
DCHECK_EQ(dst, arg);
DCHECK(CpuFeatures::IsSupported(*feature));
CpuFeatureScope scope(assm, *feature);
(assm->*no_avx)(dst, args...);
} else {
DCHECK_EQ(dst, arg);
(assm->*no_avx)(dst, args...);
}
}
// Call a method where the AVX version expects no duplicated dst argument.
template <void (Assembler::*avx)(Dst, Args...),
void (Assembler::*no_avx)(Dst, Args...)>
void emit(Dst dst, Args... args) {
// E.g. Movddup(x, y) -> vmovddup(x, y)
// -> movddup(x, y)
template <void (Assembler::*avx)(Dst, Arg, Args...),
void (Assembler::*no_avx)(Dst, Arg, Args...)>
void emit(Dst dst, Arg arg, Args... args) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx)(dst, args...);
(assm->*avx)(dst, arg, args...);
} else if (feature.has_value()) {
DCHECK(CpuFeatures::IsSupported(*feature));
CpuFeatureScope scope(assm, *feature);
(assm->*no_avx)(dst, args...);
(assm->*no_avx)(dst, arg, args...);
} else {
(assm->*no_avx)(dst, args...);
(assm->*no_avx)(dst, arg, args...);
}
}
};
#define AVX_OP(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
#define AVX_OP(macro_name, name) \
template <typename Dst, typename Arg, typename... Args> \
void macro_name(Dst dst, Arg arg, Args... args) { \
AvxHelper<Dst, Arg, Args...>{this} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
args...); \
}
#define AVX_OP_SSE3(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE3)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
#define AVX_OP_SSE3(macro_name, name) \
template <typename Dst, typename Arg, typename... Args> \
void macro_name(Dst dst, Arg arg, Args... args) { \
AvxHelper<Dst, Arg, Args...>{this, base::Optional<CpuFeature>(SSE3)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
args...); \
}
#define AVX_OP_SSSE3(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSSE3)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
#define AVX_OP_SSSE3(macro_name, name) \
template <typename Dst, typename Arg, typename... Args> \
void macro_name(Dst dst, Arg arg, Args... args) { \
AvxHelper<Dst, Arg, Args...>{this, base::Optional<CpuFeature>(SSSE3)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
args...); \
}
#define AVX_OP_SSE4_1(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE4_1)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
#define AVX_OP_SSE4_1(macro_name, name) \
template <typename Dst, typename Arg, typename... Args> \
void macro_name(Dst dst, Arg arg, Args... args) { \
AvxHelper<Dst, Arg, Args...>{this, base::Optional<CpuFeature>(SSE4_1)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
args...); \
}
#define AVX_OP_SSE4_2(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE4_2)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
#define AVX_OP_SSE4_2(macro_name, name) \
template <typename Dst, typename Arg, typename... Args> \
void macro_name(Dst dst, Arg arg, Args... args) { \
AvxHelper<Dst, Arg, Args...>{this, base::Optional<CpuFeature>(SSE4_2)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
args...); \
}
// Keep this list sorted by required extension, then instruction name.
AVX_OP(Andnpd, andnpd)
AVX_OP(Andpd, andpd)
AVX_OP(Andps, andps)
AVX_OP(Cvtdq2pd, cvtdq2pd)
AVX_OP(Cvtdq2ps, cvtdq2ps)
AVX_OP(Cvtps2pd, cvtps2pd)
AVX_OP(Cvtpd2ps, cvtpd2ps)
AVX_OP(Cvtps2pd, cvtps2pd)
AVX_OP(Cvttps2dq, cvttps2dq)
AVX_OP(Movaps, movaps)
AVX_OP(Movd, movd)
AVX_OP(Movhlps, movhlps)
AVX_OP(Movhps, movhps)
AVX_OP(Movlps, movlps)
AVX_OP(Movmskpd, movmskpd)
AVX_OP(Movmskps, movmskps)
AVX_OP(Movss, movss)
AVX_OP(Movsd, movsd)
AVX_OP(Movss, movss)
AVX_OP(Movupd, movupd)
AVX_OP(Movups, movups)
AVX_OP(Orpd, orpd)
AVX_OP(Orps, orps)
AVX_OP(Packssdw, packssdw)
AVX_OP(Packsswb, packsswb)
AVX_OP(Packuswb, packuswb)
AVX_OP(Paddusb, paddusb)
AVX_OP(Paddusw, paddusw)
AVX_OP(Pand, pand)
AVX_OP(Pmovmskb, pmovmskb)
AVX_OP(Pmullw, pmullw)
AVX_OP(Pshuflw, pshuflw)
AVX_OP(Pshufhw, pshufhw)
AVX_OP(Por, por)
AVX_OP(Pshufd, pshufd)
AVX_OP(Pshufhw, pshufhw)
AVX_OP(Pshuflw, pshuflw)
AVX_OP(Psraw, psraw)
AVX_OP(Psrlq, psrlq)
AVX_OP(Psubb, psubb)
AVX_OP(Psubd, psubd)
AVX_OP(Psubq, psubq)
AVX_OP(Psubw, psubw)
AVX_OP(Punpckhbw, punpckhbw)
AVX_OP(Punpckhdq, punpckhdq)
AVX_OP(Punpckhqdq, punpckhqdq)
AVX_OP(Punpckhwd, punpckhwd)
AVX_OP(Punpcklbw, punpcklbw)
AVX_OP(Punpckldq, punpckldq)
AVX_OP(Punpcklqdq, punpcklqdq)
AVX_OP(Punpcklwd, punpcklwd)
AVX_OP(Pxor, pxor)
AVX_OP(Rcpps, rcpps)
AVX_OP(Rsqrtps, rsqrtps)
AVX_OP(Sqrtps, sqrtps)
AVX_OP(Sqrtpd, sqrtpd)
AVX_OP(Sqrtps, sqrtps)
AVX_OP(Sqrtsd, sqrtsd)
AVX_OP(Sqrtss, sqrtss)
AVX_OP(Xorpd, xorpd)
AVX_OP(Xorps, xorps)
AVX_OP_SSE3(Movddup, movddup)
AVX_OP_SSE3(Movshdup, movshdup)
AVX_OP_SSSE3(Pabsb, pabsb)
AVX_OP_SSSE3(Pabsw, pabsw)
AVX_OP_SSSE3(Pabsd, pabsd)
AVX_OP_SSSE3(Pabsw, pabsw)
AVX_OP_SSE4_1(Extractps, extractps)
AVX_OP_SSE4_1(Pextrb, pextrb)
AVX_OP_SSE4_1(Pextrw, pextrw)
AVX_OP_SSE4_1(Pmovsxbw, pmovsxbw)
AVX_OP_SSE4_1(Pmovsxwd, pmovsxwd)
AVX_OP_SSE4_1(Pmovsxdq, pmovsxdq)
AVX_OP_SSE4_1(Pmovsxwd, pmovsxwd)
AVX_OP_SSE4_1(Pmovzxbw, pmovzxbw)
AVX_OP_SSE4_1(Pmovzxwd, pmovzxwd)
AVX_OP_SSE4_1(Pmovzxdq, pmovzxdq)
AVX_OP_SSE4_1(Pmovzxwd, pmovzxwd)
AVX_OP_SSE4_1(Ptest, ptest)
AVX_OP_SSE4_1(Roundps, roundps)
AVX_OP_SSE4_1(Roundpd, roundpd)
AVX_OP_SSE4_1(Roundps, roundps)
void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
......
......@@ -728,6 +728,24 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
return bytes;
}
void TurboAssembler::Movq(XMMRegister dst, Register src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vmovq(dst, src);
} else {
movq(dst, src);
}
}
void TurboAssembler::Movq(Register dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vmovq(dst, src);
} else {
movq(dst, src);
}
}
void TurboAssembler::Movdqa(XMMRegister dst, Operand src) {
// See comments in Movdqa(XMMRegister, XMMRegister).
if (CpuFeatures::IsSupported(AVX)) {
......@@ -2031,16 +2049,6 @@ void TurboAssembler::Psllq(XMMRegister dst, byte imm8) {
}
}
void TurboAssembler::Psrlq(XMMRegister dst, byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpsrlq(dst, dst, imm8);
} else {
DCHECK(!IsEnabled(AVX));
psrlq(dst, imm8);
}
}
void TurboAssembler::Pslld(XMMRegister dst, byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
......
......@@ -66,14 +66,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP(Subsd, subsd)
AVX_OP(Divss, divss)
AVX_OP(Divsd, divsd)
AVX_OP(Orps, orps)
AVX_OP(Xorps, xorps)
AVX_OP(Xorpd, xorpd)
AVX_OP(Movq, movq)
AVX_OP(Movhlps, movhlps)
AVX_OP(Pcmpeqb, pcmpeqb)
AVX_OP(Pcmpeqw, pcmpeqw)
AVX_OP(Pcmpeqd, pcmpeqd)
AVX_OP(Pcmpgtb, pcmpgtb)
AVX_OP(Pcmpgtw, pcmpgtw)
AVX_OP(Pmaxsw, pmaxsw)
......@@ -83,11 +75,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP(Addss, addss)
AVX_OP(Addsd, addsd)
AVX_OP(Mulsd, mulsd)
AVX_OP(Andps, andps)
AVX_OP(Andnps, andnps)
AVX_OP(Andpd, andpd)
AVX_OP(Andnpd, andnpd)
AVX_OP(Orpd, orpd)
AVX_OP(Cmpeqps, cmpeqps)
AVX_OP(Cmpltps, cmpltps)
AVX_OP(Cmpleps, cmpleps)
......@@ -100,18 +88,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP(Cmpneqpd, cmpneqpd)
AVX_OP(Cmpnltpd, cmpnltpd)
AVX_OP(Cmpnlepd, cmpnlepd)
AVX_OP(Sqrtss, sqrtss)
AVX_OP(Sqrtsd, sqrtsd)
AVX_OP(Cvttpd2dq, cvttpd2dq)
AVX_OP(Ucomiss, ucomiss)
AVX_OP(Ucomisd, ucomisd)
AVX_OP(Pand, pand)
AVX_OP(Por, por)
AVX_OP(Pxor, pxor)
AVX_OP(Psubb, psubb)
AVX_OP(Psubw, psubw)
AVX_OP(Psubd, psubd)
AVX_OP(Psubq, psubq)
AVX_OP(Psubsb, psubsb)
AVX_OP(Psubsw, psubsw)
AVX_OP(Psubusb, psubusb)
......@@ -119,21 +98,17 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP(Pslld, pslld)
AVX_OP(Pavgb, pavgb)
AVX_OP(Pavgw, pavgw)
AVX_OP(Psraw, psraw)
AVX_OP(Psrad, psrad)
AVX_OP(Psllw, psllw)
AVX_OP(Psllq, psllq)
AVX_OP(Psrlw, psrlw)
AVX_OP(Psrld, psrld)
AVX_OP(Psrlq, psrlq)
AVX_OP(Paddb, paddb)
AVX_OP(Paddw, paddw)
AVX_OP(Paddd, paddd)
AVX_OP(Paddq, paddq)
AVX_OP(Paddsb, paddsb)
AVX_OP(Paddsw, paddsw)
AVX_OP(Paddusb, paddusb)
AVX_OP(Paddusw, paddusw)
AVX_OP(Pcmpgtd, pcmpgtd)
AVX_OP(Pmuludq, pmuludq)
AVX_OP(Addpd, addpd)
......@@ -148,17 +123,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP(Subps, subps)
AVX_OP(Mulps, mulps)
AVX_OP(Divps, divps)
AVX_OP(Packsswb, packsswb)
AVX_OP(Packuswb, packuswb)
AVX_OP(Packssdw, packssdw)
AVX_OP(Punpcklbw, punpcklbw)
AVX_OP(Punpcklwd, punpcklwd)
AVX_OP(Punpckldq, punpckldq)
AVX_OP(Punpckhbw, punpckhbw)
AVX_OP(Punpckhwd, punpckhwd)
AVX_OP(Punpckhdq, punpckhdq)
AVX_OP(Punpcklqdq, punpcklqdq)
AVX_OP(Punpckhqdq, punpckhqdq)
AVX_OP(Pcmpeqb, pcmpeqb)
AVX_OP(Pcmpeqw, pcmpeqw)
AVX_OP(Pcmpeqd, pcmpeqd)
AVX_OP(Cmpps, cmpps)
AVX_OP(Cmppd, cmppd)
AVX_OP(Movlhps, movlhps)
......@@ -191,6 +158,12 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
#undef AVX_OP
// Define movq here instead of using AVX_OP. movq is defined using templates,
// and there is a function template `void movq(P1)` which, while technically
// not a valid instruction, will be selected when deducing the arguments for
// AvxHelper.
void Movq(XMMRegister dst, Register src);
void Movq(Register dst, XMMRegister src);
void PushReturnAddressFrom(Register src) { pushq(src); }
void PopReturnAddressTo(Register dst) { popq(dst); }
......@@ -474,8 +447,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
void Psllq(XMMRegister dst, int imm8) { Psllq(dst, static_cast<byte>(imm8)); }
void Psllq(XMMRegister dst, byte imm8);
void Psrlq(XMMRegister dst, int imm8) { Psrlq(dst, static_cast<byte>(imm8)); }
void Psrlq(XMMRegister dst, byte imm8);
void Pslld(XMMRegister dst, byte imm8);
void Psrld(XMMRegister dst, byte imm8);
......
......@@ -1978,7 +1978,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Canonicalize NaNs by quieting and clearing the payload.
__ Cmpunordpd(dst, dst, tmp);
__ Orpd(tmp, dst);
__ Psrlq(dst, 13);
__ Psrlq(dst, byte{13});
__ Andnpd(dst, tmp);
break;
}
......@@ -2000,7 +2000,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Subpd(tmp, tmp, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ Cmpunordpd(dst, dst, tmp);
__ Psrlq(dst, 13);
__ Psrlq(dst, byte{13});
__ Andnpd(dst, tmp);
break;
}
......@@ -2210,11 +2210,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Movaps(tmp2, right);
// Multiply high dword of each qword of left with right.
__ Psrlq(tmp1, 32);
__ Psrlq(tmp1, byte{32});
__ Pmuludq(tmp1, tmp1, right);
// Multiply high dword of each qword of right with left.
__ Psrlq(tmp2, 32);
__ Psrlq(tmp2, byte{32});
__ Pmuludq(tmp2, tmp2, left);
__ Paddq(tmp2, tmp2, tmp1);
......
......@@ -1478,7 +1478,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
__ Pcmpeqd(tmp, tmp);
__ Psrlq(tmp, 33);
__ Psrlq(tmp, byte{33});
__ Andps(i.OutputDoubleRegister(), tmp);
break;
}
......@@ -2441,7 +2441,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Canonicalize NaNs by quieting and clearing the payload.
__ Cmppd(dst, kScratchDoubleReg, int8_t{3});
__ Orpd(kScratchDoubleReg, dst);
__ Psrlq(dst, 13);
__ Psrlq(dst, byte{13});
__ Andnpd(dst, kScratchDoubleReg);
break;
}
......@@ -2462,7 +2462,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Subpd(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ Cmppd(dst, kScratchDoubleReg, int8_t{3});
__ Psrlq(dst, 13);
__ Psrlq(dst, byte{13});
__ Andnpd(dst, kScratchDoubleReg);
break;
}
......@@ -2843,11 +2843,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Movdqa(tmp2, right);
// Multiply high dword of each qword of left with right.
__ Psrlq(tmp1, 32);
__ Psrlq(tmp1, byte{32});
__ Pmuludq(tmp1, right);
// Multiply high dword of each qword of right with left.
__ Psrlq(tmp2, 32);
__ Psrlq(tmp2, byte{32});
__ Pmuludq(tmp2, left);
__ Paddq(tmp2, tmp1);
......
......@@ -3921,7 +3921,7 @@ void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
Pcmpeqb(tmp, tmp);
Psllq(tmp, tmp, 63);
Psrlq(tmp, tmp, shift);
Psrlq(tmp, tmp, byte{shift});
liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
this, dst, lhs, rhs);
Pxor(dst.fp(), tmp);
......@@ -3963,10 +3963,10 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
Movaps(tmp1.fp(), lhs.fp());
Movaps(tmp2.fp(), rhs.fp());
// Multiply high dword of each qword of left with right.
Psrlq(tmp1.fp(), 32);
Psrlq(tmp1.fp(), byte{32});
Pmuludq(tmp1.fp(), tmp1.fp(), rhs.fp());
// Multiply high dword of each qword of right with left.
Psrlq(tmp2.fp(), 32);
Psrlq(tmp2.fp(), byte{32});
Pmuludq(tmp2.fp(), tmp2.fp(), lhs.fp());
Paddq(tmp2.fp(), tmp2.fp(), tmp1.fp());
Psllq(tmp2.fp(), tmp2.fp(), 32);
......@@ -4188,11 +4188,11 @@ void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
Psrlq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 1);
Psrlq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, byte{1});
Andpd(dst.fp(), liftoff::kScratchDoubleReg);
} else {
Pcmpeqd(dst.fp(), dst.fp());
Psrlq(dst.fp(), dst.fp(), 1);
Psrlq(dst.fp(), dst.fp(), byte{1});
Andpd(dst.fp(), src.fp());
}
}
......@@ -4291,7 +4291,7 @@ void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
// Canonicalize NaNs by quieting and clearing the payload.
Cmpunordpd(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
Orpd(liftoff::kScratchDoubleReg, dst.fp());
Psrlq(dst.fp(), 13);
Psrlq(dst.fp(), byte{13});
Andnpd(dst.fp(), liftoff::kScratchDoubleReg);
}
......@@ -4322,7 +4322,7 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
Subpd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, dst.fp());
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
Cmpunordpd(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
Psrlq(dst.fp(), 13);
Psrlq(dst.fp(), byte{13});
Andnpd(dst.fp(), liftoff::kScratchDoubleReg);
}
......
......@@ -3504,10 +3504,10 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
Movaps(tmp1.fp(), lhs.fp());
Movaps(tmp2.fp(), rhs.fp());
// Multiply high dword of each qword of left with right.
Psrlq(tmp1.fp(), 32);
Psrlq(tmp1.fp(), byte{32});
Pmuludq(tmp1.fp(), rhs.fp());
// Multiply high dword of each qword of right with left.
Psrlq(tmp2.fp(), 32);
Psrlq(tmp2.fp(), byte{32});
Pmuludq(tmp2.fp(), lhs.fp());
Paddq(tmp2.fp(), tmp1.fp());
Psllq(tmp2.fp(), 32);
......@@ -3729,11 +3729,11 @@ void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
Psrlq(kScratchDoubleReg, static_cast<byte>(1));
Psrlq(kScratchDoubleReg, byte{1});
Andpd(dst.fp(), kScratchDoubleReg);
} else {
Pcmpeqd(dst.fp(), dst.fp());
Psrlq(dst.fp(), static_cast<byte>(1));
Psrlq(dst.fp(), byte{1});
Andpd(dst.fp(), src.fp());
}
}
......@@ -3832,7 +3832,7 @@ void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
// Canonicalize NaNs by quieting and clearing the payload.
Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
Orpd(kScratchDoubleReg, dst.fp());
Psrlq(dst.fp(), 13);
Psrlq(dst.fp(), byte{13});
Andnpd(dst.fp(), kScratchDoubleReg);
}
......@@ -3863,7 +3863,7 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
Subpd(kScratchDoubleReg, dst.fp());
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
Psrlq(dst.fp(), 13);
Psrlq(dst.fp(), byte{13});
Andnpd(dst.fp(), kScratchDoubleReg);
}
......