Commit 129c1bd1 authored by Ng Zhi An, committed by V8 LUCI CQ

[ia32] Move pcmpeq, movlps, movhps into shared macro-assembler

Drive-by edit: use ASM_CODE_COMMENT to generate better code comments for
all of the more complicated macro-assembler functions.

Also undef macros (AVX_OP et al.) since they are no longer used outside
of shared-macro-assembler.

Bug: v8:11589
Change-Id: I424f27b5b742a8efb26ccef87dbffb01eae60335
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3173892
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76973}
parent c0d1f24b
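For illustration, a minimal caller-side sketch (not part of this CL; the helper
name and register choices are hypothetical) of the moved three-operand
Movhps/Movlps helpers: with AVX they emit the non-destructive vmovhps/vmovlps,
otherwise they copy src1 into dst before the destructive two-operand SSE form.

// Hypothetical call site (names illustrative), assuming the shared helper
// declared in this CL: Movhps(XMMRegister dst, XMMRegister src1, Operand src2).
// With AVX this emits vmovhps(dst, src, mem); without AVX it first emits
// movaps(dst, src) when dst != src, then movhps(dst, mem).
void EmitLoadHighLane(TurboAssembler* tasm, XMMRegister dst, XMMRegister src,
                      Operand mem) {
  tasm->Movhps(dst, src, mem);  // low 64 bits from src, high 64 bits from mem
}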
......@@ -302,38 +302,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
// may be bigger than 2^16 - 1. Requires a scratch register.
void Ret(int bytes_dropped, Register scratch);
// Defined here because some callers take a pointer to member functions.
AVX_OP(Pcmpeqb, pcmpeqb)
AVX_OP(Pcmpeqw, pcmpeqw)
AVX_OP(Pcmpeqd, pcmpeqd)
AVX_OP_SSE4_1(Pcmpeqq, pcmpeqq)
// Macro for instructions that have 2 operands for AVX version and 1 operand for
// SSE version. Will move src1 to dst if dst != src1.
#define AVX_OP3_WITH_MOVE(macro_name, name, dst_type, src_type) \
void macro_name(dst_type dst, dst_type src1, src_type src2) { \
if (CpuFeatures::IsSupported(AVX)) { \
CpuFeatureScope scope(this, AVX); \
v##name(dst, src1, src2); \
} else { \
if (dst != src1) { \
movaps(dst, src1); \
} \
name(dst, src2); \
} \
}
AVX_OP3_WITH_MOVE(Movlps, movlps, XMMRegister, Operand)
AVX_OP3_WITH_MOVE(Movhps, movhps, XMMRegister, Operand)
#undef AVX_OP3_WITH_MOVE
// TODO(zhin): Remove after moving more definitions into SharedTurboAssembler.
void Movlps(Operand dst, XMMRegister src) {
SharedTurboAssembler::Movlps(dst, src);
}
void Movhps(Operand dst, XMMRegister src) {
SharedTurboAssembler::Movhps(dst, src);
}
void PextrdPreSse41(Register dst, XMMRegister src, uint8_t imm8);
void PinsrdPreSse41(XMMRegister dst, Register src, uint8_t imm8,
uint32_t* load_pc_offset) {
......
......@@ -73,6 +73,32 @@ void SharedTurboAssembler::And(Register dst, Immediate src) {
#endif
}
void SharedTurboAssembler::Movhps(XMMRegister dst, XMMRegister src1,
Operand src2) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vmovhps(dst, src1, src2);
} else {
if (dst != src1) {
movaps(dst, src1);
}
movhps(dst, src2);
}
}
void SharedTurboAssembler::Movlps(XMMRegister dst, XMMRegister src1,
Operand src2) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vmovlps(dst, src1, src2);
} else {
if (dst != src1) {
movaps(dst, src1);
}
movlps(dst, src2);
}
}
void SharedTurboAssembler::Shufps(XMMRegister dst, XMMRegister src1,
XMMRegister src2, uint8_t imm8) {
if (CpuFeatures::IsSupported(AVX)) {
......@@ -88,6 +114,7 @@ void SharedTurboAssembler::Shufps(XMMRegister dst, XMMRegister src1,
void SharedTurboAssembler::F64x2ExtractLane(DoubleRegister dst, XMMRegister src,
uint8_t lane) {
ASM_CODE_COMMENT(this);
if (lane == 0) {
if (dst != src) {
Movaps(dst, src);
......@@ -106,6 +133,7 @@ void SharedTurboAssembler::F64x2ExtractLane(DoubleRegister dst, XMMRegister src,
void SharedTurboAssembler::F64x2ReplaceLane(XMMRegister dst, XMMRegister src,
DoubleRegister rep, uint8_t lane) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
if (lane == 0) {
......@@ -129,6 +157,7 @@ void SharedTurboAssembler::F64x2ReplaceLane(XMMRegister dst, XMMRegister src,
void SharedTurboAssembler::F32x4Min(XMMRegister dst, XMMRegister lhs,
XMMRegister rhs, XMMRegister scratch) {
ASM_CODE_COMMENT(this);
// The minps instruction doesn't propagate NaNs and +0's in its first
// operand. Perform minps in both orders, merge the results, and adjust.
if (CpuFeatures::IsSupported(AVX)) {
......@@ -157,6 +186,7 @@ void SharedTurboAssembler::F32x4Min(XMMRegister dst, XMMRegister lhs,
void SharedTurboAssembler::F32x4Max(XMMRegister dst, XMMRegister lhs,
XMMRegister rhs, XMMRegister scratch) {
ASM_CODE_COMMENT(this);
// The maxps instruction doesn't propagate NaNs and +0's in its first
// operand. Perform maxps in both orders, merge the results, and adjust.
if (CpuFeatures::IsSupported(AVX)) {
......@@ -188,6 +218,7 @@ void SharedTurboAssembler::F32x4Max(XMMRegister dst, XMMRegister lhs,
void SharedTurboAssembler::F64x2Min(XMMRegister dst, XMMRegister lhs,
XMMRegister rhs, XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
// The minpd instruction doesn't propagate NaNs and +0's in its first
......@@ -225,6 +256,7 @@ void SharedTurboAssembler::F64x2Min(XMMRegister dst, XMMRegister lhs,
void SharedTurboAssembler::F64x2Max(XMMRegister dst, XMMRegister lhs,
XMMRegister rhs, XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
// The maxpd instruction doesn't propagate NaNs and +0's in its first
......@@ -263,6 +295,7 @@ void SharedTurboAssembler::F64x2Max(XMMRegister dst, XMMRegister lhs,
}
void SharedTurboAssembler::F32x4Splat(XMMRegister dst, DoubleRegister src) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX2)) {
CpuFeatureScope avx2_scope(this, AVX2);
vbroadcastss(dst, src);
......@@ -281,6 +314,7 @@ void SharedTurboAssembler::F32x4Splat(XMMRegister dst, DoubleRegister src) {
void SharedTurboAssembler::F32x4ExtractLane(FloatRegister dst, XMMRegister src,
uint8_t lane) {
ASM_CODE_COMMENT(this);
DCHECK_LT(lane, 4);
// These instructions are shorter than insertps, but will leave junk in
// the top lanes of dst.
......@@ -302,6 +336,7 @@ void SharedTurboAssembler::F32x4ExtractLane(FloatRegister dst, XMMRegister src,
void SharedTurboAssembler::S128Store32Lane(Operand dst, XMMRegister src,
uint8_t laneidx) {
ASM_CODE_COMMENT(this);
if (laneidx == 0) {
Movss(dst, src);
} else {
......@@ -313,6 +348,7 @@ void SharedTurboAssembler::S128Store32Lane(Operand dst, XMMRegister src,
template <typename Op>
void SharedTurboAssembler::I8x16SplatPreAvx2(XMMRegister dst, Op src,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
DCHECK(!CpuFeatures::IsSupported(AVX2));
CpuFeatureScope ssse3_scope(this, SSSE3);
Movd(dst, src);
......@@ -322,6 +358,7 @@ void SharedTurboAssembler::I8x16SplatPreAvx2(XMMRegister dst, Op src,
void SharedTurboAssembler::I8x16Splat(XMMRegister dst, Register src,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX2)) {
CpuFeatureScope avx2_scope(this, AVX2);
Movd(scratch, src);
......@@ -333,6 +370,7 @@ void SharedTurboAssembler::I8x16Splat(XMMRegister dst, Register src,
void SharedTurboAssembler::I8x16Splat(XMMRegister dst, Operand src,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
DCHECK_OPERAND_IS_NOT_REG(src);
if (CpuFeatures::IsSupported(AVX2)) {
CpuFeatureScope avx2_scope(this, AVX2);
......@@ -345,6 +383,7 @@ void SharedTurboAssembler::I8x16Splat(XMMRegister dst, Operand src,
void SharedTurboAssembler::I8x16Shl(XMMRegister dst, XMMRegister src1,
uint8_t src2, Register tmp1,
XMMRegister tmp2) {
ASM_CODE_COMMENT(this);
DCHECK_NE(dst, tmp2);
// Perform 16-bit shift, then mask away low bits.
if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) {
......@@ -366,6 +405,7 @@ void SharedTurboAssembler::I8x16Shl(XMMRegister dst, XMMRegister src1,
void SharedTurboAssembler::I8x16Shl(XMMRegister dst, XMMRegister src1,
Register src2, Register tmp1,
XMMRegister tmp2, XMMRegister tmp3) {
ASM_CODE_COMMENT(this);
DCHECK(!AreAliased(dst, tmp2, tmp3));
DCHECK(!AreAliased(src1, tmp2, tmp3));
......@@ -391,6 +431,7 @@ void SharedTurboAssembler::I8x16Shl(XMMRegister dst, XMMRegister src1,
void SharedTurboAssembler::I8x16ShrS(XMMRegister dst, XMMRegister src1,
uint8_t src2, XMMRegister tmp) {
ASM_CODE_COMMENT(this);
// Unpack bytes into words, do word (16-bit) shifts, and repack.
DCHECK_NE(dst, tmp);
uint8_t shift = truncate_to_int3(src2) + 8;
......@@ -405,6 +446,7 @@ void SharedTurboAssembler::I8x16ShrS(XMMRegister dst, XMMRegister src1,
void SharedTurboAssembler::I8x16ShrS(XMMRegister dst, XMMRegister src1,
Register src2, Register tmp1,
XMMRegister tmp2, XMMRegister tmp3) {
ASM_CODE_COMMENT(this);
DCHECK(!AreAliased(dst, tmp2, tmp3));
DCHECK_NE(src1, tmp2);
......@@ -425,6 +467,7 @@ void SharedTurboAssembler::I8x16ShrS(XMMRegister dst, XMMRegister src1,
void SharedTurboAssembler::I8x16ShrU(XMMRegister dst, XMMRegister src1,
uint8_t src2, Register tmp1,
XMMRegister tmp2) {
ASM_CODE_COMMENT(this);
DCHECK_NE(dst, tmp2);
if (!CpuFeatures::IsSupported(AVX) && (dst != src1)) {
movaps(dst, src1);
......@@ -446,6 +489,7 @@ void SharedTurboAssembler::I8x16ShrU(XMMRegister dst, XMMRegister src1,
void SharedTurboAssembler::I8x16ShrU(XMMRegister dst, XMMRegister src1,
Register src2, Register tmp1,
XMMRegister tmp2, XMMRegister tmp3) {
ASM_CODE_COMMENT(this);
DCHECK(!AreAliased(dst, tmp2, tmp3));
DCHECK_NE(src1, tmp2);
......@@ -472,6 +516,7 @@ void SharedTurboAssembler::I16x8SplatPreAvx2(XMMRegister dst, Op src) {
}
void SharedTurboAssembler::I16x8Splat(XMMRegister dst, Register src) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX2)) {
CpuFeatureScope avx2_scope(this, AVX2);
Movd(dst, src);
......@@ -482,6 +527,7 @@ void SharedTurboAssembler::I16x8Splat(XMMRegister dst, Register src) {
}
void SharedTurboAssembler::I16x8Splat(XMMRegister dst, Operand src) {
ASM_CODE_COMMENT(this);
DCHECK_OPERAND_IS_NOT_REG(src);
if (CpuFeatures::IsSupported(AVX2)) {
CpuFeatureScope avx2_scope(this, AVX2);
......@@ -494,6 +540,7 @@ void SharedTurboAssembler::I16x8Splat(XMMRegister dst, Operand src) {
void SharedTurboAssembler::I16x8ExtMulLow(XMMRegister dst, XMMRegister src1,
XMMRegister src2, XMMRegister scratch,
bool is_signed) {
ASM_CODE_COMMENT(this);
is_signed ? Pmovsxbw(scratch, src1) : Pmovzxbw(scratch, src1);
is_signed ? Pmovsxbw(dst, src2) : Pmovzxbw(dst, src2);
Pmullw(dst, scratch);
......@@ -502,6 +549,7 @@ void SharedTurboAssembler::I16x8ExtMulLow(XMMRegister dst, XMMRegister src1,
void SharedTurboAssembler::I16x8ExtMulHighS(XMMRegister dst, XMMRegister src1,
XMMRegister src2,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpunpckhbw(scratch, src1, src1);
......@@ -525,6 +573,7 @@ void SharedTurboAssembler::I16x8ExtMulHighS(XMMRegister dst, XMMRegister src1,
void SharedTurboAssembler::I16x8ExtMulHighU(XMMRegister dst, XMMRegister src1,
XMMRegister src2,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
// The logic here is slightly complicated to handle all the cases of register
// aliasing. This allows flexibility for callers in TurboFan and Liftoff.
if (CpuFeatures::IsSupported(AVX)) {
......@@ -573,6 +622,7 @@ void SharedTurboAssembler::I16x8ExtMulHighU(XMMRegister dst, XMMRegister src1,
void SharedTurboAssembler::I16x8SConvertI8x16High(XMMRegister dst,
XMMRegister src) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
// src = |a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p| (high)
......@@ -596,6 +646,7 @@ void SharedTurboAssembler::I16x8SConvertI8x16High(XMMRegister dst,
void SharedTurboAssembler::I16x8UConvertI8x16High(XMMRegister dst,
XMMRegister src,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
// tmp = |0|0|0|0|0|0|0|0 | 0|0|0|0|0|0|0|0|
......@@ -683,6 +734,7 @@ void SharedTurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst,
void SharedTurboAssembler::I32x4ExtMul(XMMRegister dst, XMMRegister src1,
XMMRegister src2, XMMRegister scratch,
bool low, bool is_signed) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpmullw(scratch, src1, src2);
......@@ -699,6 +751,7 @@ void SharedTurboAssembler::I32x4ExtMul(XMMRegister dst, XMMRegister src1,
void SharedTurboAssembler::I32x4SConvertI16x8High(XMMRegister dst,
XMMRegister src) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
// src = |a|b|c|d|e|f|g|h| (high)
......@@ -722,6 +775,7 @@ void SharedTurboAssembler::I32x4SConvertI16x8High(XMMRegister dst,
void SharedTurboAssembler::I32x4UConvertI16x8High(XMMRegister dst,
XMMRegister src,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
// scratch = |0|0|0|0|0|0|0|0|
......@@ -746,6 +800,7 @@ void SharedTurboAssembler::I32x4UConvertI16x8High(XMMRegister dst,
void SharedTurboAssembler::I64x2Neg(XMMRegister dst, XMMRegister src,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpxor(scratch, scratch, scratch);
......@@ -762,6 +817,7 @@ void SharedTurboAssembler::I64x2Neg(XMMRegister dst, XMMRegister src,
void SharedTurboAssembler::I64x2Abs(XMMRegister dst, XMMRegister src,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
XMMRegister tmp = dst == src ? scratch : dst;
......@@ -782,6 +838,7 @@ void SharedTurboAssembler::I64x2Abs(XMMRegister dst, XMMRegister src,
void SharedTurboAssembler::I64x2GtS(XMMRegister dst, XMMRegister src0,
XMMRegister src1, XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpcmpgtq(dst, src0, src1);
......@@ -815,6 +872,7 @@ void SharedTurboAssembler::I64x2GtS(XMMRegister dst, XMMRegister src0,
void SharedTurboAssembler::I64x2GeS(XMMRegister dst, XMMRegister src0,
XMMRegister src1, XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpcmpgtq(dst, src1, src0);
......@@ -849,6 +907,7 @@ void SharedTurboAssembler::I64x2GeS(XMMRegister dst, XMMRegister src0,
void SharedTurboAssembler::I64x2ShrS(XMMRegister dst, XMMRegister src,
uint8_t shift, XMMRegister xmm_tmp) {
ASM_CODE_COMMENT(this);
DCHECK_GT(64, shift);
DCHECK_NE(xmm_tmp, dst);
DCHECK_NE(xmm_tmp, src);
......@@ -883,6 +942,7 @@ void SharedTurboAssembler::I64x2ShrS(XMMRegister dst, XMMRegister src,
Register shift, XMMRegister xmm_tmp,
XMMRegister xmm_shift,
Register tmp_shift) {
ASM_CODE_COMMENT(this);
DCHECK_NE(xmm_tmp, dst);
DCHECK_NE(xmm_tmp, src);
DCHECK_NE(xmm_shift, dst);
......@@ -911,6 +971,7 @@ void SharedTurboAssembler::I64x2ShrS(XMMRegister dst, XMMRegister src,
void SharedTurboAssembler::I64x2Mul(XMMRegister dst, XMMRegister lhs,
XMMRegister rhs, XMMRegister tmp1,
XMMRegister tmp2) {
ASM_CODE_COMMENT(this);
DCHECK(!AreAliased(dst, tmp1, tmp2));
DCHECK(!AreAliased(lhs, tmp1, tmp2));
DCHECK(!AreAliased(rhs, tmp1, tmp2));
......@@ -960,6 +1021,7 @@ void SharedTurboAssembler::I64x2Mul(XMMRegister dst, XMMRegister lhs,
void SharedTurboAssembler::I64x2ExtMul(XMMRegister dst, XMMRegister src1,
XMMRegister src2, XMMRegister scratch,
bool low, bool is_signed) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
if (low) {
......@@ -989,6 +1051,7 @@ void SharedTurboAssembler::I64x2ExtMul(XMMRegister dst, XMMRegister src1,
void SharedTurboAssembler::I64x2SConvertI32x4High(XMMRegister dst,
XMMRegister src) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpunpckhqdq(dst, src, src);
......@@ -1007,6 +1070,7 @@ void SharedTurboAssembler::I64x2SConvertI32x4High(XMMRegister dst,
void SharedTurboAssembler::I64x2UConvertI32x4High(XMMRegister dst,
XMMRegister src,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpxor(scratch, scratch, scratch);
......@@ -1027,6 +1091,7 @@ void SharedTurboAssembler::I64x2UConvertI32x4High(XMMRegister dst,
void SharedTurboAssembler::S128Not(XMMRegister dst, XMMRegister src,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
if (dst == src) {
Pcmpeqd(scratch, scratch);
Pxor(dst, scratch);
......@@ -1039,6 +1104,7 @@ void SharedTurboAssembler::S128Not(XMMRegister dst, XMMRegister src,
void SharedTurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
XMMRegister src1, XMMRegister src2,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
// v128.select = v128.or(v128.and(v1, c), v128.andnot(v2, c)).
// pandn(x, y) = !x & y, so we have to flip the mask and input.
if (CpuFeatures::IsSupported(AVX)) {
......@@ -1058,6 +1124,7 @@ void SharedTurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
void SharedTurboAssembler::S128Load8Splat(XMMRegister dst, Operand src,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
// The trap handler uses the current pc to create a landing pad, so that it can
// determine if a trap occurred in Wasm code due to an OOB load. Make sure the
// first instruction in each case below is the one that loads.
......@@ -1081,6 +1148,7 @@ void SharedTurboAssembler::S128Load8Splat(XMMRegister dst, Operand src,
void SharedTurboAssembler::S128Load16Splat(XMMRegister dst, Operand src,
XMMRegister scratch) {
ASM_CODE_COMMENT(this);
// The trap handler uses the current pc to create a landing pad, so that it can
// determine if a trap occurred in Wasm code due to an OOB load. Make sure the
// first instruction in each case below is the one that loads.
......@@ -1101,6 +1169,7 @@ void SharedTurboAssembler::S128Load16Splat(XMMRegister dst, Operand src,
}
void SharedTurboAssembler::S128Load32Splat(XMMRegister dst, Operand src) {
ASM_CODE_COMMENT(this);
// The trap handler uses the current pc to create a landing pad, so that it can
// determine if a trap occurred in Wasm code due to an OOB load. Make sure the
// first instruction in each case below is the one that loads.
......@@ -1115,6 +1184,7 @@ void SharedTurboAssembler::S128Load32Splat(XMMRegister dst, Operand src) {
void SharedTurboAssembler::S128Store64Lane(Operand dst, XMMRegister src,
uint8_t laneidx) {
ASM_CODE_COMMENT(this);
if (laneidx == 0) {
Movlps(dst, src);
} else {
......
......@@ -46,6 +46,10 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
void Add(Register dst, Immediate src);
void And(Register dst, Immediate src);
// Will move src1 to dst if AVX is not supported.
void Movhps(XMMRegister dst, XMMRegister src1, Operand src2);
void Movlps(XMMRegister dst, XMMRegister src1, Operand src2);
template <typename Op>
void Pinsrb(XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
uint32_t* load_pc_offset = nullptr) {
......@@ -385,6 +389,12 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP_SSE4_1(Roundsd, roundsd)
AVX_OP_SSE4_1(Roundss, roundss)
#undef AVX_OP
#undef AVX_OP_SSE3
#undef AVX_OP_SSSE3
#undef AVX_OP_SSE4_1
#undef AVX_OP_SSE4_2
void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane);
void F64x2ReplaceLane(XMMRegister dst, XMMRegister src, DoubleRegister rep,
uint8_t lane);
......@@ -598,6 +608,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler {
void I32x4SConvertF32x4(XMMRegister dst, XMMRegister src, XMMRegister tmp,
Register scratch) {
ASM_CODE_COMMENT(this);
Operand op = ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_int32_overflow_as_float(), scratch);
......@@ -639,6 +650,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler {
void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src,
XMMRegister scratch, Register tmp) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
XMMRegister original_dst = dst;
......@@ -675,6 +687,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler {
void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src,
XMMRegister scratch, Register tmp) {
ASM_CODE_COMMENT(this);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vxorpd(scratch, scratch, scratch);
......@@ -714,6 +727,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler {
void I32x4ExtAddPairwiseI16x8S(XMMRegister dst, XMMRegister src,
Register scratch) {
ASM_CODE_COMMENT(this);
Operand op = ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_i16x8_splat_0x0001(), scratch);
// pmaddwd multiplies signed words in src and op, producing
......
......@@ -2728,7 +2728,7 @@ inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
assm->cmov(zero, dst.gp(), tmp);
}
template <void (TurboAssembler::*pcmp)(XMMRegister, XMMRegister)>
template <void (SharedTurboAssembler::*pcmp)(XMMRegister, XMMRegister)>
inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister src,
base::Optional<CpuFeature> feature = base::nullopt) {
......