Commit 06f515d8 authored by Ng Zhi An, committed by V8 LUCI CQ

[x64][ia32] Share Pinsrb/Pinsrw code

Pinsrb and Pinsrw were separately defined in the ia32 and x64 macro
assemblers; move them into shared-macro-assembler.

Pinsrd can also be moved, but it is not as straightforward; move it in
a future patch.

Bug: v8:11589
Change-Id: Ib5c7517236e0c3fb7b9a4643f0a3ee8237ee6545
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3158681
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76820}
parent 0adc1410
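
The diff below deletes the per-port Pinsrb/Pinsrw definitions and routes both through a single PinsrHelper in SharedTurboAssembler: prefer the three-operand AVX encoding, otherwise copy src1 into dst (when they differ) and emit the legacy two-operand SSE form, optionally gated on an extra CPU feature (SSE4_1 for pinsrb). The following is a minimal, self-contained C++ sketch of that dispatch shape only; the Assembler, XMMRegister, and feature-check types are stand-ins, not the real V8 API, and the CpuFeatureScope/pc-offset bookkeeping is omitted.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <optional>

// Stand-ins for V8's XMMRegister, CpuFeatures, and Assembler.
struct XMMRegister { int code; };
bool operator!=(XMMRegister a, XMMRegister b) { return a.code != b.code; }

enum CpuFeature { SSE4_1, AVX };
bool IsSupported(CpuFeature f) { return f != AVX; }  // pretend AVX is absent

struct Assembler {
  void vpinsrb(XMMRegister, XMMRegister, int, uint8_t) { std::cout << "vpinsrb\n"; }
  void pinsrb(XMMRegister, int, uint8_t) { std::cout << "pinsrb\n"; }
  void movaps(XMMRegister, XMMRegister) { std::cout << "movaps\n"; }
};

template <typename Op>
using AvxFn = void (Assembler::*)(XMMRegister, XMMRegister, Op, uint8_t);
template <typename Op>
using NoAvxFn = void (Assembler::*)(XMMRegister, Op, uint8_t);

// Same dispatch shape as the PinsrHelper added in this change.
template <typename Op>
void PinsrHelper(Assembler* assm, AvxFn<Op> avx, NoAvxFn<Op> noavx,
                 XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
                 std::optional<CpuFeature> feature = std::nullopt) {
  if (IsSupported(AVX)) {
    (assm->*avx)(dst, src1, src2, imm8);  // AVX: non-destructive 3-operand form
    return;
  }
  if (dst != src1) assm->movaps(dst, src1);  // SSE form overwrites dst
  // The real helper DCHECKs the feature and opens a CpuFeatureScope here.
  if (feature.has_value()) assert(IsSupported(*feature));
  (assm->*noavx)(dst, src2, imm8);
}

int main() {
  Assembler assm;
  XMMRegister xmm0{0}, xmm1{1};
  // With AVX "unsupported" above, this prints movaps then pinsrb.
  PinsrHelper<int>(&assm, &Assembler::vpinsrb, &Assembler::pinsrb, xmm0, xmm1,
                   /*src2=*/42, /*imm8=*/3, SSE4_1);
}

The pointer-to-member parameters are what let one helper serve pinsrb and pinsrw (and, per the message above, potentially pinsrd in a later patch) without duplicating the feature dispatch.
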
@@ -1613,28 +1613,6 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
add(esp, Immediate(kDoubleSize));
}
void TurboAssembler::Pinsrb(XMMRegister dst, Operand src, int8_t imm8) {
Pinsrb(dst, dst, src, imm8);
}
void TurboAssembler::Pinsrb(XMMRegister dst, XMMRegister src1, Operand src2,
int8_t imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpinsrb(dst, src1, src2, imm8);
return;
}
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
if (dst != src1) {
movaps(dst, src1);
}
pinsrb(dst, src2, imm8);
return;
}
FATAL("no AVX or SSE4.1 support");
}
void TurboAssembler::Pinsrd(XMMRegister dst, XMMRegister src1, Operand src2,
uint8_t imm8) {
if (CpuFeatures::IsSupported(AVX)) {
@@ -1673,25 +1651,6 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
Pinsrd(dst, dst, src, imm8);
}
void TurboAssembler::Pinsrw(XMMRegister dst, Operand src, int8_t imm8) {
Pinsrw(dst, dst, src, imm8);
}
void TurboAssembler::Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2,
int8_t imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpinsrw(dst, src1, src2, imm8);
return;
} else {
if (dst != src1) {
movaps(dst, src1);
}
pinsrw(dst, src2, imm8);
return;
}
}
void TurboAssembler::Lzcnt(Register dst, Operand src) {
if (CpuFeatures::IsSupported(LZCNT)) {
CpuFeatureScope scope(this, LZCNT);
......
@@ -337,24 +337,12 @@ class V8_EXPORT_PRIVATE TurboAssembler
}
void Pextrd(Register dst, XMMRegister src, uint8_t imm8);
void Pinsrb(XMMRegister dst, Register src, int8_t imm8) {
Pinsrb(dst, Operand(src), imm8);
}
void Pinsrb(XMMRegister dst, Operand src, int8_t imm8);
// Moves src1 to dst if AVX is not supported.
void Pinsrb(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8);
void Pinsrd(XMMRegister dst, Register src, uint8_t imm8) {
Pinsrd(dst, Operand(src), imm8);
}
void Pinsrd(XMMRegister dst, Operand src, uint8_t imm8);
// Moves src1 to dst if AVX is not supported.
void Pinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8);
void Pinsrw(XMMRegister dst, Register src, int8_t imm8) {
Pinsrw(dst, Operand(src), imm8);
}
void Pinsrw(XMMRegister dst, Operand src, int8_t imm8);
// Moves src1 to dst if AVX is not supported.
void Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8);
// Expression support
// cvtsi2sd instruction only writes to the low 64-bit of dst register, which
......
@@ -46,6 +46,20 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
void Add(Register dst, Immediate src);
void And(Register dst, Immediate src);
template <typename Op>
void Pinsrb(XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
uint32_t* load_pc_offset = nullptr) {
PinsrHelper(this, &Assembler::vpinsrb, &Assembler::pinsrb, dst, src1, src2,
imm8, load_pc_offset, {SSE4_1});
}
template <typename Op>
void Pinsrw(XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
uint32_t* load_pc_offset = nullptr) {
PinsrHelper(this, &Assembler::vpinsrw, &Assembler::pinsrw, dst, src1, src2,
imm8, load_pc_offset);
}
// Supports both SSE and AVX. Move src1 to dst if they are not equal on SSE.
template <typename Op>
void Pshufb(XMMRegister dst, XMMRegister src, Op mask) {
@@ -268,7 +282,6 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP(Pcmpeqb, pcmpeqb)
AVX_OP(Pcmpeqd, pcmpeqd)
AVX_OP(Pcmpeqw, pcmpeqw)
AVX_OP(Pinsrw, pinsrw)
AVX_OP(Pmaddwd, pmaddwd)
AVX_OP(Pmaxsw, pmaxsw)
AVX_OP(Pmaxub, pmaxub)
@@ -344,7 +357,6 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP_SSE4_1(Pcmpeqq, pcmpeqq)
AVX_OP_SSE4_1(Pextrb, pextrb)
AVX_OP_SSE4_1(Pextrw, pextrw)
AVX_OP_SSE4_1(Pinsrb, pinsrb)
AVX_OP_SSE4_1(Pmaxsb, pmaxsb)
AVX_OP_SSE4_1(Pmaxsd, pmaxsd)
AVX_OP_SSE4_1(Pmaxud, pmaxud)
@@ -441,6 +453,35 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
void S128Load32Splat(XMMRegister dst, Operand src);
void S128Store64Lane(Operand dst, XMMRegister src, uint8_t laneidx);
protected:
template <typename Op>
using AvxFn = void (Assembler::*)(XMMRegister, XMMRegister, Op, uint8_t);
template <typename Op>
using NoAvxFn = void (Assembler::*)(XMMRegister, Op, uint8_t);
template <typename Op>
void PinsrHelper(Assembler* assm, AvxFn<Op> avx, NoAvxFn<Op> noavx,
XMMRegister dst, XMMRegister src1, Op src2, uint8_t imm8,
uint32_t* load_pc_offset = nullptr,
base::Optional<CpuFeature> feature = base::nullopt) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
if (load_pc_offset) *load_pc_offset = assm->pc_offset();
(assm->*avx)(dst, src1, src2, imm8);
return;
}
if (dst != src1) assm->movaps(dst, src1);
if (load_pc_offset) *load_pc_offset = assm->pc_offset();
if (feature.has_value()) {
DCHECK(CpuFeatures::IsSupported(*feature));
CpuFeatureScope scope(assm, *feature);
(assm->*noavx)(dst, src2, imm8);
} else {
(assm->*noavx)(dst, src2, imm8);
}
}
private:
template <typename Op>
void I8x16SplatPreAvx2(XMMRegister dst, Op src, XMMRegister scratch);
......
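
A side note on the header hunk above: because the shared Pinsrb/Pinsrw are templated on the source operand type Op, the separate Register and Operand overloads that the ia32 and x64 headers declared can be dropped. A tiny standalone illustration of that pattern (the types and the simplified signature here are stand-ins, not V8's):

#include <cstdint>
#include <iostream>

// Stand-ins for V8's Register and Operand source kinds.
struct Register { int code; };
struct Operand { int base; int disp; };

void emit(Register r) { std::cout << "reg r" << r.code << "\n"; }
void emit(Operand m) { std::cout << "mem [r" << m.base << "+" << m.disp << "]\n"; }

// One template replaces the pair of overloads: the operand kind is resolved
// where emit() is called, not by duplicating the wrapper per operand type.
template <typename Op>
void Pinsrw(Op src2, uint8_t imm8) {
  emit(src2);
  std::cout << "lane " << int{imm8} << "\n";
}

int main() {
  Pinsrw(Register{0}, 2);    // instantiates Pinsrw<Register>
  Pinsrw(Operand{4, 8}, 5);  // instantiates Pinsrw<Operand>
}
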
@@ -2118,60 +2118,8 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
}
namespace {
template <typename Src>
using AvxFn = void (Assembler::*)(XMMRegister, XMMRegister, Src, uint8_t);
template <typename Src>
using NoAvxFn = void (Assembler::*)(XMMRegister, Src, uint8_t);
template <typename Src>
void PinsrHelper(Assembler* assm, AvxFn<Src> avx, NoAvxFn<Src> noavx,
XMMRegister dst, XMMRegister src1, Src src2, uint8_t imm8,
uint32_t* load_pc_offset = nullptr,
base::Optional<CpuFeature> feature = base::nullopt) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
if (load_pc_offset) *load_pc_offset = assm->pc_offset();
(assm->*avx)(dst, src1, src2, imm8);
return;
}
if (dst != src1) assm->movaps(dst, src1);
if (load_pc_offset) *load_pc_offset = assm->pc_offset();
if (feature.has_value()) {
DCHECK(CpuFeatures::IsSupported(*feature));
CpuFeatureScope scope(assm, *feature);
(assm->*noavx)(dst, src2, imm8);
} else {
(assm->*noavx)(dst, src2, imm8);
}
}
} // namespace
void TurboAssembler::Pinsrb(XMMRegister dst, XMMRegister src1, Register src2,
uint8_t imm8, uint32_t* load_pc_offset) {
PinsrHelper(this, &Assembler::vpinsrb, &Assembler::pinsrb, dst, src1, src2,
imm8, load_pc_offset, {SSE4_1});
}
void TurboAssembler::Pinsrb(XMMRegister dst, XMMRegister src1, Operand src2,
uint8_t imm8, uint32_t* load_pc_offset) {
PinsrHelper(this, &Assembler::vpinsrb, &Assembler::pinsrb, dst, src1, src2,
imm8, load_pc_offset, {SSE4_1});
}
void TurboAssembler::Pinsrw(XMMRegister dst, XMMRegister src1, Register src2,
uint8_t imm8, uint32_t* load_pc_offset) {
PinsrHelper(this, &Assembler::vpinsrw, &Assembler::pinsrw, dst, src1, src2,
imm8, load_pc_offset);
}
void TurboAssembler::Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2,
uint8_t imm8, uint32_t* load_pc_offset) {
PinsrHelper(this, &Assembler::vpinsrw, &Assembler::pinsrw, dst, src1, src2,
imm8, load_pc_offset);
}
void TurboAssembler::Pinsrd(XMMRegister dst, XMMRegister src1, Register src2,
uint8_t imm8, uint32_t* load_pc_offset) {
// Need a fall back when SSE4_1 is unavailable. Pinsrb and Pinsrq are used
......
@@ -415,14 +415,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
// Non-SSE2 instructions.
void Pextrd(Register dst, XMMRegister src, uint8_t imm8);
void Pinsrb(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8,
uint32_t* load_pc_offset = nullptr);
void Pinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8,
uint32_t* load_pc_offset = nullptr);
void Pinsrw(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8,
uint32_t* load_pc_offset = nullptr);
void Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8,
uint32_t* load_pc_offset = nullptr);
void Pinsrd(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8,
uint32_t* load_pc_offset = nullptr);
void Pinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8,
......