Commit b6520eda authored by Ng Zhi An, committed by Commit Bot

[ia32][x64] Move AVX helper class into SharedTurboAssembler

Move the helper class and some function definitions into
SharedTurboAssembler. We leave most of the other function definitions
inside macro-assembler-x64 and will move them later.

Also move i16x8.ext_mul high as a check that this code movement works.

Bug: v8:11589
Change-Id: I8ec1fa24cb93b4c4c8bd936a9df06cbf5328374f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2792080
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73750}
parent 76c6fd5e
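For context, the helper being moved is a small dispatch struct: it selects the three-operand AVX encoding when AVX is available and falls back to the legacy two-operand SSE form otherwise, choosing between the two through member-function-pointer template parameters. The following is a minimal, self-contained sketch of that dispatch mechanism (only the duplicated-dst variant) using a hypothetical ToyAssembler that just prints; every name in it is illustrative and not V8 API.

#include <cstdio>

// Hypothetical stand-in for V8's Assembler; it prints mnemonics instead of
// emitting machine code.
struct ToyAssembler {
  void vpmullw(int dst, int src1, int src2) {
    std::printf("vpmullw xmm%d, xmm%d, xmm%d\n", dst, src1, src2);
  }
  void pmullw(int dst, int src) {
    std::printf("pmullw xmm%d, xmm%d\n", dst, src);
  }
};

// Mirrors the shape of AvxHelper: the AVX entry point takes a duplicated dst
// operand, the legacy-SSE one does not. Feature detection is reduced to a bool.
template <typename Dst, typename... Args>
struct ToyAvxHelper {
  ToyAssembler* assm;
  bool have_avx;

  template <void (ToyAssembler::*avx)(Dst, Dst, Args...),
            void (ToyAssembler::*no_avx)(Dst, Args...)>
  void emit(Dst dst, Args... args) {
    if (have_avx) {
      (assm->*avx)(dst, dst, args...);  // e.g. vpmullw dst, dst, src
    } else {
      (assm->*no_avx)(dst, args...);    // e.g. pmullw dst, src
    }
  }
};

int main() {
  ToyAssembler assm;
  ToyAvxHelper<int, int>{&assm, /*have_avx=*/true}
      .emit<&ToyAssembler::vpmullw, &ToyAssembler::pmullw>(0, 1);
  ToyAvxHelper<int, int>{&assm, /*have_avx=*/false}
      .emit<&ToyAssembler::vpmullw, &ToyAssembler::pmullw>(0, 1);
}

The real AvxHelper in the diff below additionally threads an optional CpuFeature (SSE3, SSSE3, SSE4_1, SSE4_2) through so the non-AVX path can open the matching CpuFeatureScope before emitting.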
@@ -642,14 +642,6 @@ void TurboAssembler::Pmulhrsw(XMMRegister dst, XMMRegister src1,
}
}
void TurboAssembler::I16x8ExtMulLow(XMMRegister dst, XMMRegister src1,
XMMRegister src2, XMMRegister scratch,
bool is_signed) {
is_signed ? Pmovsxbw(scratch, src1) : Pmovzxbw(scratch, src1);
is_signed ? Pmovsxbw(dst, src2) : Pmovzxbw(dst, src2);
Pmullw(dst, scratch);
}
void TurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
XMMRegister src2, XMMRegister scratch) {
// k = i16x8.splat(0x8000)
......
@@ -433,7 +433,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP3_XO(Orps, orps)
AVX_OP3_XO(Orpd, orpd)
AVX_OP3_XO(Andnpd, andnpd)
AVX_OP3_XO(Pmullw, pmullw)
AVX_OP3_WITH_TYPE(Movhlps, movhlps, XMMRegister, XMMRegister)
AVX_OP3_WITH_TYPE(Psraw, psraw, XMMRegister, uint8_t)
AVX_OP3_WITH_TYPE(Psrlq, psrlq, XMMRegister, uint8_t)
@@ -598,10 +597,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, Operand, SSE4_1)
AVX_OP2_XO_SSE4(Ptest, ptest)
AVX_OP2_XO_SSE4(Pmovsxbw, pmovsxbw)
AVX_OP2_XO_SSE4(Pmovsxwd, pmovsxwd)
AVX_OP2_XO_SSE4(Pmovsxdq, pmovsxdq)
AVX_OP2_XO_SSE4(Pmovzxbw, pmovzxbw)
AVX_OP2_XO_SSE4(Pmovzxwd, pmovzxwd)
AVX_OP2_XO_SSE4(Pmovzxdq, pmovzxdq)
@@ -712,8 +709,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
// These Wasm SIMD ops do not have direct lowerings on IA32. These
// helpers are optimized to produce the fastest and smallest codegen.
// Defined here to allow usage on both TurboFan and Liftoff.
void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch, bool is_signed);
void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch);
void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
......
@@ -18,6 +18,14 @@
namespace v8 {
namespace internal {
void SharedTurboAssembler::I16x8ExtMulLow(XMMRegister dst, XMMRegister src1,
XMMRegister src2, XMMRegister scratch,
bool is_signed) {
is_signed ? Pmovsxbw(scratch, src1) : Pmovzxbw(scratch, src1);
is_signed ? Pmovsxbw(dst, src2) : Pmovzxbw(dst, src2);
Pmullw(dst, scratch);
}
void SharedTurboAssembler::I16x8ExtMulHighS(XMMRegister dst, XMMRegister src1,
XMMRegister src2,
XMMRegister scratch) {
......
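For reference (standard Wasm SIMD semantics, not V8 code): extmul_low widens the low eight byte lanes of each operand to 16 bits, sign- or zero-extending as requested, and multiplies them lane-wise, which is exactly what the Pmovsxbw/Pmovzxbw + Pmullw sequence above computes. A scalar sketch:

#include <array>
#include <cstdint>

// Scalar model of i16x8.extmul_low_i8x16_s / _u: widen the low 8 byte lanes of
// each input to 16 bits, then multiply lane-wise. The full product of two
// 8-bit values always fits in 16 bits, so only a widening multiply is needed.
std::array<int16_t, 8> ExtMulLow(const std::array<int8_t, 16>& a,
                                 const std::array<int8_t, 16>& b,
                                 bool is_signed) {
  std::array<int16_t, 8> result;
  for (int i = 0; i < 8; ++i) {
    int32_t x = is_signed ? int32_t{a[i]} : int32_t{static_cast<uint8_t>(a[i])};
    int32_t y = is_signed ? int32_t{b[i]} : int32_t{static_cast<uint8_t>(b[i])};
    // Store the low 16 bits of the product as the lane's bit pattern.
    result[i] = static_cast<int16_t>(static_cast<uint16_t>(x * y));
  }
  return result;
}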
@@ -6,6 +6,7 @@
#define V8_CODEGEN_SHARED_IA32_X64_MACRO_ASSEMBLER_SHARED_IA32_X64_H_
#include "src/base/macros.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/turbo-assembler.h"
#if V8_TARGET_ARCH_IA32
@@ -18,11 +19,91 @@
namespace v8 {
namespace internal {
class Assembler;
class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
public:
using TurboAssemblerBase::TurboAssemblerBase;
template <typename Dst, typename... Args>
struct AvxHelper {
Assembler* assm;
base::Optional<CpuFeature> feature = base::nullopt;
// Call a method where the AVX version expects the dst argument to be
// duplicated.
template <void (Assembler::*avx)(Dst, Dst, Args...),
void (Assembler::*no_avx)(Dst, Args...)>
void emit(Dst dst, Args... args) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx)(dst, dst, args...);
} else if (feature.has_value()) {
DCHECK(CpuFeatures::IsSupported(*feature));
CpuFeatureScope scope(assm, *feature);
(assm->*no_avx)(dst, args...);
} else {
(assm->*no_avx)(dst, args...);
}
}
// Call a method where the AVX version expects no duplicated dst argument.
template <void (Assembler::*avx)(Dst, Args...),
void (Assembler::*no_avx)(Dst, Args...)>
void emit(Dst dst, Args... args) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx)(dst, args...);
} else if (feature.has_value()) {
DCHECK(CpuFeatures::IsSupported(*feature));
CpuFeatureScope scope(assm, *feature);
(assm->*no_avx)(dst, args...);
} else {
(assm->*no_avx)(dst, args...);
}
}
};
#define AVX_OP(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
#define AVX_OP_SSE3(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE3)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
#define AVX_OP_SSSE3(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSSE3)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
#define AVX_OP_SSE4_1(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE4_1)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
#define AVX_OP_SSE4_2(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE4_2)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
AVX_OP(Pmullw, pmullw)
AVX_OP_SSE4_1(Pmovsxbw, pmovsxbw)
AVX_OP_SSE4_1(Pmovzxbw, pmovzxbw)
void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch, bool is_signed);
void I16x8ExtMulHighS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch);
void I16x8ExtMulHighU(XMMRegister dst, XMMRegister src1, XMMRegister src2,
......
@@ -2132,14 +2132,6 @@ void TurboAssembler::Pmulhrsw(XMMRegister dst, XMMRegister src1,
}
}
void TurboAssembler::I16x8ExtMulLow(XMMRegister dst, XMMRegister src1,
XMMRegister src2, bool is_signed) {
is_signed ? Pmovsxbw(kScratchDoubleReg, src1)
: Pmovzxbw(kScratchDoubleReg, src1);
is_signed ? Pmovsxbw(dst, src2) : Pmovzxbw(dst, src2);
Pmullw(dst, kScratchDoubleReg);
}
void TurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
// k = i16x8.splat(0x8000)
......
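For reference (standard Wasm semantics, not V8 code): i16x8.q15mulr_sat_s is a rounding Q15 fixed-point multiply with signed saturation. x86's pmulhrsw already computes (a * b + 0x4000) >> 15 per lane but does not saturate; the only input pair that overflows is -32768 * -32768, which pmulhrsw returns as 0x8000, and the splat(0x8000) in the code above is evidently there to detect that lane and correct it to 0x7FFF. A scalar sketch of the intended result:

#include <cstdint>

// Scalar model of one lane of i16x8.q15mulr_sat_s:
// saturate((a * b + 0x4000) >> 15), i.e. a rounding Q15 multiply.
int16_t Q15MulRSatS(int16_t a, int16_t b) {
  // Arithmetic right shift of the widened, rounded product.
  int32_t product = (int32_t{a} * int32_t{b} + 0x4000) >> 15;
  // Only -32768 * -32768 can leave the int16_t range (it yields 32768),
  // so a single upper clamp is enough to saturate.
  if (product > 32767) product = 32767;
  return static_cast<int16_t>(product);
}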
@@ -62,78 +62,6 @@ class StackArgumentsAccessor {
class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
public:
using SharedTurboAssembler::SharedTurboAssembler;
template <typename Dst, typename... Args>
struct AvxHelper {
Assembler* assm;
base::Optional<CpuFeature> feature = base::nullopt;
// Call a method where the AVX version expects the dst argument to be
// duplicated.
template <void (Assembler::*avx)(Dst, Dst, Args...),
void (Assembler::*no_avx)(Dst, Args...)>
void emit(Dst dst, Args... args) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx)(dst, dst, args...);
} else if (feature.has_value()) {
DCHECK(CpuFeatures::IsSupported(*feature));
CpuFeatureScope scope(assm, *feature);
(assm->*no_avx)(dst, args...);
} else {
(assm->*no_avx)(dst, args...);
}
}
// Call a method where the AVX version expects no duplicated dst argument.
template <void (Assembler::*avx)(Dst, Args...),
void (Assembler::*no_avx)(Dst, Args...)>
void emit(Dst dst, Args... args) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx)(dst, args...);
} else if (feature.has_value()) {
DCHECK(CpuFeatures::IsSupported(*feature));
CpuFeatureScope scope(assm, *feature);
(assm->*no_avx)(dst, args...);
} else {
(assm->*no_avx)(dst, args...);
}
}
};
#define AVX_OP(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
#define AVX_OP_SSE3(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE3)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
#define AVX_OP_SSSE3(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSSE3)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
#define AVX_OP_SSE4_1(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE4_1)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
#define AVX_OP_SSE4_2(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE4_2)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
AVX_OP(Subsd, subsd)
AVX_OP(Divss, divss)
AVX_OP(Divsd, divsd)
@@ -219,7 +147,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP(Paddusb, paddusb)
AVX_OP(Paddusw, paddusw)
AVX_OP(Pcmpgtd, pcmpgtd)
AVX_OP(Pmullw, pmullw)
AVX_OP(Pmuludq, pmuludq)
AVX_OP(Addpd, addpd)
AVX_OP(Subpd, subpd)
@@ -285,10 +212,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
AVX_OP_SSE4_1(Pinsrq, pinsrq)
AVX_OP_SSE4_1(Pblendw, pblendw)
AVX_OP_SSE4_1(Ptest, ptest)
AVX_OP_SSE4_1(Pmovsxbw, pmovsxbw)
AVX_OP_SSE4_1(Pmovsxwd, pmovsxwd)
AVX_OP_SSE4_1(Pmovsxdq, pmovsxdq)
AVX_OP_SSE4_1(Pmovzxbw, pmovzxbw)
AVX_OP_SSE4_1(Pmovzxwd, pmovzxwd)
AVX_OP_SSE4_1(Pmovzxdq, pmovzxdq)
AVX_OP_SSE4_1(Pextrb, pextrb)
@@ -607,11 +532,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
// These Wasm SIMD ops do not have direct lowerings on x64. These
// helpers are optimized to produce the fastest and smallest codegen.
// Defined here to allow usage on both TurboFan and Liftoff.
// TODO(zhin): Move this into shared-ia32-x64-macro-assembler.
void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
bool is_signed);
void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2);
void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
......
@@ -3318,7 +3318,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I16x8ExtMulLowI8x16S: {
__ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), /*is_signed=*/true);
i.InputSimd128Register(1), kScratchDoubleReg,
/*is_signed=*/true);
break;
}
case kX64I16x8ExtMulHighI8x16S: {
@@ -3328,7 +3329,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I16x8ExtMulLowI8x16U: {
__ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), /*is_signed=*/false);
i.InputSimd128Register(1), kScratchDoubleReg,
/*is_signed=*/false);
break;
}
case kX64I16x8ExtMulHighI8x16U: {
......
@@ -3222,13 +3222,15 @@ void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), /*is_signed=*/true);
I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg,
/*is_signed=*/true);
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), /*is_signed=*/false);
I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg,
/*is_signed=*/false);
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
......