Commit b699d1dc authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64][ia32] Factor f32x4.splat into shared code

Bug: v8:11589
Change-Id: I13c57e1dcc77345bcc9d95a14cf878db6dd60e02
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2837589
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#74073}
parent 82beb6fc
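
In short: the f32x4.splat lowering that was duplicated across the ia32/x64 TurboFan backends and Liftoff now lives in SharedTurboAssembler::F32x4Splat. A sketch of the resulting structure (condensed from the hunks below, not verbatim source):

    // All four call sites reduce to one helper:
    //   TurboFan ia32/x64:  __ F32x4Splat(i.OutputSimd128Register(),
    //                                     i.InputDoubleRegister(0));
    //   Liftoff  ia32/x64:  F32x4Splat(dst.fp(), src.fp());
    // which selects the best available instruction:
    //   AVX2:  vbroadcastss dst, src      (register broadcast)
    //   AVX:   vshufps dst, src, src, 0
    //   SSE:   shufps dst, src, 0         (dst == src; 1 byte shorter)
    //          pshufd dst, src, 0         (dst != src)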
@@ -3275,9 +3275,9 @@ void Assembler::sse4_instr(XMMRegister dst, Operand src, byte prefix,
 }
 
 void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
-                       XMMRegister src2, SIMDPrefix pp, LeadingOpcode m,
-                       VexW w) {
-  DCHECK(IsEnabled(AVX));
+                       XMMRegister src2, SIMDPrefix pp, LeadingOpcode m, VexW w,
+                       CpuFeature feature) {
+  DCHECK(IsEnabled(feature));
   EnsureSpace ensure_space(this);
   emit_vex_prefix(src1, kL128, pp, m, w);
   EMIT(op);
@@ -3285,8 +3285,9 @@ void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
 }
 
 void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
-                       SIMDPrefix pp, LeadingOpcode m, VexW w) {
-  DCHECK(IsEnabled(AVX));
+                       SIMDPrefix pp, LeadingOpcode m, VexW w,
+                       CpuFeature feature) {
+  DCHECK(IsEnabled(feature));
   EnsureSpace ensure_space(this);
   emit_vex_prefix(src1, kL128, pp, m, w);
   EMIT(op);
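
Threading a CpuFeature through vinstr() lets the one VEX emitter serve AVX2 encodings too: the DCHECK asserts whichever feature the caller names, and the AVX default keeps every existing call site unchanged. For illustration, a hypothetical AVX2-only helper could now be written against the new signature (vpbroadcastd is not part of this change):

    // Hypothetical AVX2-only helper; VPBROADCASTD xmm, xmm encodes as
    // VEX.128.66.0F38.W0 58 /r, so it reuses the vbroadcastss pattern.
    void vpbroadcastd(XMMRegister dst, XMMRegister src) {
      vinstr(0x58, dst, xmm0, src, k66, k0F38, kW0, AVX2);  // DCHECKs AVX2
    }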
@@ -1516,6 +1516,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void vmovshdup(XMMRegister dst, XMMRegister src) {
     vinstr(0x16, dst, xmm0, src, kF3, k0F, kWIG);
   }
+  void vbroadcastss(XMMRegister dst, XMMRegister src) {
+    vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0, AVX2);
+  }
   void vbroadcastss(XMMRegister dst, Operand src) {
     vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0);
   }
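
The register-source overload passes AVX2 because VBROADCASTSS xmm, xmm (VEX.128.66.0F38.W0 18 /r) exists only with AVX2; the memory-source form is plain AVX, so the Operand overload keeps the default. A minimal usage sketch, assuming some Assembler assm with the feature detected:

    CpuFeatureScope avx2_scope(&assm, AVX2);  // satisfies the DCHECK in vinstr()
    assm.vbroadcastss(xmm1, xmm2);  // xmm1 = {xmm2[0], xmm2[0], xmm2[0], xmm2[0]}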
@@ -1892,9 +1895,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
   void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
-              SIMDPrefix pp, LeadingOpcode m, VexW w);
+              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
   void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
-              SIMDPrefix pp, LeadingOpcode m, VexW w);
+              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
   // Most BMI instructions are similar.
   void bmi1(byte op, Register reg, Register vreg, Operand rm);
   void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
@@ -47,6 +47,23 @@ void SharedTurboAssembler::F64x2ExtractLane(DoubleRegister dst, XMMRegister src,
   }
 }
 
+void SharedTurboAssembler::F32x4Splat(XMMRegister dst, DoubleRegister src) {
+  if (CpuFeatures::IsSupported(AVX2)) {
+    CpuFeatureScope avx2_scope(this, AVX2);
+    vbroadcastss(dst, src);
+  } else if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vshufps(dst, src, src, 0);
+  } else {
+    if (dst == src) {
+      // 1 byte shorter than pshufd.
+      shufps(dst, src, 0);
+    } else {
+      pshufd(dst, src, 0);
+    }
+  }
+}
+
 void SharedTurboAssembler::S128Store32Lane(Operand dst, XMMRegister src,
                                            uint8_t laneidx) {
   if (laneidx == 0) {
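
The shape of the non-AVX fallback follows from shufps lane semantics; a quick sketch of both instructions with imm8 == 0 (ISA behavior, not V8 source):

    // shufps dst, src, 0  =>  dst = {dst[0], dst[0], src[0], src[0]}
    //   a full splat only when dst == src; shufps lacks pshufd's 0x66
    //   prefix byte, hence the "1 byte shorter" comment above.
    // pshufd dst, src, 0  =>  dst = {src[0], src[0], src[0], src[0]}
    //   reads only src, so it handles dst != src in one instruction.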
@@ -272,6 +272,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
   AVX_OP_SSE4_1(Roundps, roundps)
 
   void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane);
+  void F32x4Splat(XMMRegister dst, DoubleRegister src);
   void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
   void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                       XMMRegister scrat, bool is_signed);
@@ -2274,15 +2274,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kIA32F32x4Splat: {
-      XMMRegister dst = i.OutputDoubleRegister();
-      XMMRegister src = i.InputDoubleRegister(0);
-      if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vshufps(i.OutputSimd128Register(), src, src, 0x0);
-      } else {
-        DCHECK_EQ(dst, src);
-        __ shufps(dst, src, 0x0);
-      }
+      __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
       break;
     }
     case kIA32F32x4ExtractLane: {
@@ -2521,22 +2521,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kX64F32x4Splat: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src = i.InputDoubleRegister(0);
-      if (CpuFeatures::IsSupported(AVX2)) {
-        CpuFeatureScope avx2_scope(tasm(), AVX2);
-        __ vbroadcastss(dst, src);
-      } else if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vshufps(dst, src, src, 0);
-      } else {
-        if (dst == src) {
-          // 1 byte shorter than pshufd.
-          __ shufps(dst, src, 0);
-        } else {
-          __ pshufd(dst, src, 0);
-        }
-      }
+      __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
       break;
     }
     case kX64F32x4ExtractLane: {
@@ -2938,15 +2938,7 @@ void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
 
 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                         LiftoffRegister src) {
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope scope(this, AVX);
-    vshufps(dst.fp(), src.fp(), src.fp(), 0);
-  } else {
-    if (dst.fp() != src.fp()) {
-      movss(dst.fp(), src.fp());
-    }
-    shufps(dst.fp(), src.fp(), 0);
-  }
+  F32x4Splat(dst.fp(), src.fp());
 }
 
 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
@@ -2540,7 +2540,7 @@ void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
 
 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                         LiftoffRegister src) {
-  Shufps(dst.fp(), src.fp(), src.fp(), 0);
+  F32x4Splat(dst.fp(), src.fp());
 }
 
 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
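
Two behavioral notes that fall out of the diff rather than being stated in it: the x64 Liftoff path previously emitted a shufps-family instruction via the Shufps macro, so routing through F32x4Splat can now use the AVX2 vbroadcastss there as well; and the ia32 Liftoff SSE fallback gets shorter when the registers differ:

    // ia32 Liftoff, dst != src, no AVX -- before vs. after (from the diff):
    //   before:  movss  dst, src        // copy lane 0
    //            shufps dst, src, 0     // then splat
    //   after:   pshufd dst, src, 0     // single-instruction splat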