Commit b699d1dc authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64][ia32] Factor f32x4.splat into shared code

Bug: v8:11589
Change-Id: I13c57e1dcc77345bcc9d95a14cf878db6dd60e02
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2837589
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#74073}
parent 82beb6fc
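
For orientation (not part of the commit itself): the three lowering strategies that the new SharedTurboAssembler::F32x4Splat helper selects between can be sketched in standalone C++ with x86 intrinsics, using compile-time feature macros in place of V8's runtime CpuFeatures checks. The function name and the intrinsic-based framing below are illustrative assumptions, not V8 code.

// Illustrative sketch of the f32x4.splat lowerings consolidated by this
// commit; not V8 code. Compile-time dispatch stands in for V8's runtime
// CpuFeatures::IsSupported() checks.
#include <immintrin.h>

__m128 F32x4SplatSketch(__m128 src) {
#if defined(__AVX2__)
  // AVX2: vbroadcastss xmm, xmm replicates lane 0 in a single instruction.
  return _mm_broadcastss_ps(src);
#elif defined(__AVX__)
  // AVX: vshufps dst, src, src, 0 selects lane 0 four times; the
  // three-operand form lets dst differ from src.
  return _mm_shuffle_ps(src, src, 0);
#else
  // SSE2: the commit emits shufps when dst == src (one byte shorter) and
  // pshufd otherwise; intrinsics hide register allocation, so only the
  // pshufd form is shown here.
  return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(src), 0));
#endif
}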
@@ -3275,9 +3275,9 @@ void Assembler::sse4_instr(XMMRegister dst, Operand src, byte prefix,
 }
 
 void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
-                       XMMRegister src2, SIMDPrefix pp, LeadingOpcode m,
-                       VexW w) {
-  DCHECK(IsEnabled(AVX));
+                       XMMRegister src2, SIMDPrefix pp, LeadingOpcode m, VexW w,
+                       CpuFeature feature) {
+  DCHECK(IsEnabled(feature));
   EnsureSpace ensure_space(this);
   emit_vex_prefix(src1, kL128, pp, m, w);
   EMIT(op);
@@ -3285,8 +3285,9 @@ void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
 }
 
 void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
-                       SIMDPrefix pp, LeadingOpcode m, VexW w) {
-  DCHECK(IsEnabled(AVX));
+                       SIMDPrefix pp, LeadingOpcode m, VexW w,
+                       CpuFeature feature) {
+  DCHECK(IsEnabled(feature));
   EnsureSpace ensure_space(this);
   emit_vex_prefix(src1, kL128, pp, m, w);
   EMIT(op);
@@ -1516,6 +1516,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void vmovshdup(XMMRegister dst, XMMRegister src) {
     vinstr(0x16, dst, xmm0, src, kF3, k0F, kWIG);
   }
+  void vbroadcastss(XMMRegister dst, XMMRegister src) {
+    vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0, AVX2);
+  }
   void vbroadcastss(XMMRegister dst, Operand src) {
     vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0);
   }
@@ -1892,9 +1895,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
   void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
-              SIMDPrefix pp, LeadingOpcode m, VexW w);
+              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
   void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
-              SIMDPrefix pp, LeadingOpcode m, VexW w);
+              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
   // Most BMI instructions are similar.
   void bmi1(byte op, Register reg, Register vreg, Operand rm);
   void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
@@ -47,6 +47,23 @@ void SharedTurboAssembler::F64x2ExtractLane(DoubleRegister dst, XMMRegister src,
   }
 }
 
+void SharedTurboAssembler::F32x4Splat(XMMRegister dst, DoubleRegister src) {
+  if (CpuFeatures::IsSupported(AVX2)) {
+    CpuFeatureScope avx2_scope(this, AVX2);
+    vbroadcastss(dst, src);
+  } else if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vshufps(dst, src, src, 0);
+  } else {
+    if (dst == src) {
+      // 1 byte shorter than pshufd.
+      shufps(dst, src, 0);
+    } else {
+      pshufd(dst, src, 0);
+    }
+  }
+}
+
 void SharedTurboAssembler::S128Store32Lane(Operand dst, XMMRegister src,
                                            uint8_t laneidx) {
   if (laneidx == 0) {
@@ -272,6 +272,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
   AVX_OP_SSE4_1(Roundps, roundps)
 
   void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane);
+  void F32x4Splat(XMMRegister dst, DoubleRegister src);
   void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
   void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                       XMMRegister scrat, bool is_signed);
@@ -2274,15 +2274,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kIA32F32x4Splat: {
-      XMMRegister dst = i.OutputDoubleRegister();
-      XMMRegister src = i.InputDoubleRegister(0);
-      if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vshufps(i.OutputSimd128Register(), src, src, 0x0);
-      } else {
-        DCHECK_EQ(dst, src);
-        __ shufps(dst, src, 0x0);
-      }
+      __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
       break;
     }
     case kIA32F32x4ExtractLane: {
@@ -2521,22 +2521,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kX64F32x4Splat: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src = i.InputDoubleRegister(0);
-      if (CpuFeatures::IsSupported(AVX2)) {
-        CpuFeatureScope avx2_scope(tasm(), AVX2);
-        __ vbroadcastss(dst, src);
-      } else if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vshufps(dst, src, src, 0);
-      } else {
-        if (dst == src) {
-          // 1 byte shorter than pshufd.
-          __ shufps(dst, src, 0);
-        } else {
-          __ pshufd(dst, src, 0);
-        }
-      }
+      __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
       break;
     }
     case kX64F32x4ExtractLane: {
@@ -2938,15 +2938,7 @@ void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
 
 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                         LiftoffRegister src) {
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope scope(this, AVX);
-    vshufps(dst.fp(), src.fp(), src.fp(), 0);
-  } else {
-    if (dst.fp() != src.fp()) {
-      movss(dst.fp(), src.fp());
-    }
-    shufps(dst.fp(), src.fp(), 0);
-  }
+  F32x4Splat(dst.fp(), src.fp());
 }
 
 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
@@ -2540,7 +2540,7 @@ void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
 
 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                         LiftoffRegister src) {
-  Shufps(dst.fp(), src.fp(), src.fp(), 0);
+  F32x4Splat(dst.fp(), src.fp());
 }
 
 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,