Commit d8ce100f authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64][ia32] Factor f32x4.extract_lane into shared code

Bug: v8:11589
Change-Id: I90a0c9f8325eb56c607addf1adde60673dfbc9c7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2840688
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#74076}
parent 5067f049
...@@ -1966,19 +1966,6 @@ void TurboAssembler::Vbroadcastss(XMMRegister dst, Operand src) { ...@@ -1966,19 +1966,6 @@ void TurboAssembler::Vbroadcastss(XMMRegister dst, Operand src) {
shufps(dst, dst, static_cast<byte>(0)); shufps(dst, dst, static_cast<byte>(0));
} }
// Shuffles packed floats from src1/src2 into dst per imm8. On AVX this is a
// single non-destructive vshufps; without AVX it falls back to the
// destructive SSE shufps, copying src1 into dst first when needed.
void TurboAssembler::Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                            uint8_t imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    // SSE shufps reads and overwrites its first operand, so dst must already
    // hold src1 before the shuffle.
    if (dst != src1) movaps(dst, src1);
    shufps(dst, src2, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vshufps(dst, src1, src2, imm8);
}
void TurboAssembler::Lzcnt(Register dst, Operand src) { void TurboAssembler::Lzcnt(Register dst, Operand src) {
if (CpuFeatures::IsSupported(LZCNT)) { if (CpuFeatures::IsSupported(LZCNT)) {
CpuFeatureScope scope(this, LZCNT); CpuFeatureScope scope(this, LZCNT);
......
...@@ -364,10 +364,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler { ...@@ -364,10 +364,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
void Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8); void Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8);
void Vbroadcastss(XMMRegister dst, Operand src); void Vbroadcastss(XMMRegister dst, Operand src);
// Shufps that will mov src1 into dst if AVX is not supported (the SSE
// shufps instruction is destructive: it reads and writes its first operand).
void Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
            uint8_t imm8);
// Expression support // Expression support
// cvtsi2sd instruction only writes to the low 64-bit of dst register, which // cvtsi2sd instruction only writes to the low 64-bit of dst register, which
// hinders register renaming and makes dependence chains longer. So we use // hinders register renaming and makes dependence chains longer. So we use
......
...@@ -29,6 +29,19 @@ void SharedTurboAssembler::Movapd(XMMRegister dst, XMMRegister src) { ...@@ -29,6 +29,19 @@ void SharedTurboAssembler::Movapd(XMMRegister dst, XMMRegister src) {
} }
} }
// Shuffles packed floats from src1/src2 into dst per imm8. Uses the
// non-destructive AVX vshufps when available; otherwise emulates it with the
// destructive SSE shufps, first moving src1 into dst if they differ.
void SharedTurboAssembler::Shufps(XMMRegister dst, XMMRegister src1,
                                  XMMRegister src2, uint8_t imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    // SSE path: shufps clobbers its first operand, so seed dst with src1.
    if (dst != src1) movaps(dst, src1);
    shufps(dst, src2, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vshufps(dst, src1, src2, imm8);
}
void SharedTurboAssembler::F64x2ExtractLane(DoubleRegister dst, XMMRegister src, void SharedTurboAssembler::F64x2ExtractLane(DoubleRegister dst, XMMRegister src,
uint8_t lane) { uint8_t lane) {
if (lane == 0) { if (lane == 0) {
...@@ -64,6 +77,27 @@ void SharedTurboAssembler::F32x4Splat(XMMRegister dst, DoubleRegister src) { ...@@ -64,6 +77,27 @@ void SharedTurboAssembler::F32x4Splat(XMMRegister dst, DoubleRegister src) {
} }
} }
// Extracts float lane (0..3) of src into dst. Chooses instructions shorter
// than insertps; they may leave junk in the upper lanes of dst, which is
// acceptable for a scalar float result.
void SharedTurboAssembler::F32x4ExtractLane(FloatRegister dst, XMMRegister src,
                                            uint8_t lane) {
  DCHECK_LT(lane, 4);
  switch (lane) {
    case 0:
      // Lane 0 is already in place; only copy if registers differ.
      if (dst != src) {
        Movaps(dst, src);
      }
      break;
    case 1:
      Movshdup(dst, src);
      break;
    default:
      if (dst == src) {
        if (lane == 2) {
          // Only when dst == src, to avoid a false dependency on dst.
          Movhlps(dst, src);
        } else {
          Shufps(dst, src, src, lane);
        }
      } else {
        Pshufd(dst, src, lane);
      }
      break;
  }
}
void SharedTurboAssembler::S128Store32Lane(Operand dst, XMMRegister src, void SharedTurboAssembler::S128Store32Lane(Operand dst, XMMRegister src,
uint8_t laneidx) { uint8_t laneidx) {
if (laneidx == 0) { if (laneidx == 0) {
......
...@@ -39,6 +39,10 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase { ...@@ -39,6 +39,10 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
} }
} }
// Shufps that will mov src1 into dst if AVX is not supported (SSE shufps is
// destructive, so dst must hold src1 before shuffling).
void Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
            uint8_t imm8);
// Helper struct to implement functions that check for AVX support and // Helper struct to implement functions that check for AVX support and
// dispatch to the appropriate AVX/SSE instruction. // dispatch to the appropriate AVX/SSE instruction.
template <typename Dst, typename Arg, typename... Args> template <typename Dst, typename Arg, typename... Args>
...@@ -273,6 +277,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase { ...@@ -273,6 +277,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane); void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane);
void F32x4Splat(XMMRegister dst, DoubleRegister src); void F32x4Splat(XMMRegister dst, DoubleRegister src);
// Extracts float lane (0..3) of src into dst; may leave junk in the upper
// lanes of dst.
void F32x4ExtractLane(FloatRegister dst, XMMRegister src, uint8_t lane);
void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx); void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2, void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scrat, bool is_signed); XMMRegister scrat, bool is_signed);
......
...@@ -1886,19 +1886,6 @@ void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1, ...@@ -1886,19 +1886,6 @@ void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1,
} }
} }
// Shuffles packed floats from src1/src2 into dst per imm8. AVX uses the
// three-operand vshufps; the SSE fallback is destructive and therefore moves
// src1 into dst first when the registers differ.
void TurboAssembler::Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                            byte imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    // shufps overwrites its first operand, so dst must contain src1.
    if (dst != src1) movaps(dst, src1);
    shufps(dst, src2, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vshufps(dst, src1, src2, imm8);
}
void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) { void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
if (imm8 == 0) { if (imm8 == 0) {
Movd(dst, src); Movd(dst, src);
......
...@@ -379,9 +379,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler { ...@@ -379,9 +379,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
void Pmaddubsw(XMMRegister dst, XMMRegister src1, Operand src2); void Pmaddubsw(XMMRegister dst, XMMRegister src1, Operand src2);
void Pmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2); void Pmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);
// Shufps that will mov src1 into dst if AVX is not supported (SSE shufps
// reads and writes its first operand).
void Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8);
// Non-SSE2 instructions. // Non-SSE2 instructions.
void Pextrd(Register dst, XMMRegister src, uint8_t imm8); void Pextrd(Register dst, XMMRegister src, uint8_t imm8);
......
...@@ -2278,26 +2278,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2278,26 +2278,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kIA32F32x4ExtractLane: { case kIA32F32x4ExtractLane: {
XMMRegister dst = i.OutputFloatRegister(); __ F32x4ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
XMMRegister src = i.InputSimd128Register(0); i.InputUint8(1));
uint8_t lane = i.InputUint8(1);
DCHECK_LT(lane, 4);
// These instructions are shorter than insertps, but will leave junk in
// the top lanes of dst.
if (lane == 0) {
if (dst != src) {
__ Movaps(dst, src);
}
} else if (lane == 1) {
__ Movshdup(dst, src);
} else if (lane == 2 && dst == src) {
// Check dst == src to avoid false dependency on dst.
__ Movhlps(dst, src);
} else if (dst == src) {
__ Shufps(dst, src, src, lane);
} else {
__ Pshufd(dst, src, lane);
}
break; break;
} }
case kIA32Insertps: { case kIA32Insertps: {
......
...@@ -2525,24 +2525,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2525,24 +2525,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64F32x4ExtractLane: { case kX64F32x4ExtractLane: {
XMMRegister dst = i.OutputDoubleRegister(); __ F32x4ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
XMMRegister src = i.InputSimd128Register(0); i.InputUint8(1));
uint8_t lane = i.InputUint8(1);
DCHECK_LT(lane, 4);
// These instructions are shorter than insertps, but will leave junk in
// the top lanes of dst.
if (lane == 0) {
__ Move(dst, src);
} else if (lane == 1) {
__ Movshdup(dst, src);
} else if (lane == 2 && dst == src) {
// Check dst == src to avoid false dependency on dst.
__ Movhlps(dst, src);
} else if (dst == src) {
__ Shufps(dst, src, src, lane);
} else {
__ Pshufd(dst, src, lane);
}
break; break;
} }
case kX64F32x4ReplaceLane: { case kX64F32x4ReplaceLane: {
......
...@@ -4605,13 +4605,7 @@ void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst, ...@@ -4605,13 +4605,7 @@ void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst, void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs, LiftoffRegister lhs,
uint8_t imm_lane_idx) { uint8_t imm_lane_idx) {
if (CpuFeatures::IsSupported(AVX)) { F32x4ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
CpuFeatureScope scope(this, AVX);
vshufps(dst.fp(), lhs.fp(), lhs.fp(), imm_lane_idx);
} else {
if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
if (imm_lane_idx != 0) shufps(dst.fp(), dst.fp(), imm_lane_idx);
}
} }
void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
......
...@@ -4146,13 +4146,7 @@ void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst, ...@@ -4146,13 +4146,7 @@ void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst, void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs, LiftoffRegister lhs,
uint8_t imm_lane_idx) { uint8_t imm_lane_idx) {
if (CpuFeatures::IsSupported(AVX)) { F32x4ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
CpuFeatureScope scope(this, AVX);
vshufps(dst.fp(), lhs.fp(), lhs.fp(), imm_lane_idx);
} else {
if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
if (imm_lane_idx != 0) shufps(dst.fp(), dst.fp(), imm_lane_idx);
}
} }
void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst, void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment