Commit d988dc08 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64][ia32] Factor f64x2.extract_lane into shared implementation

Bug: v8:11589
Change-Id: I6f43e6382b3441adf59dbaea58d766013cf3793b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2826712
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73983}
parent 05b38588
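
Note: f64x2.extract_lane reads one of the two 64-bit float lanes of a 128-bit Wasm SIMD value; lane 0 is the low 64 bits. As a reference for the hunks below, here is a minimal scalar model of the semantics (illustrative C++, not part of this change; the name F64x2ExtractLaneModel is made up):

    #include <cstdint>
    #include <cstring>

    // f64x2.extract_lane: lane 0 is the low 64 bits of the vector,
    // lane 1 the high 64 bits.
    double F64x2ExtractLaneModel(const uint8_t v128[16], uint8_t lane) {
      double result;
      std::memcpy(&result, v128 + lane * 8, sizeof(result));
      return result;
    }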
@@ -29,6 +29,24 @@ void SharedTurboAssembler::Movapd(XMMRegister dst, XMMRegister src) {
   }
 }
 
+void SharedTurboAssembler::F64x2ExtractLane(DoubleRegister dst, XMMRegister src,
+                                            uint8_t lane) {
+  if (lane == 0) {
+    if (dst != src) {
+      Movaps(dst, src);
+    }
+  } else {
+    DCHECK_EQ(1, lane);
+    if (CpuFeatures::IsSupported(AVX)) {
+      CpuFeatureScope avx_scope(this, AVX);
+      // Pass src as operand to avoid false-dependency on dst.
+      vmovhlps(dst, src, src);
+    } else {
+      movhlps(dst, src);
+    }
+  }
+}
+
 void SharedTurboAssembler::S128Store32Lane(Operand dst, XMMRegister src,
                                            uint8_t laneidx) {
   if (laneidx == 0) {
......
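The shared helper has two paths: lane 0 is at most a register move (skipped when dst == src), and lane 1 uses movhlps/vmovhlps, which copies the high 64 bits of the source into the low 64 bits of the destination; the three-operand vmovhlps form avoids a false dependency on dst. A rough intrinsics equivalent of the lane-1 path (a sketch assuming SSE; ExtractLane1 is an illustrative name, not V8 code):

    #include <immintrin.h>

    // movhlps: low half of the result := high half of the source.
    double ExtractLane1(__m128d v) {
      __m128 hi = _mm_movehl_ps(_mm_castpd_ps(v), _mm_castpd_ps(v));
      return _mm_cvtsd_f64(_mm_castps_pd(hi));
    }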
@@ -271,6 +271,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
   AVX_OP_SSE4_1(Roundpd, roundpd)
   AVX_OP_SSE4_1(Roundps, roundps)
 
+  void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane);
   void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
   void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                       XMMRegister scrat, bool is_signed);
......
@@ -1866,38 +1866,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kIA32F64x2Splat: {
-      XMMRegister dst = i.OutputDoubleRegister();
-      XMMRegister src = i.InputDoubleRegister(0);
-      if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vshufpd(i.OutputSimd128Register(), src, src, 0x0);
-      } else {
-        DCHECK_EQ(dst, src);
-        __ shufpd(dst, src, 0x0);
-      }
+      __ Movddup(i.OutputSimd128Register(), i.InputDoubleRegister(0));
       break;
     }
-    case kSSEF64x2ExtractLane: {
-      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
-      XMMRegister dst = i.OutputDoubleRegister();
-      int8_t lane = i.InputInt8(1);
-      if (lane != 0) {
-        DCHECK_EQ(lane, 1);
-        __ shufpd(dst, dst, lane);
-      }
-      break;
-    }
-    case kAVXF64x2ExtractLane: {
-      CpuFeatureScope avx_scope(tasm(), AVX);
-      XMMRegister dst = i.OutputDoubleRegister();
-      XMMRegister src = i.InputSimd128Register(0);
-      int8_t lane = i.InputInt8(1);
-      if (lane == 0) {
-        if (dst != src) __ vmovapd(dst, src);
-      } else {
-        DCHECK_EQ(lane, 1);
-        __ vshufpd(dst, src, src, lane);
-      }
+    case kF64x2ExtractLane: {
+      __ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                          i.InputUint8(1));
       break;
     }
     case kSSEF64x2ReplaceLane: {
......
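A related cleanup in the same hunk: the kIA32F64x2Splat case drops the shufpd/vshufpd sequence in favor of a single Movddup, which broadcasts the low double into both lanes. A rough intrinsics equivalent (a sketch assuming SSE3; F64x2SplatModel is an illustrative name):

    #include <immintrin.h>

    // movddup duplicates the low 64-bit lane into both lanes.
    __m128d F64x2SplatModel(double x) {
      return _mm_movedup_pd(_mm_set_sd(x));
    }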
@@ -117,8 +117,7 @@ namespace compiler {
   V(IA32Poke)             \
   V(IA32Peek)             \
   V(IA32F64x2Splat)       \
-  V(SSEF64x2ExtractLane)  \
-  V(AVXF64x2ExtractLane)  \
+  V(F64x2ExtractLane)     \
   V(SSEF64x2ReplaceLane)  \
   V(AVXF64x2ReplaceLane)  \
   V(IA32F64x2Sqrt)        \
......
@@ -102,8 +102,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32BitcastFI:
     case kIA32BitcastIF:
     case kIA32F64x2Splat:
-    case kSSEF64x2ExtractLane:
-    case kAVXF64x2ExtractLane:
+    case kF64x2ExtractLane:
     case kSSEF64x2ReplaceLane:
     case kAVXF64x2ReplaceLane:
     case kIA32F64x2Sqrt:
......
@@ -2380,7 +2380,7 @@ void InstructionSelector::VisitF64x2Splat(Node* node) {
 }
 
 void InstructionSelector::VisitF64x2ExtractLane(Node* node) {
-  VisitRRISimd(this, node, kAVXF64x2ExtractLane, kSSEF64x2ExtractLane);
+  VisitRRISimd(this, node, kF64x2ExtractLane, kF64x2ExtractLane);
 }
 
 void InstructionSelector::VisitI64x2SplatI32Pair(Node* node) {
......
@@ -2390,21 +2390,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kX64F64x2ExtractLane: {
-      DoubleRegister dst = i.OutputDoubleRegister();
-      XMMRegister src = i.InputSimd128Register(0);
-      uint8_t lane = i.InputUint8(1);
-      if (lane == 0) {
-        __ Move(dst, src);
-      } else {
-        DCHECK_EQ(1, lane);
-        if (CpuFeatures::IsSupported(AVX)) {
-          CpuFeatureScope avx_scope(tasm(), AVX);
-          // Pass src as operand to avoid false-dependency on dst.
-          __ vmovhlps(dst, src, src);
-        } else {
-          __ movhlps(dst, src);
-        }
-      }
+      __ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                          i.InputUint8(1));
       break;
     }
     case kX64F64x2Sqrt: {
......
@@ -4634,13 +4634,7 @@ void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
 void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                                LiftoffRegister lhs,
                                                uint8_t imm_lane_idx) {
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope scope(this, AVX);
-    vshufpd(dst.fp(), lhs.fp(), lhs.fp(), imm_lane_idx);
-  } else {
-    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
-    if (imm_lane_idx != 0) shufpd(dst.fp(), dst.fp(), imm_lane_idx);
-  }
+  F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
 }
 
 void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
......
@@ -4166,8 +4166,7 @@ void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
 void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                                LiftoffRegister lhs,
                                                uint8_t imm_lane_idx) {
-  Pextrq(kScratchRegister, lhs.fp(), static_cast<int8_t>(imm_lane_idx));
-  Movq(dst.fp(), kScratchRegister);
+  F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
 }
 
 void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
......
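On x64, Liftoff previously bounced the lane value through a general-purpose scratch register (pextrq, then movq back); the shared helper keeps it in the XMM domain. An illustrative comparison in intrinsics (a sketch; assumes SSE4.1 for the old path, and both function names are made up):

    #include <immintrin.h>
    #include <cstdint>
    #include <cstring>

    // Old lowering: extract to a GPR (pextrq), then move back (movq).
    double ExtractLane1ViaGpr(__m128d v) {
      int64_t bits = _mm_extract_epi64(_mm_castpd_si128(v), 1);
      double d;
      std::memcpy(&d, &bits, sizeof(d));
      return d;
    }

    // New lowering: movhlps stays in the XMM register file.
    double ExtractLane1XmmOnly(__m128d v) {
      __m128 hi = _mm_movehl_ps(_mm_castpd_ps(v), _mm_castpd_ps(v));
      return _mm_cvtsd_f64(_mm_castps_pd(hi));
    }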