Commit d988dc08 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64][ia32] Factor f64x2.extract_lane into shared implementation

Bug: v8:11589
Change-Id: I6f43e6382b3441adf59dbaea58d766013cf3793b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2826712
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73983}
parent 05b38588
@@ -29,6 +29,24 @@ void SharedTurboAssembler::Movapd(XMMRegister dst, XMMRegister src) {
   }
 }
 
+void SharedTurboAssembler::F64x2ExtractLane(DoubleRegister dst, XMMRegister src,
+                                            uint8_t lane) {
+  if (lane == 0) {
+    if (dst != src) {
+      Movaps(dst, src);
+    }
+  } else {
+    DCHECK_EQ(1, lane);
+    if (CpuFeatures::IsSupported(AVX)) {
+      CpuFeatureScope avx_scope(this, AVX);
+      // Pass src as operand to avoid false-dependency on dst.
+      vmovhlps(dst, src, src);
+    } else {
+      movhlps(dst, src);
+    }
+  }
+}
+
 void SharedTurboAssembler::S128Store32Lane(Operand dst, XMMRegister src,
                                            uint8_t laneidx) {
   if (laneidx == 0) {
...
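For reference, the helper's lane handling follows the Wasm f64x2 register layout: lane 0 is the low 64 bits of the XMM register and lane 1 the high 64 bits, so lane 1 is fetched with movhlps (the AVX form passes src as both source operands to avoid a false dependency on dst). A minimal standalone sketch of the same semantics using SSE2 intrinsics; illustrative only, not V8 code, with ExtractLane a made-up name for the example:

#include <immintrin.h>
#include <cstdio>

// f64x2.extract_lane semantics: lane 0 = low double, lane 1 = high double.
double ExtractLane(__m128d v, int lane) {
  if (lane == 1) {
    // Same effect as movhlps/vmovhlps: move the high double into the low slot.
    v = _mm_unpackhi_pd(v, v);
  }
  return _mm_cvtsd_f64(v);  // read lane 0 as a scalar double
}

int main() {
  __m128d v = _mm_set_pd(2.0, 1.0);  // lane 1 = 2.0, lane 0 = 1.0
  std::printf("%f %f\n", ExtractLane(v, 0), ExtractLane(v, 1));  // 1.0 2.0
}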
@@ -271,6 +271,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
   AVX_OP_SSE4_1(Roundpd, roundpd)
   AVX_OP_SSE4_1(Roundps, roundps)
 
+  void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane);
   void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
   void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                       XMMRegister scrat, bool is_signed);
...
@@ -1866,38 +1866,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kIA32F64x2Splat: {
-      XMMRegister dst = i.OutputDoubleRegister();
-      XMMRegister src = i.InputDoubleRegister(0);
-      if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vshufpd(i.OutputSimd128Register(), src, src, 0x0);
-      } else {
-        DCHECK_EQ(dst, src);
-        __ shufpd(dst, src, 0x0);
-      }
+      __ Movddup(i.OutputSimd128Register(), i.InputDoubleRegister(0));
       break;
     }
-    case kSSEF64x2ExtractLane: {
-      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
-      XMMRegister dst = i.OutputDoubleRegister();
-      int8_t lane = i.InputInt8(1);
-      if (lane != 0) {
-        DCHECK_EQ(lane, 1);
-        __ shufpd(dst, dst, lane);
-      }
-      break;
-    }
-    case kAVXF64x2ExtractLane: {
-      CpuFeatureScope avx_scope(tasm(), AVX);
-      XMMRegister dst = i.OutputDoubleRegister();
-      XMMRegister src = i.InputSimd128Register(0);
-      int8_t lane = i.InputInt8(1);
-      if (lane == 0) {
-        if (dst != src) __ vmovapd(dst, src);
-      } else {
-        DCHECK_EQ(lane, 1);
-        __ vshufpd(dst, src, src, lane);
-      }
+    case kF64x2ExtractLane: {
+      __ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                          i.InputUint8(1));
       break;
     }
     case kSSEF64x2ReplaceLane: {
...
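As a side note, the ia32 splat case in this hunk now emits a single Movddup, an SSE3 instruction that broadcasts the low double of the source into both lanes, replacing the earlier shufpd/vshufpd sequence. A rough intrinsics sketch of that broadcast; illustrative only, not V8 code (assumes SSE3, e.g. compile with -msse3):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m128d scalar = _mm_set_sd(3.5);        // lane 0 = 3.5, lane 1 = 0.0
  __m128d splat = _mm_movedup_pd(scalar);  // movddup: both lanes = 3.5
  double out[2];
  _mm_storeu_pd(out, splat);
  std::printf("%f %f\n", out[0], out[1]);  // 3.5 3.5
}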
@@ -117,8 +117,7 @@ namespace compiler {
   V(IA32Poke)                  \
   V(IA32Peek)                  \
   V(IA32F64x2Splat)            \
-  V(SSEF64x2ExtractLane)       \
-  V(AVXF64x2ExtractLane)       \
+  V(F64x2ExtractLane)          \
   V(SSEF64x2ReplaceLane)       \
   V(AVXF64x2ReplaceLane)       \
   V(IA32F64x2Sqrt)             \
...
@@ -102,8 +102,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32BitcastFI:
     case kIA32BitcastIF:
     case kIA32F64x2Splat:
-    case kSSEF64x2ExtractLane:
-    case kAVXF64x2ExtractLane:
+    case kF64x2ExtractLane:
     case kSSEF64x2ReplaceLane:
     case kAVXF64x2ReplaceLane:
     case kIA32F64x2Sqrt:
...
@@ -2380,7 +2380,7 @@ void InstructionSelector::VisitF64x2Splat(Node* node) {
 }
 
 void InstructionSelector::VisitF64x2ExtractLane(Node* node) {
-  VisitRRISimd(this, node, kAVXF64x2ExtractLane, kSSEF64x2ExtractLane);
+  VisitRRISimd(this, node, kF64x2ExtractLane, kF64x2ExtractLane);
 }
 
 void InstructionSelector::VisitI64x2SplatI32Pair(Node* node) {
...
@@ -2390,21 +2390,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kX64F64x2ExtractLane: {
-      DoubleRegister dst = i.OutputDoubleRegister();
-      XMMRegister src = i.InputSimd128Register(0);
-      uint8_t lane = i.InputUint8(1);
-      if (lane == 0) {
-        __ Move(dst, src);
-      } else {
-        DCHECK_EQ(1, lane);
-        if (CpuFeatures::IsSupported(AVX)) {
-          CpuFeatureScope avx_scope(tasm(), AVX);
-          // Pass src as operand to avoid false-dependency on dst.
-          __ vmovhlps(dst, src, src);
-        } else {
-          __ movhlps(dst, src);
-        }
-      }
+      __ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                          i.InputUint8(1));
       break;
     }
     case kX64F64x2Sqrt: {
...
@@ -4634,13 +4634,7 @@ void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
 void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                                LiftoffRegister lhs,
                                                uint8_t imm_lane_idx) {
-  if (CpuFeatures::IsSupported(AVX)) {
-    CpuFeatureScope scope(this, AVX);
-    vshufpd(dst.fp(), lhs.fp(), lhs.fp(), imm_lane_idx);
-  } else {
-    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
-    if (imm_lane_idx != 0) shufpd(dst.fp(), dst.fp(), imm_lane_idx);
-  }
+  F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
 }
 
 void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
...
@@ -4166,8 +4166,7 @@ void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
 void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                                LiftoffRegister lhs,
                                                uint8_t imm_lane_idx) {
-  Pextrq(kScratchRegister, lhs.fp(), static_cast<int8_t>(imm_lane_idx));
-  Movq(dst.fp(), kScratchRegister);
+  F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
 }
 
 void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
...
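The replaced x64 Liftoff lowering round-tripped the lane value through a general-purpose register (Pextrq into kScratchRegister, then Movq back into an XMM register), whereas the shared F64x2ExtractLane keeps the value in the XMM domain and drops the SSE4.1 requirement of pextrq. A rough sketch contrasting the two approaches with intrinsics; illustrative only, not V8 code (ViaGpr/ViaXmm are made-up names; _mm_extract_epi64 needs SSE4.1):

#include <immintrin.h>
#include <cstdio>
#include <cstring>

double ViaGpr(__m128d v) {  // old lowering: XMM -> GPR -> XMM
  long long bits = _mm_extract_epi64(_mm_castpd_si128(v), 1);  // pextrq
  double d;
  std::memcpy(&d, &bits, sizeof d);  // movq the bits back into a double
  return d;
}

double ViaXmm(__m128d v) {  // shared helper: stays in the XMM domain
  return _mm_cvtsd_f64(_mm_unpackhi_pd(v, v));  // movhlps-style shuffle
}

int main() {
  __m128d v = _mm_set_pd(2.0, 1.0);
  std::printf("%f %f\n", ViaGpr(v), ViaXmm(v));  // 2.0 2.0
}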