Commit 3356078a authored by Zhi An Ng, committed by Commit Bot

Revert "[wasm-simd][x64][ia32] Factor f64x2.replace_lane into shared code"

This reverts commit b824d853.

Reason for revert: https://ci.chromium.org/ui/p/v8/builders/ci/V8%20Linux64%20-%20debug/36784/overview

Original change's description:
> [wasm-simd][x64][ia32] Factor f64x2.replace_lane into shared code
>
> This pblendw/movlhps combination has lower latency and requires fewer
> uops than pinsrq (1 vs. 2).
>
> Bug: v8:11589
> Change-Id: I770b0c20a286774afefbac5ef0adffe463318f21
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2828871
> Reviewed-by: Bill Budge <bbudge@chromium.org>
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#74049}

Bug: v8:11589
Change-Id: I1be96e59fdb844db1e228be3a09d4a06798a16c3
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2837805
Auto-Submit: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Cr-Commit-Position: refs/heads/master@{#74050}
parent b824d853
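
For context, a minimal sketch (illustrative only, not V8 code; the struct and function names below are invented for this example) of what the wasm f64x2.replace_lane operation computes, independent of whether it is lowered with pblendw/movlhps (the reverted shared code) or with the pinsrq/insertps sequences this revert restores: copy the 128-bit input and overwrite one 64-bit lane with a scalar double.

#include <cstdio>

struct F64x2 { double lane[2]; };

// Semantics of f64x2.replace_lane: all lanes of v are kept except the one at
// lane_idx, which is overwritten with the scalar value.
F64x2 ReplaceLane(F64x2 v, int lane_idx, double value) {
  v.lane[lane_idx] = value;  // lane_idx must be 0 or 1
  return v;
}

int main() {
  F64x2 v = {{1.5, 2.5}};
  F64x2 r = ReplaceLane(v, 1, 9.0);
  std::printf("%g %g\n", r.lane[0], r.lane[1]);  // prints: 1.5 9
}
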
......@@ -2488,13 +2488,6 @@ void Assembler::movhlps(XMMRegister dst, XMMRegister src) {
emit_sse_operand(dst, src);
}
void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x0F);
EMIT(0x16);
emit_sse_operand(dst, src);
}
void Assembler::movlps(XMMRegister dst, Operand src) {
EnsureSpace ensure_space(this);
EMIT(0x0F);
......@@ -2986,10 +2979,6 @@ void Assembler::vmovhlps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vinstr(0x12, dst, src1, src2, kNone, k0F, kWIG);
}
void Assembler::vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vinstr(0x16, dst, src1, src2, kNone, k0F, kWIG);
}
void Assembler::vmovlps(XMMRegister dst, XMMRegister src1, Operand src2) {
vinstr(0x12, dst, src1, src2, kNone, k0F, kWIG);
}
......
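
As a side note on the assembler hunk above, here is a hedged, standalone sketch (not the V8 Assembler; the helper name is invented) of the bytes the removed movlhps() emitter produced: the 0F 16 opcode followed by a register-register ModRM byte. The removed vmovlhps routes the same 0x16 opcode through vinstr(), which handles the VEX prefix.

#include <cstdint>
#include <cstdio>

// ModRM byte for the register-direct form: mod = 11, reg = dst, rm = src.
uint8_t ModRM(int dst_code, int src_code) {
  return static_cast<uint8_t>(0xC0 | (dst_code << 3) | src_code);
}

int main() {
  // movlhps xmm1, xmm2  =>  0F 16 CA
  std::printf("0F 16 %02X\n", ModRM(/*xmm1=*/1, /*xmm2=*/2));
}
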
......@@ -868,7 +868,6 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void shufpd(XMMRegister dst, XMMRegister src, byte imm8);
void movhlps(XMMRegister dst, XMMRegister src);
void movlhps(XMMRegister dst, XMMRegister src);
void movlps(XMMRegister dst, Operand src);
void movlps(Operand dst, XMMRegister src);
void movhps(XMMRegister dst, Operand src);
......@@ -1399,7 +1398,6 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vshufpd(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
void vmovhlps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vmovlps(XMMRegister dst, XMMRegister src1, Operand src2);
void vmovlps(Operand dst, XMMRegister src);
void vmovhps(XMMRegister dst, XMMRegister src1, Operand src2);
......
......@@ -47,27 +47,6 @@ void SharedTurboAssembler::F64x2ExtractLane(DoubleRegister dst, XMMRegister src,
}
}
void SharedTurboAssembler::F64x2ReplaceLane(XMMRegister dst, XMMRegister src,
DoubleRegister rep, uint8_t lane) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
if (lane == 0) {
vpblendw(dst, src, rep, 0b00001111);
} else {
vmovlhps(dst, src, rep);
}
} else {
CpuFeatureScope scope(this, SSE4_1);
DCHECK_NE(dst, rep);
if (dst != src) movaps(dst, src);
if (lane == 0) {
pblendw(dst, rep, 0b00001111);
} else {
movlhps(dst, rep);
}
}
}
void SharedTurboAssembler::S128Store32Lane(Operand dst, XMMRegister src,
uint8_t laneidx) {
if (laneidx == 0) {
......
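
To make the removed SharedTurboAssembler::F64x2ReplaceLane sequence concrete, here is a minimal model (not V8 code; the types and helpers are invented for illustration) of the two non-AVX instructions it used. pblendw selects 16-bit words, so mask 0b00001111 copies the low four words (double lane 0) from the replacement, while movlhps writes the low 64 bits of its source into the high 64 bits of the destination (double lane 1).

#include <cstdint>
#include <cstring>
#include <cstdio>

struct Simd128 { uint16_t w[8]; };

// pblendw dst, src, mask: each set bit i copies 16-bit word i from src.
Simd128 Pblendw(Simd128 dst, Simd128 src, uint8_t mask) {
  for (int i = 0; i < 8; ++i) {
    if (mask & (1 << i)) dst.w[i] = src.w[i];
  }
  return dst;
}

// movlhps dst, src: high qword of dst <- low qword of src.
Simd128 Movlhps(Simd128 dst, Simd128 src) {
  std::memcpy(&dst.w[4], &src.w[0], 8);
  return dst;
}

int main() {
  Simd128 v, rep;
  double lanes[2] = {1.0, 2.0}, r = 9.0;
  std::memcpy(v.w, lanes, 16);
  std::memcpy(&rep.w[0], &r, 8);  // replacement double in the low qword
  std::memcpy(&rep.w[4], &r, 8);  // high qword initialized for completeness

  Simd128 lane0 = Pblendw(v, rep, 0b00001111);  // replaces lane 0 -> {9, 2}
  Simd128 lane1 = Movlhps(v, rep);              // replaces lane 1 -> {1, 9}

  double out0[2], out1[2];
  std::memcpy(out0, lane0.w, 16);
  std::memcpy(out1, lane1.w, 16);
  std::printf("%g %g | %g %g\n", out0[0], out0[1], out1[0], out1[1]);
}
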
......@@ -272,8 +272,6 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP_SSE4_1(Roundps, roundps)
void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane);
void F64x2ReplaceLane(XMMRegister dst, XMMRegister src, DoubleRegister rep,
uint8_t lane);
void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scrat, bool is_signed);
......
......@@ -1874,9 +1874,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputUint8(1));
break;
}
case kF64x2ReplaceLane: {
__ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputDoubleRegister(2), i.InputInt8(1));
case kSSEF64x2ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
int8_t lane = i.InputInt8(1);
DoubleRegister rep = i.InputDoubleRegister(2);
// insertps takes a mask which contains (high to low):
// - 2 bit specifying source float element to copy
// - 2 bit specifying destination float element to write to
// - 4 bits specifying which elements of the destination to zero
DCHECK_LT(lane, 2);
if (lane == 0) {
__ insertps(dst, rep, 0b00000000);
__ insertps(dst, rep, 0b01010000);
} else {
__ insertps(dst, rep, 0b00100000);
__ insertps(dst, rep, 0b01110000);
}
break;
}
case kAVXF64x2ReplaceLane: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
int8_t lane = i.InputInt8(1);
DoubleRegister rep = i.InputDoubleRegister(2);
DCHECK_NE(dst, rep);
DCHECK_LT(lane, 2);
if (lane == 0) {
__ vinsertps(dst, src, rep, 0b00000000);
__ vinsertps(dst, dst, rep, 0b01010000);
} else {
__ vinsertps(dst, src, rep, 0b00100000);
__ vinsertps(dst, dst, rep, 0b01110000);
}
break;
}
case kIA32F64x2Sqrt: {
......
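
The restored ia32 cases rely on the insertps immediate layout described in the in-code comment above. A small standalone decoder (assumption: not part of V8, written only to illustrate the immediates used there) shows why each double-lane replacement needs two insertps: a double lane spans two float elements, so the low and high halves are written separately.

#include <cstdint>
#include <cstdio>

// Fields of the insertps immediate (register source): bits 7..6 select the
// source float element, bits 5..4 select the destination float element,
// bits 3..0 zero destination elements.
void DecodeInsertpsImm(uint8_t imm) {
  std::printf("imm=0x%02x: src float %d -> dst float %d, zero mask 0x%x\n",
              imm, (imm >> 6) & 3, (imm >> 4) & 3, imm & 0xF);
}

int main() {
  DecodeInsertpsImm(0b00000000);  // double lane 0, low half
  DecodeInsertpsImm(0b01010000);  // double lane 0, high half
  DecodeInsertpsImm(0b00100000);  // double lane 1, low half
  DecodeInsertpsImm(0b01110000);  // double lane 1, high half
}
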
......@@ -118,7 +118,8 @@ namespace compiler {
V(IA32Peek) \
V(IA32F64x2Splat) \
V(F64x2ExtractLane) \
V(F64x2ReplaceLane) \
V(SSEF64x2ReplaceLane) \
V(AVXF64x2ReplaceLane) \
V(IA32F64x2Sqrt) \
V(IA32F64x2Add) \
V(IA32F64x2Sub) \
......
......@@ -103,7 +103,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32BitcastIF:
case kIA32F64x2Splat:
case kF64x2ExtractLane:
case kF64x2ReplaceLane:
case kSSEF64x2ReplaceLane:
case kAVXF64x2ReplaceLane:
case kIA32F64x2Sqrt:
case kIA32F64x2Add:
case kIA32F64x2Sub:
......
......@@ -2552,16 +2552,20 @@ SIMD_REPLACE_LANE_TYPE_OP(VISIT_SIMD_REPLACE_LANE)
// operand2 to be UseRegister, because the codegen relies on insertps using
// registers.
// TODO(v8:9764) Remove this UseRegister requirement
#define VISIT_SIMD_REPLACE_LANE_USE_REG(Type) \
void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
IA32OperandGenerator g(this); \
InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); \
InstructionOperand operand1 = \
g.UseImmediate(OpParameter<int32_t>(node->op())); \
InstructionOperand operand2 = g.UseUniqueRegister(node->InputAt(1)); \
InstructionOperand dst = IsSupported(AVX) ? g.DefineAsRegister(node) \
: g.DefineSameAsFirst(node); \
Emit(k##Type##ReplaceLane, dst, operand0, operand1, operand2); \
#define VISIT_SIMD_REPLACE_LANE_USE_REG(Type) \
void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
IA32OperandGenerator g(this); \
InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); \
InstructionOperand operand1 = \
g.UseImmediate(OpParameter<int32_t>(node->op())); \
InstructionOperand operand2 = g.UseUniqueRegister(node->InputAt(1)); \
if (IsSupported(AVX)) { \
Emit(kAVX##Type##ReplaceLane, g.DefineAsRegister(node), operand0, \
operand1, operand2); \
} else { \
Emit(kSSE##Type##ReplaceLane, g.DefineSameAsFirst(node), operand0, \
operand1, operand2); \
} \
}
VISIT_SIMD_REPLACE_LANE_USE_REG(F64x2)
#undef VISIT_SIMD_REPLACE_LANE_USE_REG
......
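
The restored selector splits into SSE and AVX opcodes because the two forms place different constraints on the register allocator: SSE insertps is destructive, so the result must reuse the first input's register (DefineSameAsFirst), while vinsertps takes a separate source operand and can define a fresh output register (DefineAsRegister). A simplified model of that difference (assumption: not the V8 codegen API; the function names are invented):

#include <cstdio>

// SSE-style two-operand form: destructive, the destination is also an input.
void InsertpsModel(double dst[2], double rep, int lane) { dst[lane] = rep; }

// AVX-style three-operand form: non-destructive, the source stays intact.
void VinsertpsModel(double dst[2], const double src[2], double rep, int lane) {
  dst[0] = src[0];
  dst[1] = src[1];
  dst[lane] = rep;
}

int main() {
  double a[2] = {1.0, 2.0};
  double out[2];
  InsertpsModel(a, 9.0, 0);        // a becomes {9, 2}
  VinsertpsModel(out, a, 7.0, 1);  // out becomes {9, 7}; a is unchanged
  std::printf("%g %g / %g %g\n", a[0], a[1], out[0], out[1]);
}

This mirrors the reverted shared helper, which could pick its output register freely only on the AVX path and otherwise had to start with movaps(dst, src).
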
......@@ -2394,11 +2394,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputUint8(1));
break;
}
case kX64F64x2ReplaceLane: {
__ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputDoubleRegister(2), i.InputInt8(1));
break;
}
case kX64F64x2Sqrt: {
__ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
......
......@@ -156,7 +156,6 @@ namespace compiler {
V(X64Peek) \
V(X64F64x2Splat) \
V(X64F64x2ExtractLane) \
V(X64F64x2ReplaceLane) \
V(X64F64x2Abs) \
V(X64F64x2Neg) \
V(X64F64x2Sqrt) \
......
......@@ -132,7 +132,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64Pinsrq:
case kX64F64x2Splat:
case kX64F64x2ExtractLane:
case kX64F64x2ReplaceLane:
case kX64F64x2Abs:
case kX64F64x2Neg:
case kX64F64x2Sqrt:
......
......@@ -3070,7 +3070,7 @@ void InstructionSelector::VisitF32x4ReplaceLane(Node* node) {
}
#define SIMD_TYPES_FOR_REPLACE_LANE(V) \
V(F64x2, kX64F64x2ReplaceLane) \
V(F64x2, kX64Pinsrq) \
V(I64x2, kX64Pinsrq) \
V(I32x4, kX64Pinsrd) \
V(I16x8, kX64Pinsrw) \
......
......@@ -4712,7 +4712,27 @@ void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
F64x2ReplaceLane(dst.fp(), src1.fp(), src2.fp(), imm_lane_idx);
// TODO(fanchenk): Use movlhps and blendpd
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
if (imm_lane_idx == 0) {
vinsertps(dst.fp(), src1.fp(), src2.fp(), 0b00000000);
vinsertps(dst.fp(), dst.fp(), src2.fp(), 0b01010000);
} else {
vinsertps(dst.fp(), src1.fp(), src2.fp(), 0b00100000);
vinsertps(dst.fp(), dst.fp(), src2.fp(), 0b01110000);
}
} else {
CpuFeatureScope scope(this, SSE4_1);
if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
if (imm_lane_idx == 0) {
insertps(dst.fp(), src2.fp(), 0b00000000);
insertps(dst.fp(), src2.fp(), 0b01010000);
} else {
insertps(dst.fp(), src2.fp(), 0b00100000);
insertps(dst.fp(), src2.fp(), 0b01110000);
}
}
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
......
......@@ -4242,7 +4242,22 @@ void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
F64x2ReplaceLane(dst.fp(), src1.fp(), src2.fp(), imm_lane_idx);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
if (imm_lane_idx == 0) {
vpblendw(dst.fp(), src1.fp(), src2.fp(), 0b00001111);
} else {
vmovlhps(dst.fp(), src1.fp(), src2.fp());
}
} else {
CpuFeatureScope scope(this, SSE4_1);
if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
if (imm_lane_idx == 0) {
pblendw(dst.fp(), src2.fp(), 0b00001111);
} else {
movlhps(dst.fp(), src2.fp());
}
}
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
......