Commit 1fa2d2f3 authored by Milad Fa, committed by V8 LUCI CQ

S390 [simd]: cleanup the instruction selector

This CL includes the following changes:
- Avoid using `UniqueRegister` as much as possible
- Try to group opcodes under Binary or Unary when possible

Separate selectors are added to use `UniqueRegister` when necessary,
mainly when `Temp` registers are used. This makes sure the temp, dst
and src registers are distinct and cannot be overwritten during
codegen.

Some codegen ops also had to be modified to avoid using `Temp`
registers.

Change-Id: I4d5bdec58cb4874e7c3d344091cde8c8a9a4d01b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3024149
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#75723}
parent dc318df3
......@@ -2224,14 +2224,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kS390_LoadReverseSimd128: {
AddressingMode mode = kMode_None;
MemOperand operand = i.MemoryOperand(&mode);
Simd128Register dst = i.OutputSimd128Register();
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&
is_uint12(operand.offset())) {
__ vlbr(i.OutputSimd128Register(), operand, Condition(4));
__ vlbr(dst, operand, Condition(4));
} else {
__ lrvg(r0, operand);
__ lrvg(r1, MemOperand(operand.rx(), operand.rb(),
operand.offset() + kSystemPointerSize));
__ vlvgp(i.OutputSimd128Register(), r1, r0);
__ vlvgp(dst, r1, r0);
}
break;
}
......@@ -2587,8 +2588,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (src != dst) {
__ vlr(dst, src, Condition(0), Condition(0), Condition(0));
}
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, 1 - i.InputInt8(1)), Condition(3));
__ vlvg(dst, i.InputRegister(2), MemOperand(r0, 1 - i.InputInt8(1)),
Condition(3));
break;
}
case kS390_I32x4ReplaceLane: {
......@@ -2597,8 +2598,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (src != dst) {
__ vlr(dst, src, Condition(0), Condition(0), Condition(0));
}
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, 3 - i.InputInt8(1)), Condition(2));
__ vlvg(dst, i.InputRegister(2), MemOperand(r0, 3 - i.InputInt8(1)),
Condition(2));
break;
}
case kS390_I16x8ReplaceLane: {
......@@ -2607,8 +2608,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (src != dst) {
__ vlr(dst, src, Condition(0), Condition(0), Condition(0));
}
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, 7 - i.InputInt8(1)), Condition(1));
__ vlvg(dst, i.InputRegister(2), MemOperand(r0, 7 - i.InputInt8(1)),
Condition(1));
break;
}
case kS390_I8x16ReplaceLane: {
......@@ -2617,8 +2618,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (src != dst) {
__ vlr(dst, src, Condition(0), Condition(0), Condition(0));
}
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, 15 - i.InputInt8(1)), Condition(0));
__ vlvg(dst, i.InputRegister(2), MemOperand(r0, 15 - i.InputInt8(1)),
Condition(0));
break;
}
// vector binops
......@@ -3195,14 +3196,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kS390_F32x4RecipSqrtApprox: {
DoubleRegister tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
__ vfsq(tempFPReg1, i.InputSimd128Register(0), Condition(0), Condition(0),
Simd128Register dst = i.OutputSimd128Register();
__ vfsq(dst, i.InputSimd128Register(0), Condition(0), Condition(0),
Condition(2));
__ mov(kScratchReg, Operand(1));
__ ConvertIntToFloat(kScratchDoubleReg, kScratchReg);
__ vrep(kScratchDoubleReg, kScratchDoubleReg, Operand(0), Condition(2));
__ vfd(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
Condition(0), Condition(0), Condition(2));
__ vfd(dst, kScratchDoubleReg, dst, Condition(0), Condition(0),
Condition(2));
break;
}
case kS390_F32x4Sqrt: {
......@@ -3240,18 +3241,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kS390_V128AnyTrue: {
Simd128Register src = i.InputSimd128Register(0);
Register dst = i.OutputRegister();
Register temp = i.TempRegister(0);
__ mov(dst, Operand(1));
__ xgr(temp, temp);
__ xgr(kScratchReg, kScratchReg);
__ vtm(src, src, Condition(0), Condition(0), Condition(0));
__ locgr(Condition(8), dst, temp);
__ locgr(Condition(8), dst, kScratchReg);
break;
}
#define SIMD_ALL_TRUE(mode) \
Simd128Register src = i.InputSimd128Register(0); \
Register dst = i.OutputRegister(); \
Register temp = i.TempRegister(0); \
__ mov(temp, Operand(1)); \
__ mov(kScratchReg, Operand(1)); \
__ xgr(dst, dst); \
__ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg, Condition(0), \
Condition(0), Condition(2)); \
......@@ -3259,7 +3258,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(mode)); \
__ vtm(kScratchDoubleReg, kScratchDoubleReg, Condition(0), Condition(0), \
Condition(0)); \
__ locgr(Condition(8), dst, temp);
__ locgr(Condition(8), dst, kScratchReg);
case kS390_I64x2AllTrue: {
SIMD_ALL_TRUE(3)
break;
......@@ -3344,7 +3343,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kS390_I32x4SConvertF32x4: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register tempFPReg1 = i.ToDoubleRegister(instr->TempAt(0));
// NaN to 0
__ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
__ vfce(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
......@@ -3362,7 +3361,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kS390_I32x4UConvertF32x4: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register tempFPReg1 = i.ToDoubleRegister(instr->TempAt(0));
// NaN to 0, negative to 0
__ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
Condition(0), Condition(0), Condition(0));
......@@ -3576,8 +3575,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
uint64_t high = make_uint64(i.InputUint32(5), i.InputUint32(4));
__ mov(r0, Operand(low));
__ mov(ip, Operand(high));
__ vlvgp(dst, ip, r0);
__ vperm(dst, src0, src1, dst, Condition(0), Condition(0));
__ vlvgp(kScratchDoubleReg, ip, r0);
__ vperm(dst, src0, src1, kScratchDoubleReg, Condition(0), Condition(0));
break;
}
case kS390_I8x16Swizzle: {
......@@ -3724,10 +3723,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register dst = i.OutputSimd128Register(), \
src0 = i.InputSimd128Register(0), \
src1 = i.InputSimd128Register(1); \
__ mul_even(dst, src0, src1, Condition(0), Condition(0), Condition(mode)); \
__ mul_odd(kScratchDoubleReg, src0, src1, Condition(0), Condition(0), \
__ mul_even(kScratchDoubleReg, src0, src1, Condition(0), Condition(0), \
Condition(mode)); \
__ merge(dst, dst, kScratchDoubleReg, Condition(0), Condition(0), \
__ mul_odd(dst, src0, src1, Condition(0), Condition(0), Condition(mode)); \
__ merge(dst, kScratchDoubleReg, dst, Condition(0), Condition(0), \
Condition(mode + 1));
case kS390_I64x2ExtMulLowI32x4S: {
EXT_MUL(vme, vmo, vmrl, 2)
......@@ -3783,12 +3782,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register src = i.InputSimd128Register(0); \
Simd128Register dst = i.OutputSimd128Register(); \
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); \
__ vrepi(kScratchDoubleReg, Operand(1), Condition(lane_size)); \
__ mul_even(tempFPReg1, src, kScratchDoubleReg, Condition(0), Condition(0), \
__ vrepi(tempFPReg1, Operand(1), Condition(lane_size)); \
__ mul_even(kScratchDoubleReg, src, tempFPReg1, Condition(0), Condition(0), \
Condition(lane_size)); \
__ mul_odd(kScratchDoubleReg, src, kScratchDoubleReg, Condition(0), \
Condition(0), Condition(lane_size)); \
__ va(dst, tempFPReg1, kScratchDoubleReg, Condition(0), Condition(0), \
__ mul_odd(tempFPReg1, src, tempFPReg1, Condition(0), Condition(0), \
Condition(lane_size)); \
__ va(dst, kScratchDoubleReg, tempFPReg1, Condition(0), Condition(0), \
Condition(lane_size + 1));
case kS390_I32x4ExtAddPairwiseI16x8S: {
EXT_ADD_PAIRWISE(1, vme, vmo)
......@@ -3801,7 +3800,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(0), Condition(0), Condition(3));
__ vsum(dst, src0, kScratchDoubleReg, Condition(0), Condition(0),
Condition(1));
break;
}
case kS390_I16x8ExtAddPairwiseI8x16S: {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment