Commit f458cade, authored by Milad Fa, committed by Commit Bot

PPC/s390: Reland "[wasm-simd] Remove add horiz instructions"

Port 430407cd

Original Commit Message:

    This is a reland of 77838343

    No changes in this reland, this wasn't causing the failures, see
    https://crbug.com/1163833 for the actual cause.

    Original change's description:
    > [wasm-simd] Remove add horiz instructions
    >
    > Bug: v8:6020
    > Change-Id: I0605798d03f2e9f9c3c07c49141289889a10a3b0
    > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2727204
    > Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
    > Commit-Queue: Zhi An Ng <zhin@chromium.org>
    > Cr-Commit-Position: refs/heads/master@{#73180}

R=zhin@chromium.org, joransiu@ca.ibm.com, junyan@redhat.com, midawson@redhat.com
BUG=
LOG=N

Change-Id: I1d69a63c2394e8a703e1bcd8b18a4f9f666f03b5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2737066
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#73203}
parent 81bb9cc8
...@@ -2381,26 +2381,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2381,26 +2381,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1)); i.InputSimd128Register(1));
break; break;
} }
case kPPC_F32x4AddHoriz: {
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));
constexpr int shift_bits = 32;
// generate first operand
__ vpkudum(dst, src1, src0);
// generate second operand
__ li(ip, Operand(shift_bits));
__ mtvsrd(tempFPReg2, ip);
__ vspltb(tempFPReg2, tempFPReg2, Operand(7));
__ vsro(tempFPReg1, src0, tempFPReg2);
__ vsro(tempFPReg2, src1, tempFPReg2);
__ vpkudum(kScratchSimd128Reg, tempFPReg2, tempFPReg1);
// add the operands
__ vaddfp(dst, kScratchSimd128Reg, dst);
break;
}
case kPPC_F32x4Sub: { case kPPC_F32x4Sub: {
__ vsubfp(i.OutputSimd128Register(), i.InputSimd128Register(0), __ vsubfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1)); i.InputSimd128Register(1));
...@@ -2453,16 +2433,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2453,16 +2433,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1)); i.InputSimd128Register(1));
break; break;
} }
case kPPC_I32x4AddHoriz: {
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
__ vxor(kScratchSimd128Reg, kScratchSimd128Reg, kScratchSimd128Reg);
__ vsum2sws(dst, src0, kScratchSimd128Reg);
__ vsum2sws(kScratchSimd128Reg, src1, kScratchSimd128Reg);
__ vpkudum(dst, kScratchSimd128Reg, dst);
break;
}
case kPPC_I32x4Sub: { case kPPC_I32x4Sub: {
__ vsubuwm(i.OutputSimd128Register(), i.InputSimd128Register(0), __ vsubuwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1)); i.InputSimd128Register(1));
...@@ -2478,16 +2448,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2478,16 +2448,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1)); i.InputSimd128Register(1));
break; break;
} }
case kPPC_I16x8AddHoriz: {
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
__ vxor(kScratchSimd128Reg, kScratchSimd128Reg, kScratchSimd128Reg);
__ vsum4shs(dst, src0, kScratchSimd128Reg);
__ vsum4shs(kScratchSimd128Reg, src1, kScratchSimd128Reg);
__ vpkuwus(dst, kScratchSimd128Reg, dst);
break;
}
case kPPC_I16x8Sub: { case kPPC_I16x8Sub: {
__ vsubuhm(i.OutputSimd128Register(), i.InputSimd128Register(0), __ vsubuhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1)); i.InputSimd128Register(1));
......
...@@ -220,7 +220,6 @@ namespace compiler { ...@@ -220,7 +220,6 @@ namespace compiler {
V(PPC_F32x4ExtractLane) \ V(PPC_F32x4ExtractLane) \
V(PPC_F32x4ReplaceLane) \ V(PPC_F32x4ReplaceLane) \
V(PPC_F32x4Add) \ V(PPC_F32x4Add) \
V(PPC_F32x4AddHoriz) \
V(PPC_F32x4Sub) \ V(PPC_F32x4Sub) \
V(PPC_F32x4Mul) \ V(PPC_F32x4Mul) \
V(PPC_F32x4Eq) \ V(PPC_F32x4Eq) \
...@@ -272,7 +271,6 @@ namespace compiler { ...@@ -272,7 +271,6 @@ namespace compiler {
V(PPC_I32x4ExtractLane) \ V(PPC_I32x4ExtractLane) \
V(PPC_I32x4ReplaceLane) \ V(PPC_I32x4ReplaceLane) \
V(PPC_I32x4Add) \ V(PPC_I32x4Add) \
V(PPC_I32x4AddHoriz) \
V(PPC_I32x4Sub) \ V(PPC_I32x4Sub) \
V(PPC_I32x4Mul) \ V(PPC_I32x4Mul) \
V(PPC_I32x4MinS) \ V(PPC_I32x4MinS) \
...@@ -307,7 +305,6 @@ namespace compiler { ...@@ -307,7 +305,6 @@ namespace compiler {
V(PPC_I16x8ExtractLaneS) \ V(PPC_I16x8ExtractLaneS) \
V(PPC_I16x8ReplaceLane) \ V(PPC_I16x8ReplaceLane) \
V(PPC_I16x8Add) \ V(PPC_I16x8Add) \
V(PPC_I16x8AddHoriz) \
V(PPC_I16x8Sub) \ V(PPC_I16x8Sub) \
V(PPC_I16x8Mul) \ V(PPC_I16x8Mul) \
V(PPC_I16x8MinS) \ V(PPC_I16x8MinS) \
......
...@@ -143,7 +143,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -143,7 +143,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_F32x4ExtractLane: case kPPC_F32x4ExtractLane:
case kPPC_F32x4ReplaceLane: case kPPC_F32x4ReplaceLane:
case kPPC_F32x4Add: case kPPC_F32x4Add:
case kPPC_F32x4AddHoriz:
case kPPC_F32x4Sub: case kPPC_F32x4Sub:
case kPPC_F32x4Mul: case kPPC_F32x4Mul:
case kPPC_F32x4Eq: case kPPC_F32x4Eq:
...@@ -197,7 +196,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -197,7 +196,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_I32x4ExtractLane: case kPPC_I32x4ExtractLane:
case kPPC_I32x4ReplaceLane: case kPPC_I32x4ReplaceLane:
case kPPC_I32x4Add: case kPPC_I32x4Add:
case kPPC_I32x4AddHoriz:
case kPPC_I32x4Sub: case kPPC_I32x4Sub:
case kPPC_I32x4Mul: case kPPC_I32x4Mul:
case kPPC_I32x4MinS: case kPPC_I32x4MinS:
...@@ -230,7 +228,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -230,7 +228,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_I16x8ExtractLaneS: case kPPC_I16x8ExtractLaneS:
case kPPC_I16x8ReplaceLane: case kPPC_I16x8ReplaceLane:
case kPPC_I16x8Add: case kPPC_I16x8Add:
case kPPC_I16x8AddHoriz:
case kPPC_I16x8Sub: case kPPC_I16x8Sub:
case kPPC_I16x8Mul: case kPPC_I16x8Mul:
case kPPC_I16x8MinS: case kPPC_I16x8MinS:
......
...@@ -2163,7 +2163,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ...@@ -2163,7 +2163,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F64x2Min) \ V(F64x2Min) \
V(F64x2Max) \ V(F64x2Max) \
V(F32x4Add) \ V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \ V(F32x4Sub) \
V(F32x4Mul) \ V(F32x4Mul) \
V(F32x4Eq) \ V(F32x4Eq) \
...@@ -2179,7 +2178,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ...@@ -2179,7 +2178,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I64x2Eq) \ V(I64x2Eq) \
V(I64x2Ne) \ V(I64x2Ne) \
V(I32x4Add) \ V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \ V(I32x4Sub) \
V(I32x4Mul) \ V(I32x4Mul) \
V(I32x4MinS) \ V(I32x4MinS) \
...@@ -2194,7 +2192,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ...@@ -2194,7 +2192,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I32x4GeU) \ V(I32x4GeU) \
V(I32x4DotI16x8S) \ V(I32x4DotI16x8S) \
V(I16x8Add) \ V(I16x8Add) \
V(I16x8AddHoriz) \
V(I16x8Sub) \ V(I16x8Sub) \
V(I16x8Mul) \ V(I16x8Mul) \
V(I16x8MinS) \ V(I16x8MinS) \
......
...@@ -2835,22 +2835,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2835,22 +2835,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(2)); Condition(2));
break; break;
} }
case kS390_F32x4AddHoriz: {
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
DoubleRegister tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
DoubleRegister tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));
constexpr int shift_bits = 32;
__ vpk(dst, src1, src0, Condition(0), Condition(0), Condition(3));
__ vesrl(tempFPReg2, src1, MemOperand(r0, shift_bits), Condition(3));
__ vesrl(tempFPReg1, src0, MemOperand(r0, shift_bits), Condition(3));
__ vpk(kScratchDoubleReg, tempFPReg2, tempFPReg1, Condition(0),
Condition(0), Condition(3));
__ vfa(dst, kScratchDoubleReg, dst, Condition(0), Condition(0),
Condition(2));
break;
}
case kS390_F32x4Sub: { case kS390_F32x4Sub: {
__ vfs(i.OutputSimd128Register(), i.InputSimd128Register(0), __ vfs(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), Condition(0), Condition(0), i.InputSimd128Register(1), Condition(0), Condition(0),
...@@ -2930,20 +2914,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2930,20 +2914,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(2)); Condition(2));
break; break;
} }
case kS390_I32x4AddHoriz: {
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
__ vs(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
Condition(0), Condition(0), Condition(2));
__ vsumg(dst, src0, kScratchDoubleReg, Condition(0), Condition(0),
Condition(2));
__ vsumg(kScratchDoubleReg, src1, kScratchDoubleReg, Condition(0),
Condition(0), Condition(2));
__ vpk(dst, kScratchDoubleReg, dst, Condition(0), Condition(0),
Condition(3));
break;
}
case kS390_I32x4Sub: { case kS390_I32x4Sub: {
__ vs(i.OutputSimd128Register(), i.InputSimd128Register(0), __ vs(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), Condition(0), Condition(0), i.InputSimd128Register(1), Condition(0), Condition(0),
...@@ -2962,20 +2932,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2962,20 +2932,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(1)); Condition(1));
break; break;
} }
case kS390_I16x8AddHoriz: {
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
__ vs(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
Condition(0), Condition(0), Condition(1));
__ vsum(dst, src0, kScratchDoubleReg, Condition(0), Condition(0),
Condition(1));
__ vsum(kScratchDoubleReg, src1, kScratchDoubleReg, Condition(0),
Condition(0), Condition(1));
__ vpk(dst, kScratchDoubleReg, dst, Condition(0), Condition(0),
Condition(2));
break;
}
case kS390_I16x8Sub: { case kS390_I16x8Sub: {
__ vs(i.OutputSimd128Register(), i.InputSimd128Register(0), __ vs(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), Condition(0), Condition(0), i.InputSimd128Register(1), Condition(0), Condition(0),
......
...@@ -221,7 +221,6 @@ namespace compiler { ...@@ -221,7 +221,6 @@ namespace compiler {
V(S390_F32x4ExtractLane) \ V(S390_F32x4ExtractLane) \
V(S390_F32x4ReplaceLane) \ V(S390_F32x4ReplaceLane) \
V(S390_F32x4Add) \ V(S390_F32x4Add) \
V(S390_F32x4AddHoriz) \
V(S390_F32x4Sub) \ V(S390_F32x4Sub) \
V(S390_F32x4Mul) \ V(S390_F32x4Mul) \
V(S390_F32x4Eq) \ V(S390_F32x4Eq) \
...@@ -275,7 +274,6 @@ namespace compiler { ...@@ -275,7 +274,6 @@ namespace compiler {
V(S390_I32x4ExtractLane) \ V(S390_I32x4ExtractLane) \
V(S390_I32x4ReplaceLane) \ V(S390_I32x4ReplaceLane) \
V(S390_I32x4Add) \ V(S390_I32x4Add) \
V(S390_I32x4AddHoriz) \
V(S390_I32x4Sub) \ V(S390_I32x4Sub) \
V(S390_I32x4Mul) \ V(S390_I32x4Mul) \
V(S390_I32x4MinS) \ V(S390_I32x4MinS) \
...@@ -314,7 +312,6 @@ namespace compiler { ...@@ -314,7 +312,6 @@ namespace compiler {
V(S390_I16x8ExtractLaneS) \ V(S390_I16x8ExtractLaneS) \
V(S390_I16x8ReplaceLane) \ V(S390_I16x8ReplaceLane) \
V(S390_I16x8Add) \ V(S390_I16x8Add) \
V(S390_I16x8AddHoriz) \
V(S390_I16x8Sub) \ V(S390_I16x8Sub) \
V(S390_I16x8Mul) \ V(S390_I16x8Mul) \
V(S390_I16x8MinS) \ V(S390_I16x8MinS) \
......
...@@ -168,7 +168,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -168,7 +168,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_F32x4ExtractLane: case kS390_F32x4ExtractLane:
case kS390_F32x4ReplaceLane: case kS390_F32x4ReplaceLane:
case kS390_F32x4Add: case kS390_F32x4Add:
case kS390_F32x4AddHoriz:
case kS390_F32x4Sub: case kS390_F32x4Sub:
case kS390_F32x4Mul: case kS390_F32x4Mul:
case kS390_F32x4Eq: case kS390_F32x4Eq:
...@@ -222,7 +221,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -222,7 +221,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_I32x4ExtractLane: case kS390_I32x4ExtractLane:
case kS390_I32x4ReplaceLane: case kS390_I32x4ReplaceLane:
case kS390_I32x4Add: case kS390_I32x4Add:
case kS390_I32x4AddHoriz:
case kS390_I32x4Sub: case kS390_I32x4Sub:
case kS390_I32x4Mul: case kS390_I32x4Mul:
case kS390_I32x4MinS: case kS390_I32x4MinS:
...@@ -261,7 +259,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -261,7 +259,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_I16x8ExtractLaneS: case kS390_I16x8ExtractLaneS:
case kS390_I16x8ReplaceLane: case kS390_I16x8ReplaceLane:
case kS390_I16x8Add: case kS390_I16x8Add:
case kS390_I16x8AddHoriz:
case kS390_I16x8Sub: case kS390_I16x8Sub:
case kS390_I16x8Mul: case kS390_I16x8Mul:
case kS390_I16x8MinS: case kS390_I16x8MinS:
......
...@@ -2409,7 +2409,6 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) { ...@@ -2409,7 +2409,6 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(F64x2Min) \ V(F64x2Min) \
V(F64x2Max) \ V(F64x2Max) \
V(F32x4Add) \ V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \ V(F32x4Sub) \
V(F32x4Mul) \ V(F32x4Mul) \
V(F32x4Eq) \ V(F32x4Eq) \
...@@ -2432,7 +2431,6 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) { ...@@ -2432,7 +2431,6 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(I64x2GtS) \ V(I64x2GtS) \
V(I64x2GeS) \ V(I64x2GeS) \
V(I32x4Add) \ V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \ V(I32x4Sub) \
V(I32x4Mul) \ V(I32x4Mul) \
V(I32x4MinS) \ V(I32x4MinS) \
...@@ -2451,7 +2449,6 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) { ...@@ -2451,7 +2449,6 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(I32x4ExtMulLowI16x8U) \ V(I32x4ExtMulLowI16x8U) \
V(I32x4ExtMulHighI16x8U) \ V(I32x4ExtMulHighI16x8U) \
V(I16x8Add) \ V(I16x8Add) \
V(I16x8AddHoriz) \
V(I16x8Sub) \ V(I16x8Sub) \
V(I16x8Mul) \ V(I16x8Mul) \
V(I16x8MinS) \ V(I16x8MinS) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment