Commit 7e89ba7f authored by Milad Farazmand, committed by Commit Bot

PPC: [wasm-simd] Implement horizontal add

Change-Id: I8962c08329c57367ff82d4669880c7efb1db8875
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2229304
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#68158}
parent 02ee6904
......@@ -1758,12 +1758,12 @@ void Assembler::fmsub(const DoubleRegister frt, const DoubleRegister fra,
}
// Vector instructions
void Assembler::mfvsrd(const Register ra, const DoubleRegister rs) {
void Assembler::mfvsrd(const Register ra, const Simd128Register rs) {
// Emits one MFVSRD instruction word: rs.code() goes into the field at B21 and
// ra.code() into the field at B16.
// SX is OR-ed into the lowest bit of the encoding; presumably it selects the
// vector half of the VSR register file -- TODO confirm against the ISA.
int SX = 1;
emit(MFVSRD | rs.code() * B21 | ra.code() * B16 | SX);
}
void Assembler::mfvsrwz(const Register ra, const DoubleRegister rs) {
void Assembler::mfvsrwz(const Register ra, const Simd128Register rs) {
// Emits one MFVSRWZ instruction word: rs.code() goes into the field at B21
// and ra.code() into the field at B16.
// SX is OR-ed into the lowest bit of the encoding; presumably it selects the
// vector half of the VSR register file -- TODO confirm against the ISA.
int SX = 1;
emit(MFVSRWZ | rs.code() * B21 | ra.code() * B16 | SX);
}
......
......@@ -974,8 +974,8 @@ class Assembler : public AssemblerBase {
RCBit rc = LeaveRC);
// Vector instructions
void mfvsrd(const Register ra, const DoubleRegister r);
void mfvsrwz(const Register ra, const DoubleRegister r);
void mfvsrd(const Register ra, const Simd128Register r);
void mfvsrwz(const Register ra, const Simd128Register r);
void mtvsrd(const Simd128Register rt, const Register ra);
// Pseudo instructions
......
......@@ -2244,7 +2244,15 @@ using Instr = uint32_t;
/* Vector Multiply Even Unsigned Byte */ \
V(vmuleub, VMULEUB, 0x10000208) \
/* Vector Multiply Odd Unsigned Byte */ \
V(vmuloub, VMULOUB, 0x10000008)
V(vmuloub, VMULOUB, 0x10000008) \
/* Vector Sum across Quarter Signed Halfword Saturate */ \
V(vsum4shs, VSUM4SHS, 0x10000648) \
/* Vector Pack Unsigned Word Unsigned Saturate */ \
V(vpkuwus, VPKUWUS, 0x100000CE) \
/* Vector Sum across Half Signed Word Saturate */ \
V(vsum2sws, VSUM2SWS, 0x10000688) \
/* Vector Pack Unsigned Doubleword Unsigned Modulo */ \
V(vpkudum, VPKUDUM, 0x1000044E)
#define PPC_VX_OPCODE_UNUSED_LIST(V) \
/* Decimal Add Modulo */ \
......@@ -2401,16 +2409,12 @@ using Instr = uint32_t;
V(vpkswss, VPKSWSS, 0x100001CE) \
/* Vector Pack Signed Word Unsigned Saturate */ \
V(vpkswus, VPKSWUS, 0x1000014E) \
/* Vector Pack Unsigned Doubleword Unsigned Modulo */ \
V(vpkudum, VPKUDUM, 0x1000044E) \
/* Vector Pack Unsigned Doubleword Unsigned Saturate */ \
V(vpkudus, VPKUDUS, 0x100004CE) \
/* Vector Pack Unsigned Halfword Unsigned Saturate */ \
V(vpkuhus, VPKUHUS, 0x1000008E) \
/* Vector Pack Unsigned Word Unsigned Modulo */ \
V(vpkuwum, VPKUWUM, 0x1000004E) \
/* Vector Pack Unsigned Word Unsigned Saturate */ \
V(vpkuwus, VPKUWUS, 0x100000CE) \
/* Vector Polynomial Multiply-Sum Byte */ \
V(vpmsumb, VPMSUMB, 0x10000408) \
/* Vector Polynomial Multiply-Sum Doubleword */ \
......@@ -2499,12 +2503,8 @@ using Instr = uint32_t;
V(vsubuqm, VSUBUQM, 0x10000500) \
/* Vector Subtract Unsigned Word Saturate */ \
V(vsubuws, VSUBUWS, 0x10000680) \
/* Vector Sum across Half Signed Word Saturate */ \
V(vsum2sws, VSUM2SWS, 0x10000688) \
/* Vector Sum across Quarter Signed Byte Saturate */ \
V(vsum4sbs, VSUM4SBS, 0x10000708) \
/* Vector Sum across Quarter Signed Halfword Saturate */ \
V(vsum4shs, VSUM4SHS, 0x10000648) \
/* Vector Sum across Quarter Unsigned Byte Saturate */ \
V(vsum4bus, VSUM4BUS, 0x10000608) \
/* Vector Sum across Signed Word Saturate */ \
......
......@@ -2402,6 +2402,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kPPC_F32x4AddHoriz: {
// F32x4 horizontal add: builds two packed vectors from the inputs with
// vpkudum (one from the inputs as-is, one from the inputs shifted by
// shift_bits) and adds them with vaddfp.
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
// Two scratch vector registers taken from the instruction's temp operands.
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));
constexpr int shift_bits = 32;
// First operand: pack the doubleword elements of src1 and src0 into dst
// (vpkudum = Vector Pack Unsigned Doubleword Unsigned Modulo).
__ vpkudum(dst, src1, src0);
// Second operand: materialise shift_bits in ip, move it into tempFPReg2 and
// splat byte 7 across the register so it can serve as the vsro shift amount.
__ li(ip, Operand(shift_bits));
__ mtvsrd(tempFPReg2, ip);
__ vspltb(tempFPReg2, tempFPReg2, Operand(7));
// Shift both inputs (vsro, presumably shift-right-by-octet -- TODO confirm).
// tempFPReg2 is the shift amount for both; the second vsro overwrites it, so
// the order of these two lines matters.
__ vsro(tempFPReg1, src0, tempFPReg2);
__ vsro(tempFPReg2, src1, tempFPReg2);
// Pack the shifted inputs in the same source order as the first operand.
__ vpkudum(kScratchDoubleReg, tempFPReg2, tempFPReg1);
// Add the two packed operands into dst.
__ vaddfp(dst, kScratchDoubleReg, dst);
// NOTE(review): dst is written by the first vpkudum before src0/src1 are read
// again by vsro; presumably the instruction selector prevents dst from
// aliasing the inputs -- confirm.
break;
}
case kPPC_F32x4Sub: {
__ vsubfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -2445,6 +2465,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kPPC_I32x4AddHoriz: {
// I32x4 horizontal add: sums each input with vsum2sws against a zero vector,
// then packs the two sum vectors into dst with vpkudum.
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
// Zero the scratch register (x ^ x) so it can be used as the addend below.
__ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
// vsum2sws = Vector Sum across Half Signed Word Saturate.
__ vsum2sws(dst, src0, kScratchDoubleReg);
// The scratch register is both the zero addend and the destination here, so
// it no longer holds zero after this instruction.
__ vsum2sws(kScratchDoubleReg, src1, kScratchDoubleReg);
// Pack the src1 sums (scratch) and the src0 sums (dst) into dst.
__ vpkudum(dst, kScratchDoubleReg, dst);
// NOTE(review): vsum2sws is a saturating sum per its mnemonic; confirm this
// matches the overflow behaviour required for I32x4AddHoriz.
// NOTE(review): dst is written before src1 is read; presumably the selector
// prevents dst from aliasing src1 -- confirm.
break;
}
case kPPC_I32x4Sub: {
__ vsubuwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -2460,6 +2490,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8AddHoriz: {
// I16x8 horizontal add: sums each input with vsum4shs against a zero vector,
// then packs the two sum vectors into dst with vpkuwus.
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
// Zero the scratch register (x ^ x) so it can be used as the addend below.
__ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
// vsum4shs = Vector Sum across Quarter Signed Halfword Saturate.
__ vsum4shs(dst, src0, kScratchDoubleReg);
// The scratch register is both the zero addend and the destination here, so
// it no longer holds zero after this instruction.
__ vsum4shs(kScratchDoubleReg, src1, kScratchDoubleReg);
// vpkuwus = Vector Pack Unsigned Word Unsigned Saturate: packs the src1 sums
// (scratch) and the src0 sums (dst) into dst.
__ vpkuwus(dst, kScratchDoubleReg, dst);
// NOTE(review): the sums are signed but the pack is unsigned-saturating;
// confirm negative or out-of-range sums yield the intended 16-bit lanes.
// NOTE(review): dst is written before src1 is read; presumably the selector
// prevents dst from aliasing src1 -- confirm.
break;
}
case kPPC_I16x8Sub: {
__ vsubuhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......
......@@ -200,6 +200,7 @@ namespace compiler {
V(PPC_F32x4ExtractLane) \
V(PPC_F32x4ReplaceLane) \
V(PPC_F32x4Add) \
V(PPC_F32x4AddHoriz) \
V(PPC_F32x4Sub) \
V(PPC_F32x4Mul) \
V(PPC_I64x2Splat) \
......@@ -212,6 +213,7 @@ namespace compiler {
V(PPC_I32x4ExtractLane) \
V(PPC_I32x4ReplaceLane) \
V(PPC_I32x4Add) \
V(PPC_I32x4AddHoriz) \
V(PPC_I32x4Sub) \
V(PPC_I32x4Mul) \
V(PPC_I16x8Splat) \
......@@ -219,6 +221,7 @@ namespace compiler {
V(PPC_I16x8ExtractLaneS) \
V(PPC_I16x8ReplaceLane) \
V(PPC_I16x8Add) \
V(PPC_I16x8AddHoriz) \
V(PPC_I16x8Sub) \
V(PPC_I16x8Mul) \
V(PPC_I8x16Splat) \
......
......@@ -123,6 +123,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_F32x4ExtractLane:
case kPPC_F32x4ReplaceLane:
case kPPC_F32x4Add:
case kPPC_F32x4AddHoriz:
case kPPC_F32x4Sub:
case kPPC_F32x4Mul:
case kPPC_I64x2Splat:
......@@ -135,6 +136,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_I32x4ExtractLane:
case kPPC_I32x4ReplaceLane:
case kPPC_I32x4Add:
case kPPC_I32x4AddHoriz:
case kPPC_I32x4Sub:
case kPPC_I32x4Mul:
case kPPC_I16x8Splat:
......@@ -142,6 +144,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_I16x8ExtractLaneS:
case kPPC_I16x8ReplaceLane:
case kPPC_I16x8Add:
case kPPC_I16x8AddHoriz:
case kPPC_I16x8Sub:
case kPPC_I16x8Mul:
case kPPC_I8x16Splat:
......
......@@ -2132,15 +2132,18 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F64x2Sub) \
V(F64x2Mul) \
V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(I64x2Add) \
V(I64x2Sub) \
V(I64x2Mul) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I16x8Add) \
V(I16x8AddHoriz) \
V(I16x8Sub) \
V(I16x8Mul) \
V(I8x16Add) \
......@@ -2384,10 +2387,6 @@ void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
void InstructionSelector::VisitF32x4RecipApprox(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8AddHoriz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment