Commit 7e89ba7f authored by Milad Farazmand, committed by Commit Bot

PPC: [wasm-simd] Implement horizontal add

Change-Id: I8962c08329c57367ff82d4669880c7efb1db8875
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2229304
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#68158}
parent 02ee6904
......@@ -1758,12 +1758,12 @@ void Assembler::fmsub(const DoubleRegister frt, const DoubleRegister fra,
}
// Vector instructions
void Assembler::mfvsrd(const Register ra, const DoubleRegister rs) {
// Emits an MFVSRD instruction word: the code of source vector register |rs|
// is placed at bit position B21, the code of destination GPR |ra| at B16, and
// the SX bit is always set.
// NOTE(review): SX = 1 presumably selects the upper half of the VSX register
// file (the registers that alias the vector registers) -- confirm against the
// Power ISA.
void Assembler::mfvsrd(const Register ra, const Simd128Register rs) {
int SX = 1;
emit(MFVSRD | rs.code() * B21 | ra.code() * B16 | SX);
}
void Assembler::mfvsrwz(const Register ra, const DoubleRegister rs) {
// Emits an MFVSRWZ instruction word: the code of source vector register |rs|
// is placed at bit position B21, the code of destination GPR |ra| at B16, and
// the SX bit is always set.
// NOTE(review): SX = 1 presumably selects the upper half of the VSX register
// file (the registers that alias the vector registers) -- confirm against the
// Power ISA.
void Assembler::mfvsrwz(const Register ra, const Simd128Register rs) {
int SX = 1;
emit(MFVSRWZ | rs.code() * B21 | ra.code() * B16 | SX);
}
......
......@@ -974,8 +974,8 @@ class Assembler : public AssemblerBase {
RCBit rc = LeaveRC);
// Vector instructions
void mfvsrd(const Register ra, const DoubleRegister r);
void mfvsrwz(const Register ra, const DoubleRegister r);
void mfvsrd(const Register ra, const Simd128Register r);
void mfvsrwz(const Register ra, const Simd128Register r);
void mtvsrd(const Simd128Register rt, const Register ra);
// Pseudo instructions
......
......@@ -2206,45 +2206,53 @@ using Instr = uint32_t;
/* Vector Splat Halfword */ \
V(vsplth, VSPLTH, 0x1000024C)
#define PPC_VX_OPCODE_B_FORM_LIST(V) \
/* Vector Logical OR */ \
V(vor, VOR, 0x10000484) \
/* Vector Logical XOR */ \
V(vxor, VXOR, 0x100004C4) \
/* Vector Logical NOR */ \
V(vnor, VNOR, 0x10000504) \
/* Vector Shift Right by Octet */ \
V(vsro, VSRO, 0x1000044C) \
/* Vector Shift Left by Octet */ \
V(vslo, VSLO, 0x1000040C) \
/* Vector Add Unsigned Doubleword Modulo */ \
V(vaddudm, VADDUDM, 0x100000C0) \
/* Vector Add Unsigned Word Modulo */ \
V(vadduwm, VADDUWM, 0x10000080) \
/* Vector Add Unsigned Halfword Modulo */ \
V(vadduhm, VADDUHM, 0x10000040) \
/* Vector Add Unsigned Byte Modulo */ \
V(vaddubm, VADDUBM, 0x10000000) \
/* Vector Add Single-Precision */ \
V(vaddfp, VADDFP, 0x1000000A) \
/* Vector Subtract Single-Precision */ \
V(vsubfp, VSUBFP, 0x1000004A) \
/* Vector Subtract Unsigned Doubleword Modulo */ \
V(vsubudm, VSUBUDM, 0x100004C0) \
/* Vector Subtract Unsigned Word Modulo */ \
V(vsubuwm, VSUBUWM, 0x10000480) \
/* Vector Subtract Unsigned Halfword Modulo */ \
V(vsubuhm, VSUBUHM, 0x10000440) \
/* Vector Subtract Unsigned Byte Modulo */ \
V(vsububm, VSUBUBM, 0x10000400) \
/* Vector Multiply Unsigned Word Modulo */ \
V(vmuluwm, VMULUWM, 0x10000089) \
/* Vector Pack Unsigned Halfword Unsigned Modulo */ \
V(vpkuhum, VPKUHUM, 0x1000000E) \
/* Vector Multiply Even Unsigned Byte */ \
V(vmuleub, VMULEUB, 0x10000208) \
/* Vector Multiply Odd Unsigned Byte */ \
V(vmuloub, VMULOUB, 0x10000008)
// X-macro list of VX-form vector opcodes. Each entry is
// V(mnemonic, ENUM_NAME, opcode_bits), preceded by a comment giving the
// instruction's full name. This change adds vsum4shs, vpkuwus, vsum2sws and
// vpkudum at the end of the list; the same four entries are dropped from
// PPC_VX_OPCODE_UNUSED_LIST.
// NOTE(review): presumably this list is expanded elsewhere to declare the
// matching Assembler emitters -- confirm at the expansion site.
#define PPC_VX_OPCODE_B_FORM_LIST(V) \
/* Vector Logical OR */ \
V(vor, VOR, 0x10000484) \
/* Vector Logical XOR */ \
V(vxor, VXOR, 0x100004C4) \
/* Vector Logical NOR */ \
V(vnor, VNOR, 0x10000504) \
/* Vector Shift Right by Octet */ \
V(vsro, VSRO, 0x1000044C) \
/* Vector Shift Left by Octet */ \
V(vslo, VSLO, 0x1000040C) \
/* Vector Add Unsigned Doubleword Modulo */ \
V(vaddudm, VADDUDM, 0x100000C0) \
/* Vector Add Unsigned Word Modulo */ \
V(vadduwm, VADDUWM, 0x10000080) \
/* Vector Add Unsigned Halfword Modulo */ \
V(vadduhm, VADDUHM, 0x10000040) \
/* Vector Add Unsigned Byte Modulo */ \
V(vaddubm, VADDUBM, 0x10000000) \
/* Vector Add Single-Precision */ \
V(vaddfp, VADDFP, 0x1000000A) \
/* Vector Subtract Single-Precision */ \
V(vsubfp, VSUBFP, 0x1000004A) \
/* Vector Subtract Unsigned Doubleword Modulo */ \
V(vsubudm, VSUBUDM, 0x100004C0) \
/* Vector Subtract Unsigned Word Modulo */ \
V(vsubuwm, VSUBUWM, 0x10000480) \
/* Vector Subtract Unsigned Halfword Modulo */ \
V(vsubuhm, VSUBUHM, 0x10000440) \
/* Vector Subtract Unsigned Byte Modulo */ \
V(vsububm, VSUBUBM, 0x10000400) \
/* Vector Multiply Unsigned Word Modulo */ \
V(vmuluwm, VMULUWM, 0x10000089) \
/* Vector Pack Unsigned Halfword Unsigned Modulo */ \
V(vpkuhum, VPKUHUM, 0x1000000E) \
/* Vector Multiply Even Unsigned Byte */ \
V(vmuleub, VMULEUB, 0x10000208) \
/* Vector Multiply Odd Unsigned Byte */ \
V(vmuloub, VMULOUB, 0x10000008) \
/* Vector Sum across Quarter Signed Halfword Saturate */ \
V(vsum4shs, VSUM4SHS, 0x10000648) \
/* Vector Pack Unsigned Word Unsigned Saturate */ \
V(vpkuwus, VPKUWUS, 0x100000CE) \
/* Vector Sum across Half Signed Word Saturate */ \
V(vsum2sws, VSUM2SWS, 0x10000688) \
/* Vector Pack Unsigned Doubleword Unsigned Modulo */ \
V(vpkudum, VPKUDUM, 0x1000044E)
#define PPC_VX_OPCODE_UNUSED_LIST(V) \
/* Decimal Add Modulo */ \
......@@ -2401,16 +2409,12 @@ using Instr = uint32_t;
V(vpkswss, VPKSWSS, 0x100001CE) \
/* Vector Pack Signed Word Unsigned Saturate */ \
V(vpkswus, VPKSWUS, 0x1000014E) \
/* Vector Pack Unsigned Doubleword Unsigned Modulo */ \
V(vpkudum, VPKUDUM, 0x1000044E) \
/* Vector Pack Unsigned Doubleword Unsigned Saturate */ \
V(vpkudus, VPKUDUS, 0x100004CE) \
/* Vector Pack Unsigned Halfword Unsigned Saturate */ \
V(vpkuhus, VPKUHUS, 0x1000008E) \
/* Vector Pack Unsigned Word Unsigned Modulo */ \
V(vpkuwum, VPKUWUM, 0x1000004E) \
/* Vector Pack Unsigned Word Unsigned Saturate */ \
V(vpkuwus, VPKUWUS, 0x100000CE) \
/* Vector Polynomial Multiply-Sum Byte */ \
V(vpmsumb, VPMSUMB, 0x10000408) \
/* Vector Polynomial Multiply-Sum Doubleword */ \
......@@ -2499,12 +2503,8 @@ using Instr = uint32_t;
V(vsubuqm, VSUBUQM, 0x10000500) \
/* Vector Subtract Unsigned Word Saturate */ \
V(vsubuws, VSUBUWS, 0x10000680) \
/* Vector Sum across Half Signed Word Saturate */ \
V(vsum2sws, VSUM2SWS, 0x10000688) \
/* Vector Sum across Quarter Signed Byte Saturate */ \
V(vsum4sbs, VSUM4SBS, 0x10000708) \
/* Vector Sum across Quarter Signed Halfword Saturate */ \
V(vsum4shs, VSUM4SHS, 0x10000648) \
/* Vector Sum across Quarter Unsigned Byte Saturate */ \
V(vsum4bus, VSUM4BUS, 0x10000608) \
/* Vector Sum across Signed Word Saturate */ \
......
......@@ -2402,6 +2402,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kPPC_F32x4AddHoriz: {
// Horizontal (pairwise) add of the float lanes of the two inputs. The
// sequence builds two vectors, each holding one float from every adjacent
// pair across src0/src1, then adds them lane-wise with vaddfp.
// Uses two instruction temps, the `ip` GPR and kScratchDoubleReg.
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));
// Width of one float lane; shifting by this much moves the other float of
// each 64-bit pair into the half that the pack instruction keeps.
constexpr int shift_bits = 32;
// generate first operand
// Pack the doublewords of src1/src0 down to words (modulo), which keeps one
// float out of every pair.
__ vpkudum(dst, src1, src0);
// generate second operand
// Build the shift-count vector: load the count into ip, move it into a
// vector register and splat byte 7 across all byte lanes.
// NOTE(review): presumably byte 7 is where the low byte of the GPR value
// lands after mtvsrd -- confirm.
__ li(ip, Operand(shift_bits));
__ mtvsrd(tempFPReg2, ip);
__ vspltb(tempFPReg2, tempFPReg2, Operand(7));
// Shift both sources right (by octets) by shift_bits. The second vsro reads
// tempFPReg2 as the shift count and overwrites it with the result.
__ vsro(tempFPReg1, src0, tempFPReg2);
__ vsro(tempFPReg2, src1, tempFPReg2);
// Pack the shifted sources the same way as the first operand.
__ vpkudum(kScratchDoubleReg, tempFPReg2, tempFPReg1);
// add the operands
__ vaddfp(dst, kScratchDoubleReg, dst);
break;
}
case kPPC_F32x4Sub: {
__ vsubfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -2445,6 +2465,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kPPC_I32x4AddHoriz: {
// Horizontal (pairwise) add of the 32-bit lanes of the two inputs, built
// from "sum across half" on each source followed by a doubleword pack.
// Clobbers kScratchDoubleReg.
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
// Zero the scratch register so it contributes nothing as the extra addend
// of the sums below.
__ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
// Sum the signed words of each half of src0, then of src1. The second sum
// reads the zeroed scratch as its addend and then overwrites it.
// NOTE(review): vsum2sws is a *saturating* signed sum; if this operation is
// expected to wrap on overflow, results differ for overflowing pairs --
// confirm against the WebAssembly SIMD semantics.
__ vsum2sws(dst, src0, kScratchDoubleReg);
__ vsum2sws(kScratchDoubleReg, src1, kScratchDoubleReg);
// Pack the doubleword elements of both partial results down to words
// (modulo) to form the four output lanes.
__ vpkudum(dst, kScratchDoubleReg, dst);
break;
}
case kPPC_I32x4Sub: {
__ vsubuwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -2460,6 +2490,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8AddHoriz: {
// Horizontal (pairwise) add of the 16-bit lanes of the two inputs, built
// from "sum across quarter" on each source followed by a word pack.
// Clobbers kScratchDoubleReg.
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
// Zero the scratch register so it contributes nothing as the extra addend
// of the sums below.
__ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
// Sum the signed halfwords within each word of src0, then of src1. The
// second sum reads the zeroed scratch as its addend and then overwrites it.
__ vsum4shs(dst, src0, kScratchDoubleReg);
__ vsum4shs(kScratchDoubleReg, src1, kScratchDoubleReg);
// Pack the word sums of both partial results down to halfwords to form the
// eight output lanes.
// NOTE(review): vpkuwus is an *unsigned saturating* pack applied to signed
// sums; a negative or >0xFFFF word presumably clamps instead of truncating.
// If wrap-around is expected, the modulo pack (vpkuwum) may be the right
// instruction -- confirm against the Power ISA and the WebAssembly SIMD
// semantics.
__ vpkuwus(dst, kScratchDoubleReg, dst);
break;
}
case kPPC_I16x8Sub: {
__ vsubuhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......
......@@ -200,6 +200,7 @@ namespace compiler {
V(PPC_F32x4ExtractLane) \
V(PPC_F32x4ReplaceLane) \
V(PPC_F32x4Add) \
V(PPC_F32x4AddHoriz) \
V(PPC_F32x4Sub) \
V(PPC_F32x4Mul) \
V(PPC_I64x2Splat) \
......@@ -212,6 +213,7 @@ namespace compiler {
V(PPC_I32x4ExtractLane) \
V(PPC_I32x4ReplaceLane) \
V(PPC_I32x4Add) \
V(PPC_I32x4AddHoriz) \
V(PPC_I32x4Sub) \
V(PPC_I32x4Mul) \
V(PPC_I16x8Splat) \
......@@ -219,6 +221,7 @@ namespace compiler {
V(PPC_I16x8ExtractLaneS) \
V(PPC_I16x8ReplaceLane) \
V(PPC_I16x8Add) \
V(PPC_I16x8AddHoriz) \
V(PPC_I16x8Sub) \
V(PPC_I16x8Mul) \
V(PPC_I8x16Splat) \
......
......@@ -123,6 +123,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_F32x4ExtractLane:
case kPPC_F32x4ReplaceLane:
case kPPC_F32x4Add:
case kPPC_F32x4AddHoriz:
case kPPC_F32x4Sub:
case kPPC_F32x4Mul:
case kPPC_I64x2Splat:
......@@ -135,6 +136,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_I32x4ExtractLane:
case kPPC_I32x4ReplaceLane:
case kPPC_I32x4Add:
case kPPC_I32x4AddHoriz:
case kPPC_I32x4Sub:
case kPPC_I32x4Mul:
case kPPC_I16x8Splat:
......@@ -142,6 +144,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_I16x8ExtractLaneS:
case kPPC_I16x8ReplaceLane:
case kPPC_I16x8Add:
case kPPC_I16x8AddHoriz:
case kPPC_I16x8Sub:
case kPPC_I16x8Mul:
case kPPC_I8x16Splat:
......
......@@ -2132,15 +2132,18 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F64x2Sub) \
V(F64x2Mul) \
V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(I64x2Add) \
V(I64x2Sub) \
V(I64x2Mul) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I16x8Add) \
V(I16x8AddHoriz) \
V(I16x8Sub) \
V(I16x8Mul) \
V(I8x16Add) \
......@@ -2384,10 +2387,6 @@ void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
void InstructionSelector::VisitF32x4RecipApprox(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8AddHoriz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment