Commit a02038b3 authored by Milad Farazmand, committed by Commit Bot

PPC: [wasm-simd] Implement simd binary operations
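
Adds VX-form vector add/sub/mul instructions to the PPC assembler and uses
them to implement the F64x2/F32x4/I64x2/I32x4/I16x8/I8x16 Add, Sub and Mul
wasm-simd operations in the code generator, instruction scheduler and
instruction selector.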

Change-Id: I5a93231b16c8291c87fce57062837dce886bc2f8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2216231
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#67975}
parent be91c6c5
@@ -1804,6 +1804,82 @@ void Assembler::vperm(const DoubleRegister rt, const DoubleRegister ra,
rc.code() * B6);
}
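// VX-form encoding: the opcode constant carries the primary and extended
// opcode bits; multiplying a 5-bit register code by B21/B16/B11 shifts it
// into the VRT/VRA/VRB field. The four-operand vmladduhm below additionally
// places VRC at B6.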
void Assembler::vaddudm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VADDUDM | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vadduwm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VADDUWM | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vadduhm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VADDUHM | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vaddubm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VADDUBM | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vaddfp(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VADDFP | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vsubfp(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VSUBFP | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vsubudm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VSUBUDM | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vsubuwm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VSUBUWM | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vsubuhm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VSUBUHM | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vsububm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VSUBUBM | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vmuluwm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VMULUWM | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vpkuhum(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VPKUHUM | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vmuleub(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VMULEUB | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vmuloub(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb) {
emit(VMULOUB | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
void Assembler::vmladduhm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb, const Simd128Register rc) {
emit(VMLADDUHM | rt.code() * B21 | ra.code() * B16 | rb.code() * B11 |
rc.code() * B6);
}
// Pseudo instructions.
void Assembler::nop(int type) {
Register reg = r0;
......
@@ -435,9 +435,10 @@ class Assembler : public AssemblerBase {
inline void xx3_form(Instr instr, DoubleRegister t, DoubleRegister a,
DoubleRegister b) {
int AX = ((a.code() & 0x20) >> 5) & 0x1;
int BX = ((b.code() & 0x20) >> 5) & 0x1;
int TX = ((t.code() & 0x20) >> 5) & 0x1;
// Using VR (high VSR) registers.
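// (Simd128 registers live in VSRs 32-63, which overlay the VRs, so the
// high bit of each register number -- TX/AX/BX -- is always 1.)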
int AX = 1;
int BX = 1;
int TX = 1;
emit(instr | (t.code() & 0x1F) * B21 | (a.code() & 0x1F) * B16 |
(b.code() & 0x1F) * B11 | AX * B2 | BX * B1 | TX);
@@ -962,6 +963,36 @@ class Assembler : public AssemblerBase {
const DoubleRegister rb);
void vperm(const DoubleRegister rt, const DoubleRegister ra,
const DoubleRegister rb, const DoubleRegister rc);
void vaddudm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vadduwm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vadduhm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vaddubm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vaddfp(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vsubfp(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vsubudm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vsubuwm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vsubuhm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vsububm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vmuluwm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vpkuhum(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vmuleub(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vmuloub(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb);
void vmladduhm(const Simd128Register rt, const Simd128Register ra,
const Simd128Register rb, const Simd128Register rc);
// Pseudo instructions
......
@@ -2164,6 +2164,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
__ vsro(dst, dst, kScratchDoubleReg);
// Reload the lane value: kScratchDoubleReg is zeroed first because
// mtvsrd leaves its second doubleword undefined, and vor must merge
// zeros there.
__ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ mtvsrd(kScratchDoubleReg, r0);
__ vor(dst, dst, kScratchDoubleReg);
break;
@@ -2186,6 +2187,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
__ vsro(dst, dst, kScratchDoubleReg);
// Reload; clear the scratch register first, as above, so the
// doubleword mtvsrd leaves undefined merges as zero.
__ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ mtvsrd(kScratchDoubleReg, src);
__ vor(dst, dst, kScratchDoubleReg);
break;
@@ -2380,6 +2382,114 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
#undef GENERATE_REPLACE_LANE_MASK
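// Most of the binops below map 1:1 onto a single VMX/VSX instruction;
// I64x2Mul, I16x8Mul and I8x16Mul need short multi-instruction sequences.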
case kPPC_F64x2Add: {
__ xvadddp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F64x2Sub: {
__ xvsubdp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F64x2Mul: {
__ xvmuldp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F32x4Add: {
__ vaddfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F32x4Sub: {
__ vsubfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F32x4Mul: {
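// VMX only offers a fused multiply-add (vmaddfp), so the VSX xvmulsp
// is used for a plain single-precision multiply.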
__ xvmulsp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I64x2Add: {
__ vaddudm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I64x2Sub: {
__ vsubudm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I64x2Mul: {
// No vector doubleword integer multiply is available at this ISA
// level, so spill both operands, multiply the 64-bit lanes in GPRs,
// and reload. Need to maintain 16 byte alignment for stvx and lvx.
__ addi(sp, sp, Operand(-40));
__ li(r0, Operand(0));
__ stvx(i.InputSimd128Register(0), MemOperand(sp, r0));
__ li(r0, Operand(16));
__ stvx(i.InputSimd128Register(1), MemOperand(sp, r0));
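// Multiply the two 64-bit lanes in GPRs; kBitsPerByte (== 8) is
// reused here as the byte offset between the doubleword lanes.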
for (int i = 0; i < 2; i++) {
__ LoadP(r0, MemOperand(sp, kBitsPerByte * i));
__ LoadP(ip, MemOperand(sp, (kBitsPerByte * i) + kSimd128Size));
__ mulld(r0, r0, ip);
__ StoreP(r0, MemOperand(sp, i * kBitsPerByte));
}
__ li(r0, Operand(0));
__ lvx(i.OutputSimd128Register(), MemOperand(sp, r0));
__ addi(sp, sp, Operand(40));
break;
}
case kPPC_I32x4Add: {
__ vadduwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I32x4Sub: {
__ vsubuwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I32x4Mul: {
__ vmuluwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8Add: {
__ vadduhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8Sub: {
__ vsubuhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8Mul: {
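// vmladduhm computes (ra * rb + rc) mod 2^16 per halfword lane; with
// rc zeroed it is a plain i16x8 multiply.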
__ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vmladduhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), kScratchDoubleReg);
break;
}
case kPPC_I8x16Add: {
__ vaddubm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I8x16Sub: {
__ vsububm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I8x16Mul: {
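// There is no byte-lane multiply: vmuleub/vmuloub form 16-bit
// products of the even/odd byte lanes, and vpkuhum packs the
// low-order bytes back into sixteen lanes.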
__ vmuleub(kScratchDoubleReg, i.InputSimd128Register(0),
i.InputSimd128Register(1));
__ vmuloub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
__ vpkuhum(i.OutputSimd128Register(), kScratchDoubleReg,
i.OutputSimd128Register());
break;
}
case kPPC_StoreCompressTagged: {
ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedFieldX);
break;
......
@@ -193,23 +193,41 @@ namespace compiler {
V(PPC_F64x2Splat) \
V(PPC_F64x2ExtractLane) \
V(PPC_F64x2ReplaceLane) \
V(PPC_F64x2Add) \
V(PPC_F64x2Sub) \
V(PPC_F64x2Mul) \
V(PPC_F32x4Splat) \
V(PPC_F32x4ExtractLane) \
V(PPC_F32x4ReplaceLane) \
V(PPC_F32x4Add) \
V(PPC_F32x4Sub) \
V(PPC_F32x4Mul) \
V(PPC_I64x2Splat) \
V(PPC_I64x2ExtractLane) \
V(PPC_I64x2ReplaceLane) \
V(PPC_I64x2Add) \
V(PPC_I64x2Sub) \
V(PPC_I64x2Mul) \
V(PPC_I32x4Splat) \
V(PPC_I32x4ExtractLane) \
V(PPC_I32x4ReplaceLane) \
V(PPC_I32x4Add) \
V(PPC_I32x4Sub) \
V(PPC_I32x4Mul) \
V(PPC_I16x8Splat) \
V(PPC_I16x8ExtractLaneU) \
V(PPC_I16x8ExtractLaneS) \
V(PPC_I16x8ReplaceLane) \
V(PPC_I16x8Add) \
V(PPC_I16x8Sub) \
V(PPC_I16x8Mul) \
V(PPC_I8x16Splat) \
V(PPC_I8x16ExtractLaneU) \
V(PPC_I8x16ExtractLaneS) \
V(PPC_I8x16ReplaceLane) \
V(PPC_I8x16Add) \
V(PPC_I8x16Sub) \
V(PPC_I8x16Mul) \
V(PPC_StoreCompressTagged) \
V(PPC_LoadDecompressTaggedSigned) \
V(PPC_LoadDecompressTaggedPointer) \
......
@@ -116,23 +116,41 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_F64x2Splat:
case kPPC_F64x2ExtractLane:
case kPPC_F64x2ReplaceLane:
case kPPC_F64x2Add:
case kPPC_F64x2Sub:
case kPPC_F64x2Mul:
case kPPC_F32x4Splat:
case kPPC_F32x4ExtractLane:
case kPPC_F32x4ReplaceLane:
case kPPC_F32x4Add:
case kPPC_F32x4Sub:
case kPPC_F32x4Mul:
case kPPC_I64x2Splat:
case kPPC_I64x2ExtractLane:
case kPPC_I64x2ReplaceLane:
case kPPC_I64x2Add:
case kPPC_I64x2Sub:
case kPPC_I64x2Mul:
case kPPC_I32x4Splat:
case kPPC_I32x4ExtractLane:
case kPPC_I32x4ReplaceLane:
case kPPC_I32x4Add:
case kPPC_I32x4Sub:
case kPPC_I32x4Mul:
case kPPC_I16x8Splat:
case kPPC_I16x8ExtractLaneU:
case kPPC_I16x8ExtractLaneS:
case kPPC_I16x8ReplaceLane:
case kPPC_I16x8Add:
case kPPC_I16x8Sub:
case kPPC_I16x8Mul:
case kPPC_I8x16Splat:
case kPPC_I8x16ExtractLaneU:
case kPPC_I8x16ExtractLaneS:
case kPPC_I8x16ReplaceLane:
case kPPC_I8x16Add:
case kPPC_I8x16Sub:
case kPPC_I8x16Mul:
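// The SIMD operations above are pure register computations, hence
// no scheduling flags.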
return kNoOpcodeFlags;
case kPPC_LoadWordS8:
......
@@ -2127,6 +2127,26 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8) \
V(I8x16)
#define SIMD_BINOP_LIST(V) \
V(F64x2Add) \
V(F64x2Sub) \
V(F64x2Mul) \
V(F32x4Add) \
V(F32x4Sub) \
V(F32x4Mul) \
V(I64x2Add) \
V(I64x2Sub) \
V(I64x2Mul) \
V(I32x4Add) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I16x8Add) \
V(I16x8Sub) \
V(I16x8Mul) \
V(I8x16Add) \
V(I8x16Sub) \
V(I8x16Mul)
#define SIMD_VISIT_SPLAT(Type) \
void InstructionSelector::Visit##Type##Splat(Node* node) { \
PPCOperandGenerator g(this); \
@@ -2162,18 +2182,25 @@ SIMD_VISIT_EXTRACT_LANE(I8x16, S)
}
SIMD_TYPES(SIMD_VISIT_REPLACE_LANE)
#undef SIMD_VISIT_REPLACE_LANE
#undef SIMD_TYPES
void InstructionSelector::VisitI32x4Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4Sub(Node* node) { UNIMPLEMENTED(); }
#define SIMD_VISIT_BINOP(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
PPCOperandGenerator g(this); \
InstructionOperand temps[] = {g.TempSimd128Register(), \
g.TempSimd128Register()}; \
Emit(kPPC_##Opcode, g.DefineAsRegister(node), \
g.UseUniqueRegister(node->InputAt(0)), \
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
}
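// For reference, SIMD_VISIT_BINOP(I32x4Add) expands to a visitor that emits
// kPPC_I32x4Add with the result defined as a register, both inputs in unique
// registers, and two Simd128 temporaries reserved for lowerings that need
// scratch registers.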
SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
#undef SIMD_VISIT_BINOP
#undef SIMD_BINOP_LIST
#undef SIMD_TYPES
void InstructionSelector::VisitI32x4Shl(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4MaxS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4MinS(Node* node) { UNIMPLEMENTED(); }
@@ -2204,20 +2231,14 @@ void InstructionSelector::VisitI16x8ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8ShrU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8AddSaturateS(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8SubSaturateS(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8MinS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8MaxS(Node* node) { UNIMPLEMENTED(); }
@@ -2258,14 +2279,10 @@ void InstructionSelector::VisitI8x16RoundingAverageU(Node* node) {
void InstructionSelector::VisitI8x16Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16AddSaturateS(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI8x16Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16SubSaturateS(Node* node) {
UNIMPLEMENTED();
}
@@ -2341,12 +2358,6 @@ void InstructionSelector::EmitPrepareResults(
}
}
void InstructionSelector::VisitF32x4Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); }
@@ -2452,8 +2463,6 @@ void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16Swizzle(Node* node) { UNIMPLEMENTED(); }
@@ -2464,12 +2473,6 @@ void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
@@ -2482,18 +2485,12 @@ void InstructionSelector::VisitF64x2Le(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Shl(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ShrU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
......