Commit b6635637 authored by Milad Farazmand, committed by Commit Bot

PPC: [wasm-simd] Implement simd unary operations

Change-Id: I1f323ecb531880feb49cb70797b8f39ad863a75d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2269841
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#68565}
parent 04ce88ea
@@ -427,6 +427,23 @@ class Assembler : public AssemblerBase {
#undef DECLARE_PPC_X_INSTRUCTIONS_EH_S_FORM
#undef DECLARE_PPC_X_INSTRUCTIONS_EH_L_FORM
#define DECLARE_PPC_XX2_INSTRUCTIONS(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register rb) { \
xx2_form(instr_name, rt, rb); \
}
inline void xx2_form(Instr instr, Simd128Register t, Simd128Register b) {
// Using VR (high VSR) registers.
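// TX = BX = 1 selects VSR32-VSR63, which overlay the VR (Altivec) registers.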
int BX = 1;
int TX = 1;
emit(instr | (t.code() & 0x1F) * B21 | (b.code() & 0x1F) * B11 | BX * B1 |
TX);
}
PPC_XX2_OPCODE_A_FORM_LIST(DECLARE_PPC_XX2_INSTRUCTIONS)
#undef DECLARE_PPC_XX2_INSTRUCTIONS
#define DECLARE_PPC_XX3_INSTRUCTIONS(name, instr_name, instr_value) \
inline void name(const DoubleRegister rt, const DoubleRegister ra, \
const DoubleRegister rb) { \
......
@@ -194,8 +194,6 @@ using Instr = uint32_t;
V(xsnmsubmsp, XSNMSUBMSP, 0xF00004C8) \
/* VSX Scalar Reciprocal Estimate Double-Precision */ \
V(xsredp, XSREDP, 0xF0000168) \
/* VSX Scalar Reciprocal Estimate Single-Precision */ \
V(xsresp, XSRESP, 0xF0000068) \
/* VSX Scalar Subtract Double-Precision */ \
V(xssubdp, XSSUBDP, 0xF0000140) \
/* VSX Scalar Subtract Single-Precision */ \
@@ -286,8 +284,6 @@ using Instr = uint32_t;
V(xvnmsubmsp, XVNMSUBMSP, 0xF00006C8) \
/* VSX Vector Reciprocal Estimate Double-Precision */ \
V(xvredp, XVREDP, 0xF0000368) \
/* VSX Vector Reciprocal Estimate Single-Precision */ \
V(xvresp, XVRESP, 0xF0000268) \
/* VSX Vector Subtract Double-Precision */ \
V(xvsubdp, XVSUBDP, 0xF0000340) \
/* VSX Vector Subtract Single-Precision */ \
@@ -363,7 +359,33 @@ using Instr = uint32_t;
/* Decimal Floating Test Data Group Quad */ \
V(dtstdgq, DTSTDGQ, 0xFC0001C4)
#define PPC_XX2_OPCODE_LIST(V) \
#define PPC_XX2_OPCODE_A_FORM_LIST(V) \
/* VSX Vector Absolute Value Double-Precision */ \
V(xvabsdp, XVABSDP, 0xF0000764) \
/* VSX Vector Negate Double-Precision */ \
V(xvnegdp, XVNEGDP, 0xF00007E4) \
/* VSX Vector Square Root Double-Precision */ \
V(xvsqrtdp, XVSQRTDP, 0xF000032C) \
/* VSX Vector Absolute Value Single-Precision */ \
V(xvabssp, XVABSSP, 0xF0000664) \
/* VSX Vector Negate Single-Precision */ \
V(xvnegsp, XVNEGSP, 0xF00006E4) \
/* VSX Vector Reciprocal Estimate Single-Precision */ \
V(xvresp, XVRESP, 0xF0000268) \
/* VSX Vector Reciprocal Square Root Estimate Single-Precision */ \
V(xvrsqrtesp, XVRSQRTESP, 0xF0000228) \
/* VSX Vector Square Root Single-Precision */ \
V(xvsqrtsp, XVSQRTSP, 0xF000022C)
#define PPC_XX2_OPCODE_UNUSED_LIST(V) \
/* VSX Scalar Square Root Double-Precision */ \
V(xssqrtdp, XSSQRTDP, 0xF000012C) \
/* VSX Scalar Reciprocal Estimate Single-Precision */ \
V(xsresp, XSRESP, 0xF0000068) \
/* VSX Scalar Reciprocal Square Root Estimate Single-Precision */ \
V(xsrsqrtesp, XSRSQRTESP, 0xF0000028) \
/* VSX Scalar Square Root Single-Precision */ \
V(xssqrtsp, XSSQRTSP, 0xF000002C) \
/* Move To VSR Doubleword */ \
V(mtvsrd, MTVSRD, 0x7C000166) \
/* Move To VSR Word Algebraic */ \
@@ -423,18 +445,8 @@ using Instr = uint32_t;
V(xsrsp, XSRSP, 0xF0000464) \
/* VSX Scalar Reciprocal Square Root Estimate Double-Precision */ \
V(xsrsqrtedp, XSRSQRTEDP, 0xF0000128) \
/* VSX Scalar Reciprocal Square Root Estimate Single-Precision */ \
V(xsrsqrtesp, XSRSQRTESP, 0xF0000028) \
/* VSX Scalar Square Root Double-Precision */ \
V(xssqrtdp, XSSQRTDP, 0xF000012C) \
/* VSX Scalar Square Root Single-Precision */ \
V(xssqrtsp, XSSQRTSP, 0xF000002C) \
/* VSX Scalar Test for software Square Root Double-Precision */ \
V(xstsqrtdp, XSTSQRTDP, 0xF00001A8) \
/* VSX Vector Absolute Value Double-Precision */ \
V(xvabsdp, XVABSDP, 0xF0000764) \
/* VSX Vector Absolute Value Single-Precision */ \
V(xvabssp, XVABSSP, 0xF0000664) \
/* VSX Vector Convert Double-Precision to Single-Precision */ \
V(xvcvdpsp, XVCVDPSP, 0xF0000624) \
/* VSX Vector Convert Double-Precision to Signed Fixed-Point Doubleword */ \
@@ -485,10 +497,6 @@ using Instr = uint32_t;
V(xvnabsdp, XVNABSDP, 0xF00007A4) \
/* VSX Vector Negative Absolute Value Single-Precision */ \
V(xvnabssp, XVNABSSP, 0xF00006A4) \
/* VSX Vector Negate Double-Precision */ \
V(xvnegdp, XVNEGDP, 0xF00007E4) \
/* VSX Vector Negate Single-Precision */ \
V(xvnegsp, XVNEGSP, 0xF00006E4) \
/* VSX Vector Round to Double-Precision Integer */ \
V(xvrdpi, XVRDPI, 0xF0000324) \
/* VSX Vector Round to Double-Precision Integer using Current rounding */ \
@@ -513,17 +521,15 @@ using Instr = uint32_t;
V(xvrspiz, XVRSPIZ, 0xF0000264) \
/* VSX Vector Reciprocal Square Root Estimate Double-Precision */ \
V(xvrsqrtedp, XVRSQRTEDP, 0xF0000328) \
/* VSX Vector Reciprocal Square Root Estimate Single-Precision */ \
V(xvrsqrtesp, XVRSQRTESP, 0xF0000228) \
/* VSX Vector Square Root Double-Precision */ \
V(xvsqrtdp, XVSQRTDP, 0xF000032C) \
/* VSX Vector Square Root Single-Precision */ \
V(xvsqrtsp, XVSQRTSP, 0xF000022C) \
/* VSX Vector Test for software Square Root Double-Precision */ \
V(xvtsqrtdp, XVTSQRTDP, 0xF00003A8) \
/* VSX Vector Test for software Square Root Single-Precision */ \
V(xvtsqrtsp, XVTSQRTSP, 0xF00002A8)
#define PPC_XX2_OPCODE_LIST(V) \
PPC_XX2_OPCODE_A_FORM_LIST(V) \
PPC_XX2_OPCODE_UNUSED_LIST(V)
#define PPC_EVX_OPCODE_LIST(V) \
/* Vector Load Double Word into Double Word by External PID Indexed */ \
V(evlddepx, EVLDDEPX, 0x7C00063E) \
......
@@ -2913,6 +2913,116 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vsel(dst, src2, src1, mask);
break;
}
case kPPC_F64x2Abs: {
__ xvabsdp(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kPPC_F64x2Neg: {
__ xvnegdp(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kPPC_F64x2Sqrt: {
__ xvsqrtdp(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kPPC_F32x4Abs: {
__ xvabssp(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kPPC_F32x4Neg: {
__ xvnegsp(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kPPC_F32x4RecipApprox: {
__ xvresp(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kPPC_F32x4RecipSqrtApprox: {
__ xvrsqrtesp(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kPPC_F32x4Sqrt: {
__ xvsqrtsp(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kPPC_I64x2Neg: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
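// Store 1 to two consecutive doublewords on the stack and load them back
// as a vector of 64-bit ones.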
__ li(ip, Operand(1));
// Need to maintain 16 byte alignment for lvx.
__ addi(sp, sp, Operand(-24));
__ StoreP(ip, MemOperand(sp, 0));
__ StoreP(ip, MemOperand(sp, 8));
__ li(r0, Operand(0));
__ lvx(kScratchDoubleReg, MemOperand(sp, r0));
__ addi(sp, sp, Operand(24));
// Perform negation.
__ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
__ vaddudm(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg);
break;
}
case kPPC_I32x4Neg: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
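// Splat 1 into every word lane, then negate via two's complement: -x = ~x + 1.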
__ li(ip, Operand(1));
__ mtvsrd(kScratchDoubleReg, ip);
__ vspltw(kScratchDoubleReg, kScratchDoubleReg, Operand(1));
__ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
__ vadduwm(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1);
break;
}
case kPPC_I32x4Abs: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register src = i.InputSimd128Register(0);
constexpr int shift_bits = 31;
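// abs(x) = (x ^ (x >> 31)) - (x >> 31); the shift amount is splatted per byte for vsraw.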
__ li(ip, Operand(shift_bits));
__ mtvsrd(kScratchDoubleReg, ip);
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
__ vsraw(kScratchDoubleReg, src, kScratchDoubleReg);
__ vxor(tempFPReg1, src, kScratchDoubleReg);
__ vsubuwm(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg);
break;
}
case kPPC_I16x8Neg: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
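// Splat 1 into every halfword lane, then negate via two's complement: -x = ~x + 1.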
__ li(ip, Operand(1));
__ mtvsrd(kScratchDoubleReg, ip);
__ vsplth(kScratchDoubleReg, kScratchDoubleReg, Operand(3));
__ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
__ vadduhm(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1);
break;
}
case kPPC_I16x8Abs: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register src = i.InputSimd128Register(0);
constexpr int shift_bits = 15;
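// abs(x) = (x ^ (x >> 15)) - (x >> 15); the shift amount is splatted per byte for vsrah.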
__ li(ip, Operand(shift_bits));
__ mtvsrd(kScratchDoubleReg, ip);
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
__ vsrah(kScratchDoubleReg, src, kScratchDoubleReg);
__ vxor(tempFPReg1, src, kScratchDoubleReg);
__ vsubuhm(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg);
break;
}
case kPPC_I8x16Neg: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
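// Splat 1 into every byte lane, then negate via two's complement: -x = ~x + 1.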
__ li(ip, Operand(1));
__ mtvsrd(kScratchDoubleReg, ip);
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
__ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
__ vaddubm(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1);
break;
}
case kPPC_I8x16Abs: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register src = i.InputSimd128Register(0);
constexpr int shift_bits = 7;
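// abs(x) = (x ^ (x >> 7)) - (x >> 7); the shift amount is splatted per byte for vsrab.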
__ li(ip, Operand(shift_bits));
__ mtvsrd(kScratchDoubleReg, ip);
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
__ vsrab(kScratchDoubleReg, src, kScratchDoubleReg);
__ vxor(tempFPReg1, src, kScratchDoubleReg);
__ vsububm(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg);
break;
}
case kPPC_StoreCompressTagged: {
ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedFieldX);
break;
......
@@ -200,6 +200,9 @@ namespace compiler {
V(PPC_F64x2Ne) \
V(PPC_F64x2Le) \
V(PPC_F64x2Lt) \
V(PPC_F64x2Abs) \
V(PPC_F64x2Neg) \
V(PPC_F64x2Sqrt) \
V(PPC_F32x4Splat) \
V(PPC_F32x4ExtractLane) \
V(PPC_F32x4ReplaceLane) \
@@ -211,6 +214,11 @@ namespace compiler {
V(PPC_F32x4Ne) \
V(PPC_F32x4Lt) \
V(PPC_F32x4Le) \
V(PPC_F32x4Abs) \
V(PPC_F32x4Neg) \
V(PPC_F32x4RecipApprox) \
V(PPC_F32x4RecipSqrtApprox) \
V(PPC_F32x4Sqrt) \
V(PPC_I64x2Splat) \
V(PPC_I64x2ExtractLane) \
V(PPC_I64x2ReplaceLane) \
@@ -230,6 +238,7 @@ namespace compiler {
V(PPC_I64x2Shl) \
V(PPC_I64x2ShrS) \
V(PPC_I64x2ShrU) \
V(PPC_I64x2Neg) \
V(PPC_I32x4Splat) \
V(PPC_I32x4ExtractLane) \
V(PPC_I32x4ReplaceLane) \
@@ -250,6 +259,8 @@ namespace compiler {
V(PPC_I32x4Shl) \
V(PPC_I32x4ShrS) \
V(PPC_I32x4ShrU) \
V(PPC_I32x4Neg) \
V(PPC_I32x4Abs) \
V(PPC_I16x8Splat) \
V(PPC_I16x8ExtractLaneU) \
V(PPC_I16x8ExtractLaneS) \
@@ -271,6 +282,8 @@ namespace compiler {
V(PPC_I16x8Shl) \
V(PPC_I16x8ShrS) \
V(PPC_I16x8ShrU) \
V(PPC_I16x8Neg) \
V(PPC_I16x8Abs) \
V(PPC_I8x16Splat) \
V(PPC_I8x16ExtractLaneU) \
V(PPC_I8x16ExtractLaneS) \
@@ -291,6 +304,8 @@ namespace compiler {
V(PPC_I8x16Shl) \
V(PPC_I8x16ShrS) \
V(PPC_I8x16ShrU) \
V(PPC_I8x16Neg) \
V(PPC_I8x16Abs) \
V(PPC_S128And) \
V(PPC_S128Or) \
V(PPC_S128Xor) \
......
@@ -123,6 +123,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_F64x2Ne:
case kPPC_F64x2Le:
case kPPC_F64x2Lt:
case kPPC_F64x2Abs:
case kPPC_F64x2Neg:
case kPPC_F64x2Sqrt:
case kPPC_F32x4Splat:
case kPPC_F32x4ExtractLane:
case kPPC_F32x4ReplaceLane:
@@ -134,6 +137,11 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_F32x4Ne:
case kPPC_F32x4Lt:
case kPPC_F32x4Le:
case kPPC_F32x4Abs:
case kPPC_F32x4Neg:
case kPPC_F32x4RecipApprox:
case kPPC_F32x4RecipSqrtApprox:
case kPPC_F32x4Sqrt:
case kPPC_I64x2Splat:
case kPPC_I64x2ExtractLane:
case kPPC_I64x2ReplaceLane:
@@ -153,6 +161,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_I64x2Shl:
case kPPC_I64x2ShrS:
case kPPC_I64x2ShrU:
case kPPC_I64x2Neg:
case kPPC_I32x4Splat:
case kPPC_I32x4ExtractLane:
case kPPC_I32x4ReplaceLane:
@@ -173,6 +182,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_I32x4Shl:
case kPPC_I32x4ShrS:
case kPPC_I32x4ShrU:
case kPPC_I32x4Neg:
case kPPC_I32x4Abs:
case kPPC_I16x8Splat:
case kPPC_I16x8ExtractLaneU:
case kPPC_I16x8ExtractLaneS:
@@ -194,6 +205,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_I16x8Shl:
case kPPC_I16x8ShrS:
case kPPC_I16x8ShrU:
case kPPC_I16x8Neg:
case kPPC_I16x8Abs:
case kPPC_I8x16Splat:
case kPPC_I8x16ExtractLaneU:
case kPPC_I8x16ExtractLaneS:
@@ -214,6 +227,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_I8x16Shl:
case kPPC_I8x16ShrS:
case kPPC_I8x16ShrU:
case kPPC_I8x16Neg:
case kPPC_I8x16Abs:
case kPPC_S128And:
case kPPC_S128Or:
case kPPC_S128Xor:
......
@@ -2191,7 +2191,23 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(S128Or) \
V(S128Xor)
#define SIMD_UNOP_LIST(V) V(S128Not)
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs) \
V(F64x2Neg) \
V(F64x2Sqrt) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(F32x4Sqrt) \
V(I64x2Neg) \
V(I32x4Neg) \
V(I32x4Abs) \
V(I16x8Neg) \
V(I16x8Abs) \
V(I8x16Neg) \
V(I8x16Abs) \
V(S128Not)
#define SIMD_SHIFT_LIST(V) \
V(I64x2Shl) \
@@ -2256,11 +2272,12 @@ SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
#undef SIMD_VISIT_BINOP
#undef SIMD_BINOP_LIST
#define SIMD_VISIT_UNOP(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
PPCOperandGenerator g(this); \
Emit(kPPC_##Opcode, g.DefineAsRegister(node), \
g.UseRegister(node->InputAt(0))); \
#define SIMD_VISIT_UNOP(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
PPCOperandGenerator g(this); \
InstructionOperand temps[] = {g.TempSimd128Register()}; \
Emit(kPPC_##Opcode, g.DefineAsRegister(node), \
g.UseRegister(node->InputAt(0)), arraysize(temps), temps); \
}
SIMD_UNOP_LIST(SIMD_VISIT_UNOP)
#undef SIMD_VISIT_UNOP
@@ -2290,8 +2307,6 @@ void InstructionSelector::VisitS128Select(Node* node) {
g.UseRegister(node->InputAt(2)));
}
void InstructionSelector::VisitI32x4Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8AddSaturateS(Node* node) {
UNIMPLEMENTED();
}
@@ -2308,8 +2323,6 @@ void InstructionSelector::VisitI16x8SubSaturateU(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8RoundingAverageU(Node* node) {
UNIMPLEMENTED();
}
@@ -2318,8 +2331,6 @@ void InstructionSelector::VisitI8x16RoundingAverageU(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI8x16Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16AddSaturateS(Node* node) {
UNIMPLEMENTED();
}
@@ -2361,24 +2372,12 @@ void InstructionSelector::EmitPrepareResults(
}
}
void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Max(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitF32x4RecipApprox(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
@@ -2458,28 +2457,14 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16Swizzle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4Abs(Node* node) { UNIMPLEMENTED(); }
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
......