Commit b67dd429 authored by Milad Fa, committed by V8 LUCI CQ

PPC [simd]: optimize I64x2Mul on Power10

This CL also optimizes the Power9 code path by using
mtvsrdd.

Change-Id: Ibd6b227111adc0c262c621be6ce4068d3de2e659
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3060493
Reviewed-by: Junliang Yan <junyan@redhat.com>
Reviewed-by: Milad Fa <mfarazma@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#76008}
parent d63ca69c
...@@ -2313,6 +2313,8 @@ using Instr = uint32_t; ...@@ -2313,6 +2313,8 @@ using Instr = uint32_t;
V(vmulosw, VMULOSW, 0x10000188) \ V(vmulosw, VMULOSW, 0x10000188) \
/* Vector Multiply Odd Unsigned Word */ \ /* Vector Multiply Odd Unsigned Word */ \
V(vmulouw, VMULOUW, 0x10000088) \ V(vmulouw, VMULOUW, 0x10000088) \
/* Vector Multiply Low Doubleword */ \
V(vmulld, VMULLD, 0x100001C9) \
/* Vector Sum across Quarter Signed Halfword Saturate */ \ /* Vector Sum across Quarter Signed Halfword Saturate */ \
V(vsum4shs, VSUM4SHS, 0x10000648) \ V(vsum4shs, VSUM4SHS, 0x10000648) \
/* Vector Pack Unsigned Word Unsigned Saturate */ \ /* Vector Pack Unsigned Word Unsigned Saturate */ \
......
...@@ -2372,26 +2372,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2372,26 +2372,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
constexpr int lane_width_in_bytes = 8; constexpr int lane_width_in_bytes = 8;
Simd128Register src0 = i.InputSimd128Register(0); Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1); Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); Simd128Register tempFPReg0 = i.ToSimd128Register(instr->TempAt(0));
Register tempReg1 = i.ToRegister(instr->TempAt(2));
Register scratch_0 = ip;
Register scratch_1 = r0;
Simd128Register dst = i.OutputSimd128Register(); Simd128Register dst = i.OutputSimd128Register();
for (int i = 0; i < 2; i++) { if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
if (i > 0) { __ vmulld(dst, src0, src1);
__ vextractd(kScratchSimd128Reg, src0, } else {
Operand(1 * lane_width_in_bytes)); for (int i = 0; i < 2; i++) {
__ vextractd(tempFPReg1, src1, Operand(1 * lane_width_in_bytes)); if (i > 0) {
src0 = kScratchSimd128Reg; __ vextractd(kScratchSimd128Reg, src0,
src1 = tempFPReg1; Operand(1 * lane_width_in_bytes));
} __ vextractd(tempFPReg0, src1, Operand(1 * lane_width_in_bytes));
__ mfvsrd(r0, src0); src0 = kScratchSimd128Reg;
__ mfvsrd(ip, src1); src1 = tempFPReg0;
__ mulld(r0, r0, ip); }
if (i <= 0) { __ mfvsrd(scratch_0, src0);
__ mtvsrd(dst, r0); __ mfvsrd(scratch_1, src1);
} else { __ mulld(scratch_0, scratch_0, scratch_1);
__ mtvsrd(kScratchSimd128Reg, r0); scratch_0 = r0;
__ vinsertd(dst, kScratchSimd128Reg, scratch_1 = tempReg1;
Operand(1 * lane_width_in_bytes));
} }
__ mtvsrdd(dst, ip, r0);
} }
break; break;
} }
......
...@@ -2395,14 +2395,14 @@ SIMD_VISIT_EXTRACT_LANE(I8x16, S) ...@@ -2395,14 +2395,14 @@ SIMD_VISIT_EXTRACT_LANE(I8x16, S)
SIMD_TYPES(SIMD_VISIT_REPLACE_LANE) SIMD_TYPES(SIMD_VISIT_REPLACE_LANE)
#undef SIMD_VISIT_REPLACE_LANE #undef SIMD_VISIT_REPLACE_LANE
#define SIMD_VISIT_BINOP(Opcode) \ #define SIMD_VISIT_BINOP(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \ void InstructionSelector::Visit##Opcode(Node* node) { \
PPCOperandGenerator g(this); \ PPCOperandGenerator g(this); \
InstructionOperand temps[] = {g.TempSimd128Register(), \ InstructionOperand temps[] = {g.TempSimd128Register(), \
g.TempSimd128Register()}; \ g.TempSimd128Register(), g.TempRegister()}; \
Emit(kPPC_##Opcode, g.DefineAsRegister(node), \ Emit(kPPC_##Opcode, g.DefineAsRegister(node), \
g.UseUniqueRegister(node->InputAt(0)), \ g.UseUniqueRegister(node->InputAt(0)), \
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \ g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
} }
SIMD_BINOP_LIST(SIMD_VISIT_BINOP) SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
#undef SIMD_VISIT_BINOP #undef SIMD_VISIT_BINOP
......
...@@ -4252,6 +4252,10 @@ void Simulator::ExecuteGeneric(Instruction* instr) { ...@@ -4252,6 +4252,10 @@ void Simulator::ExecuteGeneric(Instruction* instr) {
VECTOR_ARITHMETIC_OP(int64_t, -) VECTOR_ARITHMETIC_OP(int64_t, -)
break; break;
} }
case VMULLD: {
VECTOR_ARITHMETIC_OP(int64_t, *)
break;
}
case VADDUWM: { case VADDUWM: {
VECTOR_ARITHMETIC_OP(int32_t, +) VECTOR_ARITHMETIC_OP(int32_t, +)
break; break;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment