Commit 80aed7e2 authored by Milad Fa, committed by V8 LUCI CQ

PPC [simd]: Optimize ReplaceLane on Power10

The I32x4, F32x4, I64x2 and F64x2 ReplaceLane opcodes are
optimized on Power10 (P10).

Change-Id: I28ddc2b4e66ca39414e9c3ed2efd0eea268f1a07
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3067803
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#76066}
parent 8f23e48e
......@@ -501,6 +501,11 @@ class Assembler : public AssemblerBase {
inline void name(const Register rt, const Simd128Register rb) { \
vx_form(instr_name, rt, rb); \
}
// Declares one emitter method per G-form VX opcode. Each generated method takes
// a SIMD target register, a general-purpose source register and an immediate
// operand, and forwards them unchanged to the matching vx_form() overload.
// The instr_value argument is accepted but not used by this macro.
#define DECLARE_PPC_VX_INSTRUCTIONS_G_FORM(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Register rb, \
const Operand& imm) { \
vx_form(instr_name, rt, rb, imm); \
}
inline void vx_form(Instr instr, Simd128Register rt, Simd128Register rb,
const Operand& imm) {
......@@ -521,6 +526,11 @@ class Assembler : public AssemblerBase {
// Emits a VX-form instruction with a general-purpose target register and a
// SIMD source register. The low five bits of each register code are shifted
// to the B21 (rt) and B11 (rb) positions and OR-ed into |instr|.
inline void vx_form(Instr instr, Register rt, Simd128Register rb) {
  const Instr target_field = (rt.code() & 0x1F) * B21;
  const Instr source_field = (rb.code() & 0x1F) * B11;
  emit(instr | target_field | source_field);
}
// Emits a VX-form instruction with a SIMD target register, a general-purpose
// source register and an unsigned immediate (used by vinsw/vinsd).
//   rt  - target vector register, encoded at B21 (five bits).
//   rb  - source general-purpose register, encoded at B11 (five bits).
//   imm - immediate index, encoded at B16.
inline void vx_form(Instr instr, Simd128Register rt, Register rb,
                    const Operand& imm) {
  // The UIM field of these instructions is four bits wide (instruction bits
  // 19..16); bit 20 is not part of it. Mask with 0xF so an out-of-range
  // immediate cannot spill into the neighbouring bit of the encoding.
  emit(instr | (rt.code() & 0x1F) * B21 | (imm.immediate() & 0xF) * B16 |
       (rb.code() & 0x1F) * B11);
}
PPC_VX_OPCODE_A_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_A_FORM)
PPC_VX_OPCODE_B_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_B_FORM)
......@@ -530,11 +540,13 @@ class Assembler : public AssemblerBase {
INSTRUCTIONS_C_FORM */
// Instantiate the E-, F- and G-form emitters from their opcode lists.
PPC_VX_OPCODE_E_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_E_FORM)
PPC_VX_OPCODE_F_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_F_FORM)
PPC_VX_OPCODE_G_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_G_FORM)
// The per-form declaration macros are only needed for the expansions above;
// undefine them so they do not leak past this point.
#undef DECLARE_PPC_VX_INSTRUCTIONS_A_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_B_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_C_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_E_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_F_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_G_FORM
#define DECLARE_PPC_VA_INSTRUCTIONS_A_FORM(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register ra, \
......
......@@ -2470,6 +2470,14 @@ using Instr = uint32_t;
/* Vector Extract Doubleword Mask */ \
V(vextractdm, VEXTRACTDM, 0x100B0642)
// G-form VX opcodes: instructions that insert a general-purpose register value
// into a vector register at an immediate-specified index. Each entry is
// V(assembler mnemonic, opcode enumerator, instruction encoding).
#define PPC_VX_OPCODE_G_FORM_LIST(V) \
/* Vector Insert Word from GPR using \
immediate-specified index */ \
V(vinsw, VINSW, 0x100000CF) \
/* Vector Insert Doubleword from GPR using \
immediate-specified index */ \
V(vinsd, VINSD, 0x100001CF)
#define PPC_VX_OPCODE_UNUSED_LIST(V) \
/* Decimal Add Modulo */ \
V(bcdadd, BCDADD, 0xF0000400) \
......@@ -2623,6 +2631,7 @@ using Instr = uint32_t;
PPC_VX_OPCODE_D_FORM_LIST(V) \
PPC_VX_OPCODE_E_FORM_LIST(V) \
PPC_VX_OPCODE_F_FORM_LIST(V) \
PPC_VX_OPCODE_G_FORM_LIST(V) \
PPC_VX_OPCODE_UNUSED_LIST(V)
#define PPC_XS_OPCODE_LIST(V) \
......@@ -2970,6 +2979,7 @@ class Instruction {
PPC_VX_OPCODE_B_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_C_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_E_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_G_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_UNUSED_LIST(OPCODE_CASES)
PPC_X_OPCODE_EH_S_FORM_LIST(OPCODE_CASES)
return static_cast<Opcode>(opcode);
......
......@@ -2279,9 +2279,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
constexpr int lane_width_in_bytes = 8;
Simd128Register dst = i.OutputSimd128Register();
__ MovDoubleToInt64(r0, i.InputDoubleRegister(2));
__ mtvsrd(kScratchSimd128Reg, r0);
__ vinsertd(dst, kScratchSimd128Reg,
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
__ vinsd(dst, r0, Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
} else {
__ mtvsrd(kScratchSimd128Reg, r0);
__ vinsertd(dst, kScratchSimd128Reg,
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
}
break;
}
case kPPC_F32x4ReplaceLane: {
......@@ -2289,27 +2293,41 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
constexpr int lane_width_in_bytes = 4;
Simd128Register dst = i.OutputSimd128Register();
__ MovFloatToInt(r0, i.InputDoubleRegister(2));
__ mtvsrd(kScratchSimd128Reg, r0);
__ vinsertw(dst, kScratchSimd128Reg,
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
__ vinsw(dst, r0, Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
} else {
__ mtvsrd(kScratchSimd128Reg, r0);
__ vinsertw(dst, kScratchSimd128Reg,
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
}
break;
}
case kPPC_I64x2ReplaceLane: {
  // Replaces one 64-bit lane of the output vector (which must alias input 0)
  // with the general-purpose register given as input 2. Input 1 is the
  // immediate lane index.
  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
  constexpr int lane_width_in_bytes = 8;
  Simd128Register dst = i.OutputSimd128Register();
  // The lane index is mirrored (1 - lane) and then scaled to a byte offset.
  const int byte_offset = (1 - i.InputInt8(1)) * lane_width_in_bytes;
  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
    // Power10: insert straight from the GPR; no scratch vector register needed.
    __ vinsd(dst, i.InputRegister(2), Operand(byte_offset));
  } else {
    // Older cores: stage the value in the scratch vector register first.
    __ mtvsrd(kScratchSimd128Reg, i.InputRegister(2));
    __ vinsertd(dst, kScratchSimd128Reg, Operand(byte_offset));
  }
  break;
}
case kPPC_I32x4ReplaceLane: {
  // Replaces one 32-bit lane of the output vector (which must alias input 0)
  // with the general-purpose register given as input 2. Input 1 is the
  // immediate lane index.
  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
  constexpr int lane_width_in_bytes = 4;
  Simd128Register dst = i.OutputSimd128Register();
  // The lane index is mirrored (3 - lane) and then scaled to a byte offset.
  const int byte_offset = (3 - i.InputInt8(1)) * lane_width_in_bytes;
  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
    // Power10: insert straight from the GPR; no scratch vector register needed.
    __ vinsw(dst, i.InputRegister(2), Operand(byte_offset));
  } else {
    // Older cores: stage the value in the scratch vector register first.
    __ mtvsrd(kScratchSimd128Reg, i.InputRegister(2));
    __ vinsertw(dst, kScratchSimd128Reg, Operand(byte_offset));
  }
  break;
}
case kPPC_I16x8ReplaceLane: {
......
......@@ -492,6 +492,13 @@ void Decoder::DecodeExt0(Instruction* instr) {
}
PPC_VX_OPCODE_E_FORM_LIST(DECODE_VX_E_FORM__INSTRUCTIONS)
#undef DECODE_VX_E_FORM__INSTRUCTIONS
// Expands to one switch case per G-form VX opcode. Each case formats the
// instruction as "<mnemonic> 'Vt, 'rb, 'UIM" and returns from the decoder.
#define DECODE_VX_G_FORM__INSTRUCTIONS(mnemonic, opcode, encoding) \
  case opcode:                                                      \
    Format(instr, #mnemonic " 'Vt, 'rb, 'UIM");                     \
    return;
PPC_VX_OPCODE_G_FORM_LIST(DECODE_VX_G_FORM__INSTRUCTIONS)
#undef DECODE_VX_G_FORM__INSTRUCTIONS
}
}
......
......@@ -4161,6 +4161,21 @@ void Simulator::ExecuteGeneric(Instruction* instr) {
break;
}
#undef VINSERT
// Shared body for the insert-from-GPR instructions. It reads the index from
// instruction bits 19..16, converts the RB general-purpose register value to
// |type|, and hands both to set_simd_register_bytes<type> together with the RT
// vector register number.
#define VINSERT_IMMEDIATE(type)                                   \
  uint8_t byte_index = instr->Bits(19, 16);                       \
  int target_vreg = instr->RTValue();                             \
  int source_gpr = instr->RBValue();                              \
  type gpr_value = static_cast<type>(get_register(source_gpr));   \
  set_simd_register_bytes<type>(target_vreg, byte_index, gpr_value);
case VINSW: {
  // Word insert: uses the register value converted to int32_t.
  VINSERT_IMMEDIATE(int32_t)
  break;
}
case VINSD: {
  // Doubleword insert: uses the register value as int64_t.
  VINSERT_IMMEDIATE(int64_t)
  break;
}
#undef VINSERT_IMMEDIATE
#define VEXTRACT(type, element) \
uint8_t uim = instr->Bits(19, 16); \
int vrt = instr->RTValue(); \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment