Commit 80aed7e2 authored by Milad Fa, committed by V8 LUCI CQ

PPC [simd]: Optimize ReplaceLane on Power10

I/F 32x4 and 64x2 ReplaceLane opcodes are optimized
on P10.

Change-Id: I28ddc2b4e66ca39414e9c3ed2efd0eea268f1a07
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3067803
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#76066}
parent 8f23e48e
...@@ -501,6 +501,11 @@ class Assembler : public AssemblerBase { ...@@ -501,6 +501,11 @@ class Assembler : public AssemblerBase {
inline void name(const Register rt, const Simd128Register rb) { \ inline void name(const Register rt, const Simd128Register rb) { \
vx_form(instr_name, rt, rb); \ vx_form(instr_name, rt, rb); \
} }
// Declares one inline emitter per G-form VX opcode. The generated
// name(rt, rb, imm) takes a SIMD target register, a general-purpose source
// register and an immediate operand, and forwards them to the matching
// vx_form overload. The instr_value argument is not referenced by the
// expansion; only name and instr_name are used.
#define DECLARE_PPC_VX_INSTRUCTIONS_G_FORM(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Register rb, \
const Operand& imm) { \
vx_form(instr_name, rt, rb, imm); \
}
inline void vx_form(Instr instr, Simd128Register rt, Simd128Register rb, inline void vx_form(Instr instr, Simd128Register rt, Simd128Register rb,
const Operand& imm) { const Operand& imm) {
...@@ -521,6 +526,11 @@ class Assembler : public AssemblerBase { ...@@ -521,6 +526,11 @@ class Assembler : public AssemblerBase {
inline void vx_form(Instr instr, Register rt, Simd128Register rb) { inline void vx_form(Instr instr, Register rt, Simd128Register rb) {
emit(instr | (rt.code() & 0x1F) * B21 | (rb.code() & 0x1F) * B11); emit(instr | (rt.code() & 0x1F) * B21 | (rb.code() & 0x1F) * B11);
} }
// Emits a VX-form instruction whose operands are a SIMD target register, an
// immediate and a general-purpose source register. Each operand is masked to
// its low five bits and placed at a fixed position: rt at B21, the immediate
// at B16 and rb at B11, OR-ed into the base opcode `instr`.
// NOTE(review): the 0x1F mask admits a 5-bit immediate; confirm this matches
// the width of the immediate field for the opcodes routed through this
// overload.
inline void vx_form(Instr instr, Simd128Register rt, Register rb,
const Operand& imm) {
emit(instr | (rt.code() & 0x1F) * B21 | (imm.immediate() & 0x1F) * B16 |
(rb.code() & 0x1F) * B11);
}
PPC_VX_OPCODE_A_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_A_FORM) PPC_VX_OPCODE_A_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_A_FORM)
PPC_VX_OPCODE_B_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_B_FORM) PPC_VX_OPCODE_B_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_B_FORM)
...@@ -530,11 +540,13 @@ class Assembler : public AssemblerBase { ...@@ -530,11 +540,13 @@ class Assembler : public AssemblerBase {
INSTRUCTIONS_C_FORM */ INSTRUCTIONS_C_FORM */
PPC_VX_OPCODE_E_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_E_FORM) PPC_VX_OPCODE_E_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_E_FORM)
PPC_VX_OPCODE_F_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_F_FORM) PPC_VX_OPCODE_F_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_F_FORM)
PPC_VX_OPCODE_G_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_G_FORM)
#undef DECLARE_PPC_VX_INSTRUCTIONS_A_FORM #undef DECLARE_PPC_VX_INSTRUCTIONS_A_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_B_FORM #undef DECLARE_PPC_VX_INSTRUCTIONS_B_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_C_FORM #undef DECLARE_PPC_VX_INSTRUCTIONS_C_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_E_FORM #undef DECLARE_PPC_VX_INSTRUCTIONS_E_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_F_FORM #undef DECLARE_PPC_VX_INSTRUCTIONS_F_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_G_FORM
#define DECLARE_PPC_VA_INSTRUCTIONS_A_FORM(name, instr_name, instr_value) \ #define DECLARE_PPC_VA_INSTRUCTIONS_A_FORM(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register ra, \ inline void name(const Simd128Register rt, const Simd128Register ra, \
......
...@@ -2470,6 +2470,14 @@ using Instr = uint32_t; ...@@ -2470,6 +2470,14 @@ using Instr = uint32_t;
/* Vector Extract Doubleword Mask */ \ /* Vector Extract Doubleword Mask */ \
V(vextractdm, VEXTRACTDM, 0x100B0642) V(vextractdm, VEXTRACTDM, 0x100B0642)
// X-macro list of the G-form VX opcodes: instructions that insert a value
// from a general-purpose register into a vector register at an index given
// by an immediate. Each entry is V(mnemonic, OPCODE_NAME, encoding).
#define PPC_VX_OPCODE_G_FORM_LIST(V) \
/* Vector Insert Word from GPR using \
immediate-specified index */ \
V(vinsw, VINSW, 0x100000CF) \
/* Vector Insert Doubleword from GPR using \
immediate-specified index */ \
V(vinsd, VINSD, 0x100001CF)
#define PPC_VX_OPCODE_UNUSED_LIST(V) \ #define PPC_VX_OPCODE_UNUSED_LIST(V) \
/* Decimal Add Modulo */ \ /* Decimal Add Modulo */ \
V(bcdadd, BCDADD, 0xF0000400) \ V(bcdadd, BCDADD, 0xF0000400) \
...@@ -2623,6 +2631,7 @@ using Instr = uint32_t; ...@@ -2623,6 +2631,7 @@ using Instr = uint32_t;
PPC_VX_OPCODE_D_FORM_LIST(V) \ PPC_VX_OPCODE_D_FORM_LIST(V) \
PPC_VX_OPCODE_E_FORM_LIST(V) \ PPC_VX_OPCODE_E_FORM_LIST(V) \
PPC_VX_OPCODE_F_FORM_LIST(V) \ PPC_VX_OPCODE_F_FORM_LIST(V) \
PPC_VX_OPCODE_G_FORM_LIST(V) \
PPC_VX_OPCODE_UNUSED_LIST(V) PPC_VX_OPCODE_UNUSED_LIST(V)
#define PPC_XS_OPCODE_LIST(V) \ #define PPC_XS_OPCODE_LIST(V) \
...@@ -2970,6 +2979,7 @@ class Instruction { ...@@ -2970,6 +2979,7 @@ class Instruction {
PPC_VX_OPCODE_B_FORM_LIST(OPCODE_CASES) PPC_VX_OPCODE_B_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_C_FORM_LIST(OPCODE_CASES) PPC_VX_OPCODE_C_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_E_FORM_LIST(OPCODE_CASES) PPC_VX_OPCODE_E_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_G_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_UNUSED_LIST(OPCODE_CASES) PPC_VX_OPCODE_UNUSED_LIST(OPCODE_CASES)
PPC_X_OPCODE_EH_S_FORM_LIST(OPCODE_CASES) PPC_X_OPCODE_EH_S_FORM_LIST(OPCODE_CASES)
return static_cast<Opcode>(opcode); return static_cast<Opcode>(opcode);
......
...@@ -2279,9 +2279,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2279,9 +2279,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
constexpr int lane_width_in_bytes = 8; constexpr int lane_width_in_bytes = 8;
Simd128Register dst = i.OutputSimd128Register(); Simd128Register dst = i.OutputSimd128Register();
__ MovDoubleToInt64(r0, i.InputDoubleRegister(2)); __ MovDoubleToInt64(r0, i.InputDoubleRegister(2));
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
__ vinsd(dst, r0, Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
} else {
__ mtvsrd(kScratchSimd128Reg, r0); __ mtvsrd(kScratchSimd128Reg, r0);
__ vinsertd(dst, kScratchSimd128Reg, __ vinsertd(dst, kScratchSimd128Reg,
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes)); Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
}
break; break;
} }
case kPPC_F32x4ReplaceLane: { case kPPC_F32x4ReplaceLane: {
...@@ -2289,27 +2293,41 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2289,27 +2293,41 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
constexpr int lane_width_in_bytes = 4; constexpr int lane_width_in_bytes = 4;
Simd128Register dst = i.OutputSimd128Register(); Simd128Register dst = i.OutputSimd128Register();
__ MovFloatToInt(r0, i.InputDoubleRegister(2)); __ MovFloatToInt(r0, i.InputDoubleRegister(2));
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
__ vinsw(dst, r0, Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
} else {
__ mtvsrd(kScratchSimd128Reg, r0); __ mtvsrd(kScratchSimd128Reg, r0);
__ vinsertw(dst, kScratchSimd128Reg, __ vinsertw(dst, kScratchSimd128Reg,
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes)); Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
}
break; break;
} }
case kPPC_I64x2ReplaceLane: { case kPPC_I64x2ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
constexpr int lane_width_in_bytes = 8; constexpr int lane_width_in_bytes = 8;
Simd128Register dst = i.OutputSimd128Register(); Simd128Register dst = i.OutputSimd128Register();
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
__ vinsd(dst, i.InputRegister(2),
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
} else {
__ mtvsrd(kScratchSimd128Reg, i.InputRegister(2)); __ mtvsrd(kScratchSimd128Reg, i.InputRegister(2));
__ vinsertd(dst, kScratchSimd128Reg, __ vinsertd(dst, kScratchSimd128Reg,
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes)); Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
}
break; break;
} }
case kPPC_I32x4ReplaceLane: { case kPPC_I32x4ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
constexpr int lane_width_in_bytes = 4; constexpr int lane_width_in_bytes = 4;
Simd128Register dst = i.OutputSimd128Register(); Simd128Register dst = i.OutputSimd128Register();
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
__ vinsw(dst, i.InputRegister(2),
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
} else {
__ mtvsrd(kScratchSimd128Reg, i.InputRegister(2)); __ mtvsrd(kScratchSimd128Reg, i.InputRegister(2));
__ vinsertw(dst, kScratchSimd128Reg, __ vinsertw(dst, kScratchSimd128Reg,
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes)); Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
}
break; break;
} }
case kPPC_I16x8ReplaceLane: { case kPPC_I16x8ReplaceLane: {
......
...@@ -492,6 +492,13 @@ void Decoder::DecodeExt0(Instruction* instr) { ...@@ -492,6 +492,13 @@ void Decoder::DecodeExt0(Instruction* instr) {
} }
PPC_VX_OPCODE_E_FORM_LIST(DECODE_VX_E_FORM__INSTRUCTIONS) PPC_VX_OPCODE_E_FORM_LIST(DECODE_VX_E_FORM__INSTRUCTIONS)
#undef DECODE_VX_E_FORM__INSTRUCTIONS #undef DECODE_VX_E_FORM__INSTRUCTIONS
// Generates one disassembler case per G-form VX opcode. Each case formats the
// instruction as "<mnemonic> 'Vt, 'rb, 'UIM" and returns. The macro is
// undefined again right after the opcode list has been expanded.
#define DECODE_VX_G_FORM__INSTRUCTIONS(name, opcode_name, opcode_value) \
case opcode_name: { \
Format(instr, #name " 'Vt, 'rb, 'UIM"); \
return; \
}
PPC_VX_OPCODE_G_FORM_LIST(DECODE_VX_G_FORM__INSTRUCTIONS)
#undef DECODE_VX_G_FORM__INSTRUCTIONS
} }
} }
......
...@@ -4161,6 +4161,21 @@ void Simulator::ExecuteGeneric(Instruction* instr) { ...@@ -4161,6 +4161,21 @@ void Simulator::ExecuteGeneric(Instruction* instr) {
break; break;
} }
#undef VINSERT #undef VINSERT
// Simulates the insert-from-GPR instructions. The macro reads the immediate
// from instruction bits 19..16, takes the target vector register number from
// RTValue() and the source general-purpose register number from RBValue(),
// casts the source register's value to `type`, and stores it with
// set_simd_register_bytes<type>(vrt, uim, src).
// NOTE(review): uim is passed straight through as the second argument of
// set_simd_register_bytes; confirm that helper's indexing convention matches
// the immediate computed by the code generator.
#define VINSERT_IMMEDIATE(type) \
uint8_t uim = instr->Bits(19, 16); \
int vrt = instr->RTValue(); \
int rb = instr->RBValue(); \
type src = static_cast<type>(get_register(rb)); \
set_simd_register_bytes<type>(vrt, uim, src);
case VINSD: {
// Doubleword variant: the source register value is cast to int64_t.
VINSERT_IMMEDIATE(int64_t)
break;
}
case VINSW: {
// Word variant: the source register value is cast to int32_t.
VINSERT_IMMEDIATE(int32_t)
break;
}
#undef VINSERT_IMMEDIATE
#define VEXTRACT(type, element) \ #define VEXTRACT(type, element) \
uint8_t uim = instr->Bits(19, 16); \ uint8_t uim = instr->Bits(19, 16); \
int vrt = instr->RTValue(); \ int vrt = instr->RTValue(); \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment