Commit c55ee71f authored by Milad Fa, committed by V8 LUCI CQ

PPC [simd]: Implement vector (VMX) splat immediates

This CL adds vector splat byte, word and halfword to
codegen, disassembler and the simulator.

It also optimizes a number of Simd opcodes by using
the added instructions as well as VSX splat immediate (xxspltib).

Change-Id: I2c4eba33e81542f901d7cdc669c50b510e48c4c8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2909525
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#74701}
parent 4636b43b
...@@ -485,6 +485,10 @@ class Assembler : public AssemblerBase { ...@@ -485,6 +485,10 @@ class Assembler : public AssemblerBase {
inline void name(const Simd128Register rt, const Simd128Register rb) { \ inline void name(const Simd128Register rt, const Simd128Register rb) { \
vx_form(instr_name, rt, rb); \ vx_form(instr_name, rt, rb); \
} }
// Declares one inline assembler emitter per VX "E-form" opcode. The generated
// function name(rt, imm) forwards the opcode constant instr_name, the target
// vector register rt and the immediate operand imm to vx_form. The
// instr_value argument is supplied by the opcode list but is not used in this
// expansion.
#define DECLARE_PPC_VX_INSTRUCTIONS_E_FORM(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Operand& imm) { \
vx_form(instr_name, rt, imm); \
}
inline void vx_form(Instr instr, Simd128Register rt, Simd128Register rb, inline void vx_form(Instr instr, Simd128Register rt, Simd128Register rb,
const Operand& imm) { const Operand& imm) {
...@@ -497,6 +501,9 @@ class Assembler : public AssemblerBase { ...@@ -497,6 +501,9 @@ class Assembler : public AssemblerBase {
inline void vx_form(Instr instr, Simd128Register rt, Simd128Register rb) { inline void vx_form(Instr instr, Simd128Register rt, Simd128Register rb) {
emit(instr | rt.code() * B21 | rb.code() * B11); emit(instr | rt.code() * B21 | rb.code() * B11);
} }
// Emits a VX-form instruction that takes a target vector register and an
// immediate. The register code is placed in the field scaled by B21 and the
// immediate in the field scaled by B16.
inline void vx_form(Instr instr, Simd128Register rt, const Operand& imm) {
  // Only the low five bits of the immediate are encoded; any higher bits are
  // dropped by the mask rather than rejected.
  const auto sim_field = (imm.immediate() & 0x1F) * B16;
  const auto rt_field = rt.code() * B21;
  emit(instr | rt_field | sim_field);
}
PPC_VX_OPCODE_A_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_A_FORM) PPC_VX_OPCODE_A_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_A_FORM)
PPC_VX_OPCODE_B_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_B_FORM) PPC_VX_OPCODE_B_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_B_FORM)
...@@ -504,9 +511,11 @@ class Assembler : public AssemblerBase { ...@@ -504,9 +511,11 @@ class Assembler : public AssemblerBase {
PPC_VX_OPCODE_D_FORM_LIST( PPC_VX_OPCODE_D_FORM_LIST(
DECLARE_PPC_VX_INSTRUCTIONS_C_FORM) /* OPCODE_D_FORM can use DECLARE_PPC_VX_INSTRUCTIONS_C_FORM) /* OPCODE_D_FORM can use
INSTRUCTIONS_C_FORM */ INSTRUCTIONS_C_FORM */
PPC_VX_OPCODE_E_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_E_FORM)
#undef DECLARE_PPC_VX_INSTRUCTIONS_A_FORM #undef DECLARE_PPC_VX_INSTRUCTIONS_A_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_B_FORM #undef DECLARE_PPC_VX_INSTRUCTIONS_B_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_C_FORM #undef DECLARE_PPC_VX_INSTRUCTIONS_C_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_E_FORM
#define DECLARE_PPC_VA_INSTRUCTIONS_A_FORM(name, instr_name, instr_value) \ #define DECLARE_PPC_VA_INSTRUCTIONS_A_FORM(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register ra, \ inline void name(const Simd128Register rt, const Simd128Register ra, \
......
...@@ -86,6 +86,9 @@ const int kLoadDoubleMaxReachBits = 15; ...@@ -86,6 +86,9 @@ const int kLoadDoubleMaxReachBits = 15;
// TODO(sigurds): Choose best value. // TODO(sigurds): Choose best value.
constexpr int kRootRegisterBias = 128; constexpr int kRootRegisterBias = 128;
// Sign-extend the least significant 5 bits of value <imm> to an int in the
// range [-16, 15]. Bits above bit 4 are ignored. The mask/xor/subtract form
// is used instead of a left shift followed by a right shift because shifting
// a signed int into (or past) the sign bit is not well-defined before C++20;
// this form yields the same result for every int input.
#define SIGN_EXT_IMM5(imm) \
  ((((static_cast<int>(imm)) & 0x1F) ^ 0x10) - 0x10)
// sign-extend the least significant 16-bits of value <imm> // sign-extend the least significant 16-bits of value <imm>
#define SIGN_EXT_IMM16(imm) ((static_cast<int>(imm) << 16) >> 16) #define SIGN_EXT_IMM16(imm) ((static_cast<int>(imm) << 16) >> 16)
...@@ -2443,6 +2446,14 @@ using Instr = uint32_t; ...@@ -2443,6 +2446,14 @@ using Instr = uint32_t;
/* Vector Negate Doubleword */ \ /* Vector Negate Doubleword */ \
V(vnegd, VNEGD, 0x10070602) V(vnegd, VNEGD, 0x10070602)
// VX-form "E" opcodes: vector splat-immediate instructions. Each entry is
// V(assembler_name, OPCODE_ENUM_NAME, opcode_value); V is applied once per
// entry by whichever macro the user of this list passes in.
#define PPC_VX_OPCODE_E_FORM_LIST(V) \
/* Vector Splat Immediate Signed Byte */ \
V(vspltisb, VSPLTISB, 0x1000030C) \
/* Vector Splat Immediate Signed Halfword */ \
V(vspltish, VSPLTISH, 0x1000034C) \
/* Vector Splat Immediate Signed Word */ \
V(vspltisw, VSPLTISW, 0x1000038C)
#define PPC_VX_OPCODE_UNUSED_LIST(V) \ #define PPC_VX_OPCODE_UNUSED_LIST(V) \
/* Decimal Add Modulo */ \ /* Decimal Add Modulo */ \
V(bcdadd, BCDADD, 0xF0000400) \ V(bcdadd, BCDADD, 0xF0000400) \
...@@ -2548,12 +2559,6 @@ using Instr = uint32_t; ...@@ -2548,12 +2559,6 @@ using Instr = uint32_t;
V(vrsqrtefp, VRSQRTEFP, 0x1000014A) \ V(vrsqrtefp, VRSQRTEFP, 0x1000014A) \
/* Vector Shift Left */ \ /* Vector Shift Left */ \
V(vsl, VSL, 0x100001C4) \ V(vsl, VSL, 0x100001C4) \
/* Vector Splat Immediate Signed Byte */ \
V(vspltisb, VSPLTISB, 0x1000030C) \
/* Vector Splat Immediate Signed Halfword */ \
V(vspltish, VSPLTISH, 0x1000034C) \
/* Vector Splat Immediate Signed Word */ \
V(vspltisw, VSPLTISW, 0x1000038C) \
/* Vector Shift Right */ \ /* Vector Shift Right */ \
V(vsr, VSR, 0x100002C4) \ V(vsr, VSR, 0x100002C4) \
/* Vector Subtract & write Carry Unsigned Quadword */ \ /* Vector Subtract & write Carry Unsigned Quadword */ \
...@@ -2600,6 +2605,7 @@ using Instr = uint32_t; ...@@ -2600,6 +2605,7 @@ using Instr = uint32_t;
PPC_VX_OPCODE_B_FORM_LIST(V) \ PPC_VX_OPCODE_B_FORM_LIST(V) \
PPC_VX_OPCODE_C_FORM_LIST(V) \ PPC_VX_OPCODE_C_FORM_LIST(V) \
PPC_VX_OPCODE_D_FORM_LIST(V) \ PPC_VX_OPCODE_D_FORM_LIST(V) \
PPC_VX_OPCODE_E_FORM_LIST(V) \
PPC_VX_OPCODE_UNUSED_LIST(V) PPC_VX_OPCODE_UNUSED_LIST(V)
#define PPC_XS_OPCODE_LIST(V) \ #define PPC_XS_OPCODE_LIST(V) \
...@@ -2945,6 +2951,7 @@ class Instruction { ...@@ -2945,6 +2951,7 @@ class Instruction {
PPC_VX_OPCODE_A_FORM_LIST(OPCODE_CASES) PPC_VX_OPCODE_A_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_B_FORM_LIST(OPCODE_CASES) PPC_VX_OPCODE_B_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_C_FORM_LIST(OPCODE_CASES) PPC_VX_OPCODE_C_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_E_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_UNUSED_LIST(OPCODE_CASES) PPC_VX_OPCODE_UNUSED_LIST(OPCODE_CASES)
PPC_X_OPCODE_EH_S_FORM_LIST(OPCODE_CASES) PPC_X_OPCODE_EH_S_FORM_LIST(OPCODE_CASES)
return static_cast<Opcode>(opcode); return static_cast<Opcode>(opcode);
......
...@@ -2870,9 +2870,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2870,9 +2870,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register src = i.InputSimd128Register(0); Simd128Register src = i.InputSimd128Register(0);
constexpr int shift_bits = 63; constexpr int shift_bits = 63;
__ li(ip, Operand(shift_bits)); __ xxspltib(kScratchSimd128Reg, Operand(shift_bits));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vspltb(kScratchSimd128Reg, kScratchSimd128Reg, Operand(7));
__ vsrad(kScratchSimd128Reg, src, kScratchSimd128Reg); __ vsrad(kScratchSimd128Reg, src, kScratchSimd128Reg);
__ vxor(tempFPReg1, src, kScratchSimd128Reg); __ vxor(tempFPReg1, src, kScratchSimd128Reg);
__ vsubudm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg); __ vsubudm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg);
...@@ -2882,9 +2880,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2882,9 +2880,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register src = i.InputSimd128Register(0); Simd128Register src = i.InputSimd128Register(0);
constexpr int shift_bits = 31; constexpr int shift_bits = 31;
__ li(ip, Operand(shift_bits)); __ xxspltib(kScratchSimd128Reg, Operand(shift_bits));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vspltb(kScratchSimd128Reg, kScratchSimd128Reg, Operand(7));
__ vsraw(kScratchSimd128Reg, src, kScratchSimd128Reg); __ vsraw(kScratchSimd128Reg, src, kScratchSimd128Reg);
__ vxor(tempFPReg1, src, kScratchSimd128Reg); __ vxor(tempFPReg1, src, kScratchSimd128Reg);
__ vsubuwm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg); __ vsubuwm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg);
...@@ -2892,9 +2888,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2892,9 +2888,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kPPC_I16x8Neg: { case kPPC_I16x8Neg: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
__ li(ip, Operand(1)); __ vspltish(kScratchSimd128Reg, Operand(1));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vsplth(kScratchSimd128Reg, kScratchSimd128Reg, Operand(3));
__ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0)); __ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
__ vadduhm(i.OutputSimd128Register(), kScratchSimd128Reg, tempFPReg1); __ vadduhm(i.OutputSimd128Register(), kScratchSimd128Reg, tempFPReg1);
break; break;
...@@ -2903,9 +2897,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2903,9 +2897,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register src = i.InputSimd128Register(0); Simd128Register src = i.InputSimd128Register(0);
constexpr int shift_bits = 15; constexpr int shift_bits = 15;
__ li(ip, Operand(shift_bits)); __ xxspltib(kScratchSimd128Reg, Operand(shift_bits));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vspltb(kScratchSimd128Reg, kScratchSimd128Reg, Operand(7));
__ vsrah(kScratchSimd128Reg, src, kScratchSimd128Reg); __ vsrah(kScratchSimd128Reg, src, kScratchSimd128Reg);
__ vxor(tempFPReg1, src, kScratchSimd128Reg); __ vxor(tempFPReg1, src, kScratchSimd128Reg);
__ vsubuhm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg); __ vsubuhm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg);
...@@ -2913,9 +2905,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2913,9 +2905,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kPPC_I8x16Neg: { case kPPC_I8x16Neg: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
__ li(ip, Operand(1)); __ xxspltib(kScratchSimd128Reg, Operand(1));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vspltb(kScratchSimd128Reg, kScratchSimd128Reg, Operand(7));
__ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0)); __ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
__ vaddubm(i.OutputSimd128Register(), kScratchSimd128Reg, tempFPReg1); __ vaddubm(i.OutputSimd128Register(), kScratchSimd128Reg, tempFPReg1);
break; break;
...@@ -2924,9 +2914,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2924,9 +2914,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register src = i.InputSimd128Register(0); Simd128Register src = i.InputSimd128Register(0);
constexpr int shift_bits = 7; constexpr int shift_bits = 7;
__ li(ip, Operand(shift_bits)); __ xxspltib(kScratchSimd128Reg, Operand(shift_bits));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vspltb(kScratchSimd128Reg, kScratchSimd128Reg, Operand(7));
__ vsrab(kScratchSimd128Reg, src, kScratchSimd128Reg); __ vsrab(kScratchSimd128Reg, src, kScratchSimd128Reg);
__ vxor(tempFPReg1, src, kScratchSimd128Reg); __ vxor(tempFPReg1, src, kScratchSimd128Reg);
__ vsububm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg); __ vsububm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg);
...@@ -3589,9 +3577,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3589,9 +3577,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register src = i.InputSimd128Register(0); Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register(); Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
__ li(kScratchReg, Operand(1)); __ vspltish(kScratchSimd128Reg, Operand(1));
__ mtvsrd(kScratchSimd128Reg, kScratchReg);
__ vsplth(kScratchSimd128Reg, kScratchSimd128Reg, Operand(3));
EXT_ADD_PAIRWISE(vmulesh, vmulosh, vadduwm) EXT_ADD_PAIRWISE(vmulesh, vmulosh, vadduwm)
break; break;
} }
...@@ -3599,9 +3585,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3599,9 +3585,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register src = i.InputSimd128Register(0); Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register(); Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
__ li(kScratchReg, Operand(1)); __ vspltish(kScratchSimd128Reg, Operand(1));
__ mtvsrd(kScratchSimd128Reg, kScratchReg);
__ vsplth(kScratchSimd128Reg, kScratchSimd128Reg, Operand(3));
EXT_ADD_PAIRWISE(vmuleuh, vmulouh, vadduwm) EXT_ADD_PAIRWISE(vmuleuh, vmulouh, vadduwm)
break; break;
} }
......
...@@ -270,6 +270,11 @@ int Decoder::FormatOption(Instruction* instr, const char* format) { ...@@ -270,6 +270,11 @@ int Decoder::FormatOption(Instruction* instr, const char* format) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", value); out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", value);
return 3; return 3;
} }
case 'S': { // SIM
int32_t value = static_cast<int32_t>(SIGN_EXT_IMM5(instr->Bits(20, 16)));
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", value);
return 3;
}
case 'U': { // UIM case 'U': { // UIM
int32_t value = instr->Bits(20, 16); int32_t value = instr->Bits(20, 16);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", value); out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", value);
...@@ -460,6 +465,13 @@ void Decoder::DecodeExt0(Instruction* instr) { ...@@ -460,6 +465,13 @@ void Decoder::DecodeExt0(Instruction* instr) {
} }
PPC_VX_OPCODE_C_FORM_LIST(DECODE_VX_C_FORM__INSTRUCTIONS) PPC_VX_OPCODE_C_FORM_LIST(DECODE_VX_C_FORM__INSTRUCTIONS)
#undef DECODE_VX_C_FORM__INSTRUCTIONS #undef DECODE_VX_C_FORM__INSTRUCTIONS
// Expands to one `case` per E-form opcode. Each case formats the instruction
// as "<mnemonic> 'Vt, 'SIM" and returns from the enclosing decode function.
// NOTE(review): 'Vt and 'SIM are format options resolved elsewhere in the
// decoder; presumably the target vector register and the signed 5-bit
// immediate -- confirm against FormatOption.
#define DECODE_VX_E_FORM__INSTRUCTIONS(name, opcode_name, opcode_value) \
case opcode_name: { \
Format(instr, #name " 'Vt, 'SIM"); \
return; \
}
PPC_VX_OPCODE_E_FORM_LIST(DECODE_VX_E_FORM__INSTRUCTIONS)
// The helper macro is only needed for the expansion above.
#undef DECODE_VX_E_FORM__INSTRUCTIONS
} }
} }
......
...@@ -4037,6 +4037,23 @@ void Simulator::ExecuteGeneric(Instruction* instr) { ...@@ -4037,6 +4037,23 @@ void Simulator::ExecuteGeneric(Instruction* instr) {
break; break;
} }
#undef VSPLT #undef VSPLT
// Simulates the vector splat-immediate instructions. VSPLTI(type) reads
// instruction bits 20..16, sign-extends them with SIGN_EXT_IMM5, converts the
// result to the lane type, and stores it via set_simd_register_by_lane into
// the register selected by RTValue() for each lane index produced by
// FOR_EACH_LANE.
// NOTE(review): FOR_EACH_LANE is defined elsewhere; presumably it iterates
// over every lane of a 128-bit register for the given type -- confirm.
// The macro declares the locals `sim` and `vrt`, so each use must sit in its
// own braced scope, as the cases below do.
#define VSPLTI(type) \
type sim = static_cast<type>(SIGN_EXT_IMM5(instr->Bits(20, 16))); \
int vrt = instr->RTValue(); \
FOR_EACH_LANE(i, type) { set_simd_register_by_lane<type>(vrt, i, sim); }
case VSPLTISW: {
// Word lanes.
VSPLTI(int32_t)
break;
}
case VSPLTISH: {
// Halfword lanes.
VSPLTI(int16_t)
break;
}
case VSPLTISB: {
// Byte lanes.
VSPLTI(int8_t)
break;
}
#undef VSPLTI
#define VINSERT(type, element) \ #define VINSERT(type, element) \
uint32_t uim = static_cast<uint32_t>(instr->Bits(20, 16)) / sizeof(type); \ uint32_t uim = static_cast<uint32_t>(instr->Bits(20, 16)) / sizeof(type); \
int vrt = instr->RTValue(); \ int vrt = instr->RTValue(); \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment