Commit c55ee71f, authored by Milad Fa and committed by V8 LUCI CQ

PPC [simd]: Implement vector (VMX) splat immediates

This CL adds vector splat byte, word and halfword to
codegen, disassembler and the simulator.

It also optimizes a number of Simd opcodes by using
the added instructions as well as VSX splat immediate (xxspltib).

Change-Id: I2c4eba33e81542f901d7cdc669c50b510e48c4c8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2909525
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#74701}
parent 4636b43b
......@@ -485,6 +485,10 @@ class Assembler : public AssemblerBase {
inline void name(const Simd128Register rt, const Simd128Register rb) { \
vx_form(instr_name, rt, rb); \
}
// Declares one assembler method per E-form VX instruction. The generated
// method takes a destination vector register and an immediate operand and
// forwards both, together with the opcode, to vx_form. The third list
// argument (the raw opcode value) is not needed here.
#define DECLARE_PPC_VX_INSTRUCTIONS_E_FORM(mnemonic, opcode, opcode_value) \
  inline void mnemonic(const Simd128Register rt, const Operand& imm) {      \
    vx_form(opcode, rt, imm);                                              \
  }
inline void vx_form(Instr instr, Simd128Register rt, Simd128Register rb,
const Operand& imm) {
......@@ -497,6 +501,9 @@ class Assembler : public AssemblerBase {
// Emits a VX-form instruction with two vector register operands: the code of
// rt is scaled by B21 and the code of rb by B11 before both are OR-ed into
// the opcode word.
inline void vx_form(Instr instr, Simd128Register rt, Simd128Register rb) {
  const auto rt_field = rt.code() * B21;
  const auto rb_field = rb.code() * B11;
  emit(instr | rt_field | rb_field);
}
// Emits a VX-form instruction with a destination vector register and an
// immediate operand. Only the low five bits of the immediate are encoded
// (scaled by B16); anything above them is dropped by the 0x1F mask.
inline void vx_form(Instr instr, Simd128Register rt, const Operand& imm) {
  const auto rt_field = rt.code() * B21;
  const auto imm_field = (imm.immediate() & 0x1F) * B16;
  emit(instr | rt_field | imm_field);
}
PPC_VX_OPCODE_A_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_A_FORM)
PPC_VX_OPCODE_B_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_B_FORM)
......@@ -504,9 +511,11 @@ class Assembler : public AssemblerBase {
PPC_VX_OPCODE_D_FORM_LIST(
DECLARE_PPC_VX_INSTRUCTIONS_C_FORM) /* OPCODE_D_FORM can use
INSTRUCTIONS_C_FORM */
PPC_VX_OPCODE_E_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_E_FORM)
#undef DECLARE_PPC_VX_INSTRUCTIONS_A_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_B_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_C_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_E_FORM
#define DECLARE_PPC_VA_INSTRUCTIONS_A_FORM(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register ra, \
......
......@@ -86,6 +86,9 @@ const int kLoadDoubleMaxReachBits = 15;
// TODO(sigurds): Choose best value.
constexpr int kRootRegisterBias = 128;
// Sign-extend the least significant 5 bits of value <imm>.
// The field is masked first, then the sign bit is flipped and its weight
// subtracted. Unlike a shift-left/shift-right pair on a signed int, this never
// shifts into (or past) the sign bit, so the result is well defined for any
// input, including values that have bits set above the field.
#define SIGN_EXT_IMM5(imm) (((static_cast<int>(imm) & 0x1F) ^ 0x10) - 0x10)
// Sign-extend the least significant 16 bits of value <imm> (same technique).
#define SIGN_EXT_IMM16(imm) \
  (((static_cast<int>(imm) & 0xFFFF) ^ 0x8000) - 0x8000)
......@@ -2443,6 +2446,14 @@ using Instr = uint32_t;
/* Vector Negate Doubleword */ \
V(vnegd, VNEGD, 0x10070602)
// E-form VX opcodes: the vector splat-immediate instructions. Each V(...)
// entry supplies, in order, the assembler mnemonic, the opcode enumerator
// name and the opcode bit pattern.
#define PPC_VX_OPCODE_E_FORM_LIST(V) \
/* Vector Splat Immediate Signed Byte */ \
V(vspltisb, VSPLTISB, 0x1000030C) \
/* Vector Splat Immediate Signed Halfword */ \
V(vspltish, VSPLTISH, 0x1000034C) \
/* Vector Splat Immediate Signed Word */ \
V(vspltisw, VSPLTISW, 0x1000038C)
#define PPC_VX_OPCODE_UNUSED_LIST(V) \
/* Decimal Add Modulo */ \
V(bcdadd, BCDADD, 0xF0000400) \
......@@ -2548,12 +2559,6 @@ using Instr = uint32_t;
V(vrsqrtefp, VRSQRTEFP, 0x1000014A) \
/* Vector Shift Left */ \
V(vsl, VSL, 0x100001C4) \
/* Vector Splat Immediate Signed Byte */ \
V(vspltisb, VSPLTISB, 0x1000030C) \
/* Vector Splat Immediate Signed Halfword */ \
V(vspltish, VSPLTISH, 0x1000034C) \
/* Vector Splat Immediate Signed Word */ \
V(vspltisw, VSPLTISW, 0x1000038C) \
/* Vector Shift Right */ \
V(vsr, VSR, 0x100002C4) \
/* Vector Subtract & write Carry Unsigned Quadword */ \
......@@ -2600,6 +2605,7 @@ using Instr = uint32_t;
PPC_VX_OPCODE_B_FORM_LIST(V) \
PPC_VX_OPCODE_C_FORM_LIST(V) \
PPC_VX_OPCODE_D_FORM_LIST(V) \
PPC_VX_OPCODE_E_FORM_LIST(V) \
PPC_VX_OPCODE_UNUSED_LIST(V)
#define PPC_XS_OPCODE_LIST(V) \
......@@ -2945,6 +2951,7 @@ class Instruction {
PPC_VX_OPCODE_A_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_B_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_C_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_E_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_UNUSED_LIST(OPCODE_CASES)
PPC_X_OPCODE_EH_S_FORM_LIST(OPCODE_CASES)
return static_cast<Opcode>(opcode);
......
......@@ -2870,9 +2870,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register src = i.InputSimd128Register(0);
constexpr int shift_bits = 63;
__ li(ip, Operand(shift_bits));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vspltb(kScratchSimd128Reg, kScratchSimd128Reg, Operand(7));
__ xxspltib(kScratchSimd128Reg, Operand(shift_bits));
__ vsrad(kScratchSimd128Reg, src, kScratchSimd128Reg);
__ vxor(tempFPReg1, src, kScratchSimd128Reg);
__ vsubudm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg);
......@@ -2882,9 +2880,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register src = i.InputSimd128Register(0);
constexpr int shift_bits = 31;
__ li(ip, Operand(shift_bits));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vspltb(kScratchSimd128Reg, kScratchSimd128Reg, Operand(7));
__ xxspltib(kScratchSimd128Reg, Operand(shift_bits));
__ vsraw(kScratchSimd128Reg, src, kScratchSimd128Reg);
__ vxor(tempFPReg1, src, kScratchSimd128Reg);
__ vsubuwm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg);
......@@ -2892,9 +2888,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kPPC_I16x8Neg: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
__ li(ip, Operand(1));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vsplth(kScratchSimd128Reg, kScratchSimd128Reg, Operand(3));
__ vspltish(kScratchSimd128Reg, Operand(1));
__ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
__ vadduhm(i.OutputSimd128Register(), kScratchSimd128Reg, tempFPReg1);
break;
......@@ -2903,9 +2897,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register src = i.InputSimd128Register(0);
constexpr int shift_bits = 15;
__ li(ip, Operand(shift_bits));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vspltb(kScratchSimd128Reg, kScratchSimd128Reg, Operand(7));
__ xxspltib(kScratchSimd128Reg, Operand(shift_bits));
__ vsrah(kScratchSimd128Reg, src, kScratchSimd128Reg);
__ vxor(tempFPReg1, src, kScratchSimd128Reg);
__ vsubuhm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg);
......@@ -2913,9 +2905,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kPPC_I8x16Neg: {
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
__ li(ip, Operand(1));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vspltb(kScratchSimd128Reg, kScratchSimd128Reg, Operand(7));
__ xxspltib(kScratchSimd128Reg, Operand(1));
__ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
__ vaddubm(i.OutputSimd128Register(), kScratchSimd128Reg, tempFPReg1);
break;
......@@ -2924,9 +2914,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
Simd128Register src = i.InputSimd128Register(0);
constexpr int shift_bits = 7;
__ li(ip, Operand(shift_bits));
__ mtvsrd(kScratchSimd128Reg, ip);
__ vspltb(kScratchSimd128Reg, kScratchSimd128Reg, Operand(7));
__ xxspltib(kScratchSimd128Reg, Operand(shift_bits));
__ vsrab(kScratchSimd128Reg, src, kScratchSimd128Reg);
__ vxor(tempFPReg1, src, kScratchSimd128Reg);
__ vsububm(i.OutputSimd128Register(), tempFPReg1, kScratchSimd128Reg);
......@@ -3589,9 +3577,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
__ li(kScratchReg, Operand(1));
__ mtvsrd(kScratchSimd128Reg, kScratchReg);
__ vsplth(kScratchSimd128Reg, kScratchSimd128Reg, Operand(3));
__ vspltish(kScratchSimd128Reg, Operand(1));
EXT_ADD_PAIRWISE(vmulesh, vmulosh, vadduwm)
break;
}
......@@ -3599,9 +3585,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
__ li(kScratchReg, Operand(1));
__ mtvsrd(kScratchSimd128Reg, kScratchReg);
__ vsplth(kScratchSimd128Reg, kScratchSimd128Reg, Operand(3));
__ vspltish(kScratchSimd128Reg, Operand(1));
EXT_ADD_PAIRWISE(vmuleuh, vmulouh, vadduwm)
break;
}
......
......@@ -270,6 +270,11 @@ int Decoder::FormatOption(Instruction* instr, const char* format) {
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", value);
return 3;
}
case 'S': { // SIM
// Signed immediate: instruction bits 16..20 are sign-extended from 5 bits
// and appended to the output buffer in decimal.
int32_t value = static_cast<int32_t>(SIGN_EXT_IMM5(instr->Bits(20, 16)));
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", value);
// NOTE(review): 3 is presumably the length of the "SIM" option text that was
// consumed - confirm against the other FormatOption cases.
return 3;
}
case 'U': { // UIM
int32_t value = instr->Bits(20, 16);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", value);
......@@ -460,6 +465,13 @@ void Decoder::DecodeExt0(Instruction* instr) {
}
PPC_VX_OPCODE_C_FORM_LIST(DECODE_VX_C_FORM__INSTRUCTIONS)
#undef DECODE_VX_C_FORM__INSTRUCTIONS
// Expands to one switch case per E-form VX opcode. Each case formats the
// instruction as its mnemonic followed by the 'Vt and 'SIM format options,
// then returns from the decoder.
#define DECODE_VX_E_FORM__INSTRUCTIONS(mnemonic, opcode_enum, opcode_bits) \
  case opcode_enum: {                                                     \
    Format(instr, #mnemonic " 'Vt, 'SIM");                                \
    return;                                                               \
  }
  PPC_VX_OPCODE_E_FORM_LIST(DECODE_VX_E_FORM__INSTRUCTIONS)
#undef DECODE_VX_E_FORM__INSTRUCTIONS
}
}
......
......@@ -4037,6 +4037,23 @@ void Simulator::ExecuteGeneric(Instruction* instr) {
break;
}
#undef VSPLT
// Vector splat immediate: the 5-bit field in instruction bits 16..20 is
// sign-extended, converted to the lane type, and written to every lane of the
// register selected by RTValue().
#define VSPLTI(lane_type)                                           \
  int dst_reg = instr->RTValue();                                   \
  lane_type splat =                                                 \
      static_cast<lane_type>(SIGN_EXT_IMM5(instr->Bits(20, 16)));   \
  FOR_EACH_LANE(i, lane_type) {                                     \
    set_simd_register_by_lane<lane_type>(dst_reg, i, splat);        \
  }
    case VSPLTISB: {
      VSPLTI(int8_t)
      break;
    }
    case VSPLTISH: {
      VSPLTI(int16_t)
      break;
    }
    case VSPLTISW: {
      VSPLTI(int32_t)
      break;
    }
#undef VSPLTI
#define VINSERT(type, element) \
uint32_t uim = static_cast<uint32_t>(instr->Bits(20, 16)) / sizeof(type); \
int vrt = instr->RTValue(); \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment