Commit 322c7e47 authored by Milad Fa, committed by V8 LUCI CQ

PPC [simd]: Optimize bitmask on Power10

This CL uses the vector extract-mask instructions newly added on Power10
(vextractbm, vextracthm, vextractwm, vextractdm) to extract the lane sign bits.

Change-Id: I9e4fa3bdd7fa5fc7004695c1d3ac29e3906d5207
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3056506
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#75947}
parent 8c9fc13a
......@@ -492,6 +492,10 @@ class Assembler : public AssemblerBase {
inline void name(const Simd128Register rt, const Operand& imm) { \
vx_form(instr_name, rt, imm); \
}
// Declares an inline assembler method `name(rt, rb)` for a VX "F-form"
// instruction: the destination is a `Register` and the source is a
// `Simd128Register`. The method forwards `instr_name` and both registers to the
// matching vx_form() overload. `instr_value` is accepted so the macro can be
// applied to the three-argument opcode lists, but it is not used in the body.
#define DECLARE_PPC_VX_INSTRUCTIONS_F_FORM(name, instr_name, instr_value) \
inline void name(const Register rt, const Simd128Register rb) { \
vx_form(instr_name, rt, rb); \
}
inline void vx_form(Instr instr, Simd128Register rt, Simd128Register rb,
const Operand& imm) {
......@@ -509,6 +513,9 @@ class Assembler : public AssemblerBase {
// Emits a VX-form instruction that takes a SIMD register and an immediate.
// The low five bits of the register code are scaled by B21 and the low five
// bits of the immediate by B16 before being OR-ed into the base opcode.
inline void vx_form(Instr instr, Simd128Register rt, const Operand& imm) {
  const auto rt_field = (rt.code() & 0x1F) * B21;
  const auto imm_field = (imm.immediate() & 0x1F) * B16;
  emit(instr | rt_field | imm_field);
}
// Emits a VX-form instruction whose destination is a `Register` and whose
// source is a SIMD register. The low five bits of the destination code are
// scaled by B21 and the low five bits of the source code by B11 before being
// OR-ed into the base opcode.
inline void vx_form(Instr instr, Register rt, Simd128Register rb) {
  const auto rt_field = (rt.code() & 0x1F) * B21;
  const auto rb_field = (rb.code() & 0x1F) * B11;
  emit(instr | rt_field | rb_field);
}
// Expand the A- and B-form VX opcode lists through their per-form declaration
// macros.
PPC_VX_OPCODE_A_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_A_FORM)
PPC_VX_OPCODE_B_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_B_FORM)
......@@ -517,10 +524,12 @@ class Assembler : public AssemblerBase {
DECLARE_PPC_VX_INSTRUCTIONS_C_FORM) /* OPCODE_D_FORM can use
INSTRUCTIONS_C_FORM */
// Expand the E- and F-form VX opcode lists through their per-form declaration
// macros.
PPC_VX_OPCODE_E_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_E_FORM)
PPC_VX_OPCODE_F_FORM_LIST(DECLARE_PPC_VX_INSTRUCTIONS_F_FORM)
// Remove the per-form declaration macros once the lists have been expanded so
// the names do not stay defined past this point.
#undef DECLARE_PPC_VX_INSTRUCTIONS_A_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_B_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_C_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_E_FORM
#undef DECLARE_PPC_VX_INSTRUCTIONS_F_FORM
#define DECLARE_PPC_VA_INSTRUCTIONS_A_FORM(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register ra, \
......
......@@ -2458,6 +2458,16 @@ using Instr = uint32_t;
/* Vector Splat Immediate Signed Word */ \
V(vspltisw, VSPLTISW, 0x1000038C)
// X-macro list of the VX "F-form" opcodes (the vector extract-mask family).
// Each entry is V(assembler mnemonic, opcode enumerator, 32-bit base encoding).
// The four encodings differ only in the digit selecting the element size
// (0x...8 byte, 0x...9 halfword, 0x...A word, 0x...B doubleword).
#define PPC_VX_OPCODE_F_FORM_LIST(V) \
/* Vector Extract Byte Mask */ \
V(vextractbm, VEXTRACTBM, 0x10080642) \
/* Vector Extract Halfword Mask */ \
V(vextracthm, VEXTRACTHM, 0x10090642) \
/* Vector Extract Word Mask */ \
V(vextractwm, VEXTRACTWM, 0x100A0642) \
/* Vector Extract Doubleword Mask */ \
V(vextractdm, VEXTRACTDM, 0x100B0642)
#define PPC_VX_OPCODE_UNUSED_LIST(V) \
/* Decimal Add Modulo */ \
V(bcdadd, BCDADD, 0xF0000400) \
......@@ -2610,6 +2620,7 @@ using Instr = uint32_t;
PPC_VX_OPCODE_C_FORM_LIST(V) \
PPC_VX_OPCODE_D_FORM_LIST(V) \
PPC_VX_OPCODE_E_FORM_LIST(V) \
PPC_VX_OPCODE_F_FORM_LIST(V) \
PPC_VX_OPCODE_UNUSED_LIST(V)
#define PPC_XS_OPCODE_LIST(V) \
......@@ -2948,6 +2959,7 @@ class Instruction {
opcode = extcode | BitField(20, 16) | BitField(10, 0);
switch (opcode) {
PPC_VX_OPCODE_D_FORM_LIST(OPCODE_CASES)
PPC_VX_OPCODE_F_FORM_LIST(OPCODE_CASES)
return static_cast<Opcode>(opcode);
}
opcode = extcode | BitField(10, 0);
......
......@@ -3251,43 +3251,59 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kPPC_I64x2BitMask: {
  // Writes the bitmask for the 64-bit lanes of input 0 to the output register.
  // Exactly one code sequence is emitted: the single Power10 instruction when
  // available, otherwise the scratch-register fallback. The fallback must not
  // also be emitted unconditionally ahead of the feature check, since that
  // would defeat the Power10 fast path and duplicate the work elsewhere.
  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
    __ vextractdm(i.OutputRegister(), i.InputSimd128Register(0));
  } else {
    // Load the vbpermq control value into the scratch SIMD register, permute,
    // then move the extracted element into the output register.
    __ mov(kScratchReg,
           Operand(0x8080808080800040));  // Select 0 for the high bits.
    __ mtvsrd(kScratchSimd128Reg, kScratchReg);
    __ vbpermq(kScratchSimd128Reg, i.InputSimd128Register(0),
               kScratchSimd128Reg);
    __ vextractub(kScratchSimd128Reg, kScratchSimd128Reg, Operand(6));
    __ mfvsrd(i.OutputRegister(), kScratchSimd128Reg);
  }
  break;
}
case kPPC_I32x4BitMask: {
  // Writes the bitmask for the 32-bit lanes of input 0 to the output register.
  // Exactly one code sequence is emitted: the single Power10 instruction when
  // available, otherwise the scratch-register fallback. The fallback must not
  // also be emitted unconditionally ahead of the feature check.
  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
    __ vextractwm(i.OutputRegister(), i.InputSimd128Register(0));
  } else {
    // Load the vbpermq control value into the scratch SIMD register, permute,
    // then move the extracted element into the output register.
    __ mov(kScratchReg,
           Operand(0x8080808000204060));  // Select 0 for the high bits.
    __ mtvsrd(kScratchSimd128Reg, kScratchReg);
    __ vbpermq(kScratchSimd128Reg, i.InputSimd128Register(0),
               kScratchSimd128Reg);
    __ vextractub(kScratchSimd128Reg, kScratchSimd128Reg, Operand(6));
    __ mfvsrd(i.OutputRegister(), kScratchSimd128Reg);
  }
  break;
}
case kPPC_I16x8BitMask: {
  // Writes the bitmask for the 16-bit lanes of input 0 to the output register.
  // Exactly one code sequence is emitted: the single Power10 instruction when
  // available, otherwise the scratch-register fallback. The fallback must not
  // also be emitted unconditionally ahead of the feature check.
  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
    __ vextracthm(i.OutputRegister(), i.InputSimd128Register(0));
  } else {
    // Load the vbpermq control value into the scratch SIMD register, permute,
    // then move the extracted element into the output register.
    __ mov(kScratchReg, Operand(0x10203040506070));
    __ mtvsrd(kScratchSimd128Reg, kScratchReg);
    __ vbpermq(kScratchSimd128Reg, i.InputSimd128Register(0),
               kScratchSimd128Reg);
    __ vextractub(kScratchSimd128Reg, kScratchSimd128Reg, Operand(6));
    __ mfvsrd(i.OutputRegister(), kScratchSimd128Reg);
  }
  break;
}
case kPPC_I8x16BitMask: {
  // Writes the bitmask for the 8-bit lanes of input 0 to the output register.
  // Exactly one code sequence is emitted: the single Power10 instruction when
  // available, otherwise the scratch-register fallback. The fallback must not
  // also be emitted unconditionally ahead of the feature check.
  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
    __ vextractbm(i.OutputRegister(), i.InputSimd128Register(0));
  } else {
    // The fallback needs a full 128-bit vbpermq control value, assembled from
    // two 64-bit halves held in the instruction's temp register and `ip`.
    Register temp = i.ToRegister(instr->TempAt(0));
    __ mov(temp, Operand(0x8101820283038));
    __ mov(ip, Operand(0x4048505860687078));
    __ mtvsrdd(kScratchSimd128Reg, temp, ip);
    __ vbpermq(kScratchSimd128Reg, i.InputSimd128Register(0),
               kScratchSimd128Reg);
    __ vextractuh(kScratchSimd128Reg, kScratchSimd128Reg, Operand(6));
    __ mfvsrd(i.OutputRegister(), kScratchSimd128Reg);
  }
  break;
}
case kPPC_I32x4DotI16x8S: {
......
......@@ -435,6 +435,13 @@ void Decoder::DecodeExt0(Instruction* instr) {
}
PPC_VX_OPCODE_D_FORM_LIST(DECODE_VX_D_FORM__INSTRUCTIONS)
#undef DECODE_VX_D_FORM__INSTRUCTIONS
// Disassembler cases for the VX F-form opcodes. Each generated case formats
// the instruction as its mnemonic (the stringified `name`) followed by the
// "'rt, 'Vb" operand specifiers, then returns. `opcode_value` is not used
// here. NOTE(review): 'rt / 'Vb are presumably expanded by Format() into the
// destination register and the source vector register; confirm against the
// format-option handlers.
#define DECODE_VX_F_FORM__INSTRUCTIONS(name, opcode_name, opcode_value) \
case opcode_name: { \
Format(instr, #name " 'rt, 'Vb"); \
return; \
}
PPC_VX_OPCODE_F_FORM_LIST(DECODE_VX_F_FORM__INSTRUCTIONS)
#undef DECODE_VX_F_FORM__INSTRUCTIONS
}
// Some encodings are 5-0 bits, handle those first
switch (EXT0 | (instr->BitField(5, 0))) {
......
......@@ -5044,6 +5044,32 @@ void Simulator::ExecuteGeneric(Instruction* instr) {
}
break;
}
// Simulates the vector extract-mask instructions. For every lane of the
// source vector register (viewed as `type`), the lane's sign bit is appended
// to a running mask: the mask is shifted left by one before each lane except
// lane 0, so lane 0 ends up in the most significant position of the mask. The
// finished mask is written to the destination register.
#define EXTRACT_MASK(type)                                             \
  const int dst_reg = instr->RTValue();                                \
  const int src_vreg = instr->RBValue();                               \
  uint64_t mask = 0;                                                   \
  FOR_EACH_LANE(lane, type) {                                          \
    const bool lane_is_negative =                                      \
        std::signbit(get_simd_register_by_lane<type>(src_vreg, lane)); \
    if (lane > 0) mask <<= 1;                                          \
    mask |= lane_is_negative;                                          \
  }                                                                    \
  set_register(dst_reg, mask);
    // The braces around each case give the macro's locals their own scope.
    case VEXTRACTDM: {
      EXTRACT_MASK(int64_t)
      break;
    }
    case VEXTRACTWM: {
      EXTRACT_MASK(int32_t)
      break;
    }
    case VEXTRACTHM: {
      EXTRACT_MASK(int16_t)
      break;
    }
    case VEXTRACTBM: {
      EXTRACT_MASK(int8_t)
      break;
    }
#undef EXTRACT_MASK
#undef FOR_EACH_LANE
#undef DECODE_VX_INSTRUCTION
#undef GET_ADDRESS
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment