Commit 2e076e2b authored by Milad Fa, committed by V8 LUCI CQ

PPC: optimize floating point min/max

This CL optimizes 64-bit FP min/max using scalar VSX instructions.
FP values are always stored in DP format in an FP register, which
means 32-bit FP min/max ops will also benefit from this change.

Change-Id: I181e61b2d28ddf6920b548d33cb4d926da856be8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3086023
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Reviewed-by: Junliang Yan <junyan@redhat.com>
Cr-Commit-Position: refs/heads/master@{#76212}
parent dfa3dfa3
...@@ -463,25 +463,37 @@ class Assembler : public AssemblerBase { ...@@ -463,25 +463,37 @@ class Assembler : public AssemblerBase {
PPC_XX2_OPCODE_B_FORM_LIST(DECLARE_PPC_XX2_INSTRUCTIONS) PPC_XX2_OPCODE_B_FORM_LIST(DECLARE_PPC_XX2_INSTRUCTIONS)
#undef DECLARE_PPC_XX2_INSTRUCTIONS #undef DECLARE_PPC_XX2_INSTRUCTIONS
#define DECLARE_PPC_XX3_INSTRUCTIONS(name, instr_name, instr_value) \ #define DECLARE_PPC_XX3_VECTOR_INSTRUCTIONS(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register ra, \ inline void name(const Simd128Register rt, const Simd128Register ra, \
const Simd128Register rb) { \ const Simd128Register rb) { \
xx3_form(instr_name, rt, ra, rb); \ xx3_form(instr_name, rt, ra, rb); \
} }
#define DECLARE_PPC_XX3_SCALAR_INSTRUCTIONS(name, instr_name, instr_value) \
inline void xx3_form(Instr instr, Simd128Register t, Simd128Register a, inline void name(const DoubleRegister rt, const DoubleRegister ra, \
Simd128Register b) { const DoubleRegister rb) { \
// Using VR (high VSR) registers. xx3_form(instr_name, rt, ra, rb); \
int AX = 1; }
int BX = 1;
int TX = 1; template <typename T>
inline void xx3_form(Instr instr, T t, T a, T b) {
static_assert(std::is_same<T, Simd128Register>::value ||
std::is_same<T, DoubleRegister>::value,
"VSX only uses FP or Vector registers.");
// Using FP (low VSR) registers.
int AX = 0, BX = 0, TX = 0;
// Using VR (high VSR) registers when Simd registers are used.
if (std::is_same<T, Simd128Register>::value) {
AX = BX = TX = 1;
}
emit(instr | (t.code() & 0x1F) * B21 | (a.code() & 0x1F) * B16 | emit(instr | (t.code() & 0x1F) * B21 | (a.code() & 0x1F) * B16 |
(b.code() & 0x1F) * B11 | AX * B2 | BX * B1 | TX); (b.code() & 0x1F) * B11 | AX * B2 | BX * B1 | TX);
} }
PPC_XX3_OPCODE_LIST(DECLARE_PPC_XX3_INSTRUCTIONS) PPC_XX3_OPCODE_VECTOR_LIST(DECLARE_PPC_XX3_VECTOR_INSTRUCTIONS)
#undef DECLARE_PPC_XX3_INSTRUCTIONS PPC_XX3_OPCODE_SCALAR_LIST(DECLARE_PPC_XX3_SCALAR_INSTRUCTIONS)
#undef DECLARE_PPC_XX3_VECTOR_INSTRUCTIONS
#undef DECLARE_PPC_XX3_SCALAR_INSTRUCTIONS
#define DECLARE_PPC_VX_INSTRUCTIONS_A_FORM(name, instr_name, instr_value) \ #define DECLARE_PPC_VX_INSTRUCTIONS_A_FORM(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register rb, \ inline void name(const Simd128Register rt, const Simd128Register rb, \
......
This diff is collapsed.
...@@ -1809,6 +1809,10 @@ void TurboAssembler::MinF64(DoubleRegister dst, DoubleRegister lhs, ...@@ -1809,6 +1809,10 @@ void TurboAssembler::MinF64(DoubleRegister dst, DoubleRegister lhs,
Label check_zero, return_left, return_right, return_nan, done; Label check_zero, return_left, return_right, return_nan, done;
fcmpu(lhs, rhs); fcmpu(lhs, rhs);
bunordered(&return_nan); bunordered(&return_nan);
if (CpuFeatures::IsSupported(PPC_7_PLUS)) {
xsmindp(dst, lhs, rhs);
b(&done);
}
beq(&check_zero); beq(&check_zero);
ble(&return_left); ble(&return_left);
b(&return_right); b(&return_right);
...@@ -1853,6 +1857,10 @@ void TurboAssembler::MaxF64(DoubleRegister dst, DoubleRegister lhs, ...@@ -1853,6 +1857,10 @@ void TurboAssembler::MaxF64(DoubleRegister dst, DoubleRegister lhs,
Label check_zero, return_left, return_right, return_nan, done; Label check_zero, return_left, return_right, return_nan, done;
fcmpu(lhs, rhs); fcmpu(lhs, rhs);
bunordered(&return_nan); bunordered(&return_nan);
if (CpuFeatures::IsSupported(PPC_7_PLUS)) {
xsmaxdp(dst, lhs, rhs);
b(&done);
}
beq(&check_zero); beq(&check_zero);
bge(&return_left); bge(&return_left);
b(&return_right); b(&return_right);
......
...@@ -1367,13 +1367,20 @@ void Decoder::DecodeExt6(Instruction* instr) { ...@@ -1367,13 +1367,20 @@ void Decoder::DecodeExt6(Instruction* instr) {
} }
} }
switch (EXT6 | (instr->BitField(10, 3))) { switch (EXT6 | (instr->BitField(10, 3))) {
#define DECODE_XX3_INSTRUCTIONS(name, opcode_name, opcode_value) \ #define DECODE_XX3_VECTOR_INSTRUCTIONS(name, opcode_name, opcode_value) \
case opcode_name: { \ case opcode_name: { \
Format(instr, #name " 'Xt, 'Xa, 'Xb"); \ Format(instr, #name " 'Xt, 'Xa, 'Xb"); \
return; \ return; \
}
PPC_XX3_OPCODE_VECTOR_LIST(DECODE_XX3_VECTOR_INSTRUCTIONS)
#undef DECODE_XX3_VECTOR_INSTRUCTIONS
#define DECODE_XX3_SCALAR_INSTRUCTIONS(name, opcode_name, opcode_value) \
case opcode_name: { \
Format(instr, #name " 'Dt, 'Da, 'Db"); \
return; \
} }
PPC_XX3_OPCODE_LIST(DECODE_XX3_INSTRUCTIONS) PPC_XX3_OPCODE_SCALAR_LIST(DECODE_XX3_SCALAR_INSTRUCTIONS)
#undef DECODE_XX3_INSTRUCTIONS #undef DECODE_XX3_SCALAR_INSTRUCTIONS
} }
// Some encodings have integers hard coded in the middle, handle those first. // Some encodings have integers hard coded in the middle, handle those first.
switch (EXT6 | (instr->BitField(20, 16)) | (instr->BitField(10, 2))) { switch (EXT6 | (instr->BitField(20, 16)) | (instr->BitField(10, 2))) {
......
...@@ -4408,6 +4408,20 @@ void Simulator::ExecuteGeneric(Instruction* instr) { ...@@ -4408,6 +4408,20 @@ void Simulator::ExecuteGeneric(Instruction* instr) {
type b_val = get_simd_register_by_lane<type>(b, i); \ type b_val = get_simd_register_by_lane<type>(b, i); \
set_simd_register_by_lane<type>(t, i, a_val op b_val ? a_val : b_val); \ set_simd_register_by_lane<type>(t, i, a_val op b_val ? a_val : b_val); \
} }
case XSMINDP: {
DECODE_VX_INSTRUCTION(t, a, b, T)
double a_val = get_double_from_d_register(a);
double b_val = get_double_from_d_register(b);
set_d_register_from_double(t, VSXFPMin<double>(a_val, b_val));
break;
}
case XSMAXDP: {
DECODE_VX_INSTRUCTION(t, a, b, T)
double a_val = get_double_from_d_register(a);
double b_val = get_double_from_d_register(b);
set_d_register_from_double(t, VSXFPMax<double>(a_val, b_val));
break;
}
case XVMINDP: { case XVMINDP: {
DECODE_VX_INSTRUCTION(t, a, b, T) DECODE_VX_INSTRUCTION(t, a, b, T)
FOR_EACH_LANE(i, double) { FOR_EACH_LANE(i, double) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment