Commit 2e076e2b authored by Milad Fa, committed by V8 LUCI CQ

PPC: optimize floating point min/max

This CL optimizes 64-bit FP min/max using scalar VSX instructions.
FP values are always stored in DP format in an FP register, which
means 32-bit FP min/max ops will also benefit from this change.

Change-Id: I181e61b2d28ddf6920b548d33cb4d926da856be8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3086023
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Reviewed-by: Junliang Yan <junyan@redhat.com>
Cr-Commit-Position: refs/heads/master@{#76212}
parent dfa3dfa3
......@@ -463,25 +463,37 @@ class Assembler : public AssemblerBase {
PPC_XX2_OPCODE_B_FORM_LIST(DECLARE_PPC_XX2_INSTRUCTIONS)
#undef DECLARE_PPC_XX2_INSTRUCTIONS
#define DECLARE_PPC_XX3_INSTRUCTIONS(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register ra, \
const Simd128Register rb) { \
xx3_form(instr_name, rt, ra, rb); \
}
inline void xx3_form(Instr instr, Simd128Register t, Simd128Register a,
Simd128Register b) {
// Using VR (high VSR) registers.
int AX = 1;
int BX = 1;
int TX = 1;
// Generates an inline emitter named `name` for a vector XX3 instruction.
// The emitter takes three Simd128Register operands (rt, ra, rb) and forwards
// them, together with `instr_name`, to xx3_form. `instr_value` is not used by
// this macro.
#define DECLARE_PPC_XX3_VECTOR_INSTRUCTIONS(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register ra, \
const Simd128Register rb) { \
xx3_form(instr_name, rt, ra, rb); \
}
// Generates an inline emitter named `name` for a scalar XX3 instruction.
// The emitter takes three DoubleRegister operands (rt, ra, rb) and forwards
// them, together with `instr_name`, to xx3_form. `instr_value` is not used by
// this macro.
#define DECLARE_PPC_XX3_SCALAR_INSTRUCTIONS(name, instr_name, instr_value) \
inline void name(const DoubleRegister rt, const DoubleRegister ra, \
const DoubleRegister rb) { \
xx3_form(instr_name, rt, ra, rb); \
}
// Emits one XX3-form instruction built from `instr` and the three register
// operands `t`, `a` and `b`. T must be Simd128Register or DoubleRegister;
// any other operand type is rejected at compile time.
template <typename T>
inline void xx3_form(Instr instr, T t, T a, T b) {
  constexpr bool kIsVector = std::is_same<T, Simd128Register>::value;
  constexpr bool kIsScalar = std::is_same<T, DoubleRegister>::value;
  static_assert(kIsVector || kIsScalar,
                "VSX only uses FP or Vector registers.");
  // The AX, BX and TX flags always share one value: 1 selects the VR (high
  // VSR) registers used for Simd operands, 0 selects the FP (low VSR) ones.
  const int vsr_half = kIsVector ? 1 : 0;
  // Only the low five bits of each register code go into its operand field.
  const auto target_field = (t.code() & 0x1F) * B21;
  const auto source_a_field = (a.code() & 0x1F) * B16;
  const auto source_b_field = (b.code() & 0x1F) * B11;
  emit(instr | target_field | source_a_field | source_b_field |
       vsr_half * B2 | vsr_half * B1 | vsr_half);
}
PPC_XX3_OPCODE_LIST(DECLARE_PPC_XX3_INSTRUCTIONS)
#undef DECLARE_PPC_XX3_INSTRUCTIONS
PPC_XX3_OPCODE_VECTOR_LIST(DECLARE_PPC_XX3_VECTOR_INSTRUCTIONS)
PPC_XX3_OPCODE_SCALAR_LIST(DECLARE_PPC_XX3_SCALAR_INSTRUCTIONS)
#undef DECLARE_PPC_XX3_VECTOR_INSTRUCTIONS
#undef DECLARE_PPC_XX3_SCALAR_INSTRUCTIONS
#define DECLARE_PPC_VX_INSTRUCTIONS_A_FORM(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register rb, \
......
This diff is collapsed.
......@@ -1809,6 +1809,10 @@ void TurboAssembler::MinF64(DoubleRegister dst, DoubleRegister lhs,
Label check_zero, return_left, return_right, return_nan, done;
fcmpu(lhs, rhs);
bunordered(&return_nan);
if (CpuFeatures::IsSupported(PPC_7_PLUS)) {
xsmindp(dst, lhs, rhs);
b(&done);
}
beq(&check_zero);
ble(&return_left);
b(&return_right);
......@@ -1853,6 +1857,10 @@ void TurboAssembler::MaxF64(DoubleRegister dst, DoubleRegister lhs,
Label check_zero, return_left, return_right, return_nan, done;
fcmpu(lhs, rhs);
bunordered(&return_nan);
if (CpuFeatures::IsSupported(PPC_7_PLUS)) {
xsmaxdp(dst, lhs, rhs);
b(&done);
}
beq(&check_zero);
bge(&return_left);
b(&return_right);
......
......@@ -1367,13 +1367,20 @@ void Decoder::DecodeExt6(Instruction* instr) {
}
}
switch (EXT6 | (instr->BitField(10, 3))) {
#define DECODE_XX3_INSTRUCTIONS(name, opcode_name, opcode_value) \
case opcode_name: { \
Format(instr, #name " 'Xt, 'Xa, 'Xb"); \
return; \
// Generates a switch case for `opcode_name` that formats the instruction as
// "<name> 'Xt, 'Xa, 'Xb" and returns from the decoder. `opcode_value` is not
// used by this macro.
// NOTE(review): the 'X escapes presumably print vector registers -- confirm
// against the Format() implementation.
#define DECODE_XX3_VECTOR_INSTRUCTIONS(name, opcode_name, opcode_value) \
case opcode_name: { \
Format(instr, #name " 'Xt, 'Xa, 'Xb"); \
return; \
}
PPC_XX3_OPCODE_VECTOR_LIST(DECODE_XX3_VECTOR_INSTRUCTIONS)
#undef DECODE_XX3_VECTOR_INSTRUCTIONS
// Generates a switch case for `opcode_name` that formats the instruction as
// "<name> 'Dt, 'Da, 'Db" and returns from the decoder. `opcode_value` is not
// used by this macro.
// NOTE(review): the 'D escapes presumably print double (FP) registers --
// confirm against the Format() implementation.
#define DECODE_XX3_SCALAR_INSTRUCTIONS(name, opcode_name, opcode_value) \
case opcode_name: { \
Format(instr, #name " 'Dt, 'Da, 'Db"); \
return; \
}
PPC_XX3_OPCODE_LIST(DECODE_XX3_INSTRUCTIONS)
#undef DECODE_XX3_INSTRUCTIONS
PPC_XX3_OPCODE_SCALAR_LIST(DECODE_XX3_SCALAR_INSTRUCTIONS)
#undef DECODE_XX3_SCALAR_INSTRUCTIONS
}
// Some encodings have integers hard coded in the middle, handle those first.
switch (EXT6 | (instr->BitField(20, 16)) | (instr->BitField(10, 2))) {
......
......@@ -4408,6 +4408,20 @@ void Simulator::ExecuteGeneric(Instruction* instr) {
type b_val = get_simd_register_by_lane<type>(b, i); \
set_simd_register_by_lane<type>(t, i, a_val op b_val ? a_val : b_val); \
}
// XSMINDP: reads d registers a and b as doubles and stores
// VSXFPMin<double>(a, b) into d register t.
case XSMINDP: {
// NOTE(review): DECODE_VX_INSTRUCTION is defined elsewhere; presumably it
// declares the register indices t, a and b from `instr` -- confirm.
DECODE_VX_INSTRUCTION(t, a, b, T)
double a_val = get_double_from_d_register(a);
double b_val = get_double_from_d_register(b);
set_d_register_from_double(t, VSXFPMin<double>(a_val, b_val));
break;
}
// XSMAXDP: reads d registers a and b as doubles and stores
// VSXFPMax<double>(a, b) into d register t.
case XSMAXDP: {
// NOTE(review): DECODE_VX_INSTRUCTION is defined elsewhere; presumably it
// declares the register indices t, a and b from `instr` -- confirm.
DECODE_VX_INSTRUCTION(t, a, b, T)
double a_val = get_double_from_d_register(a);
double b_val = get_double_from_d_register(b);
set_d_register_from_double(t, VSXFPMax<double>(a_val, b_val));
break;
}
case XVMINDP: {
DECODE_VX_INSTRUCTION(t, a, b, T)
FOR_EACH_LANE(i, double) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment