Commit 2e076e2b authored by Milad Fa, committed by V8 LUCI CQ

PPC: optimize floating point min/max

This CL optimizes 64-bit FP min/max using scalar VSX instructions.
FP values are always stored in DP format in an FP register, which
means 32-bit FP min/max ops will also benefit from this change.

Change-Id: I181e61b2d28ddf6920b548d33cb4d926da856be8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3086023
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Reviewed-by: Junliang Yan <junyan@redhat.com>
Cr-Commit-Position: refs/heads/master@{#76212}
parent dfa3dfa3
......@@ -463,25 +463,37 @@ class Assembler : public AssemblerBase {
PPC_XX2_OPCODE_B_FORM_LIST(DECLARE_PPC_XX2_INSTRUCTIONS)
#undef DECLARE_PPC_XX2_INSTRUCTIONS
#define DECLARE_PPC_XX3_INSTRUCTIONS(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register ra, \
const Simd128Register rb) { \
xx3_form(instr_name, rt, ra, rb); \
}
inline void xx3_form(Instr instr, Simd128Register t, Simd128Register a,
Simd128Register b) {
// Using VR (high VSR) registers.
int AX = 1;
int BX = 1;
int TX = 1;
// Generates one inline assembler method per XX3 vector opcode entry. The
// method is named after the mnemonic, takes Simd128Register operands and
// forwards them to xx3_form together with the opcode identifier. The third
// list column (the raw encoding) is not used here.
#define DECLARE_PPC_XX3_VECTOR_INSTRUCTIONS(mnemonic, opcode_id, encoding) \
  inline void mnemonic(const Simd128Register rt, const Simd128Register ra, \
                       const Simd128Register rb) {                         \
    xx3_form(opcode_id, rt, ra, rb);                                       \
  }
// Generates one inline assembler method per XX3 scalar opcode entry. The
// method is named after the mnemonic, takes DoubleRegister operands and
// forwards them to xx3_form together with the opcode identifier. The third
// list column (the raw encoding) is not used here.
#define DECLARE_PPC_XX3_SCALAR_INSTRUCTIONS(mnemonic, opcode_id, encoding) \
  inline void mnemonic(const DoubleRegister rt, const DoubleRegister ra,   \
                       const DoubleRegister rb) {                          \
    xx3_form(opcode_id, rt, ra, rb);                                       \
  }
// Emits an XX3-form VSX instruction built from the base encoding |instr|,
// the target register |t| and the source registers |a| and |b|. Only
// DoubleRegister and Simd128Register operands are accepted; any other type
// is rejected at compile time.
template <typename T>
inline void xx3_form(Instr instr, T t, T a, T b) {
  static_assert(std::is_same<T, DoubleRegister>::value ||
                    std::is_same<T, Simd128Register>::value,
                "VSX only uses FP or Vector registers.");
  // FP registers are the low VSRs, so their AX/BX/TX bits stay clear. Simd
  // registers are VRs (high VSRs), so all three bits are set.
  const int vsr_ext = std::is_same<T, Simd128Register>::value ? 1 : 0;
  // Each register field carries the low five bits of the register code.
  const auto t_field = (t.code() & 0x1F) * B21;
  const auto a_field = (a.code() & 0x1F) * B16;
  const auto b_field = (b.code() & 0x1F) * B11;
  // AX, BX and TX occupy bits 2, 1 and 0 respectively.
  const auto ext_bits = vsr_ext * B2 | vsr_ext * B1 | vsr_ext;
  emit(instr | t_field | a_field | b_field | ext_bits);
}
PPC_XX3_OPCODE_LIST(DECLARE_PPC_XX3_INSTRUCTIONS)
#undef DECLARE_PPC_XX3_INSTRUCTIONS
PPC_XX3_OPCODE_VECTOR_LIST(DECLARE_PPC_XX3_VECTOR_INSTRUCTIONS)
PPC_XX3_OPCODE_SCALAR_LIST(DECLARE_PPC_XX3_SCALAR_INSTRUCTIONS)
#undef DECLARE_PPC_XX3_VECTOR_INSTRUCTIONS
#undef DECLARE_PPC_XX3_SCALAR_INSTRUCTIONS
#define DECLARE_PPC_VX_INSTRUCTIONS_A_FORM(name, instr_name, instr_value) \
inline void name(const Simd128Register rt, const Simd128Register rb, \
......
......@@ -140,69 +140,71 @@ inline Condition NegateCondition(Condition cond) {
// access the various ISA fields.
using Instr = uint32_t;
#define PPC_XX3_OPCODE_LIST(V) \
/* VSX Scalar Add Double-Precision */ \
V(xsadddp, XSADDDP, 0xF0000100) \
/* VSX Scalar Add Single-Precision */ \
V(xsaddsp, XSADDSP, 0xF0000000) \
/* VSX Scalar Compare Ordered Double-Precision */ \
V(xscmpodp, XSCMPODP, 0xF0000158) \
/* VSX Scalar Compare Unordered Double-Precision */ \
V(xscmpudp, XSCMPUDP, 0xF0000118) \
/* VSX Scalar Copy Sign Double-Precision */ \
V(xscpsgndp, XSCPSGNDP, 0xF0000580) \
/* VSX Scalar Divide Double-Precision */ \
V(xsdivdp, XSDIVDP, 0xF00001C0) \
/* VSX Scalar Divide Single-Precision */ \
V(xsdivsp, XSDIVSP, 0xF00000C0) \
/* VSX Scalar Multiply-Add Type-A Double-Precision */ \
V(xsmaddadp, XSMADDADP, 0xF0000108) \
/* VSX Scalar Multiply-Add Type-A Single-Precision */ \
V(xsmaddasp, XSMADDASP, 0xF0000008) \
/* VSX Scalar Multiply-Add Type-M Double-Precision */ \
V(xsmaddmdp, XSMADDMDP, 0xF0000148) \
/* VSX Scalar Multiply-Add Type-M Single-Precision */ \
V(xsmaddmsp, XSMADDMSP, 0xF0000048) \
/* VSX Scalar Maximum Double-Precision */ \
V(xsmaxdp, XSMAXDP, 0xF0000500) \
/* VSX Scalar Minimum Double-Precision */ \
V(xsmindp, XSMINDP, 0xF0000540) \
/* VSX Scalar Multiply-Subtract Type-A Double-Precision */ \
V(xsmsubadp, XSMSUBADP, 0xF0000188) \
/* VSX Scalar Multiply-Subtract Type-A Single-Precision */ \
V(xsmsubasp, XSMSUBASP, 0xF0000088) \
/* VSX Scalar Multiply-Subtract Type-M Double-Precision */ \
V(xsmsubmdp, XSMSUBMDP, 0xF00001C8) \
/* VSX Scalar Multiply-Subtract Type-M Single-Precision */ \
V(xsmsubmsp, XSMSUBMSP, 0xF00000C8) \
/* VSX Scalar Multiply Double-Precision */ \
V(xsmuldp, XSMULDP, 0xF0000180) \
/* VSX Scalar Multiply Single-Precision */ \
V(xsmulsp, XSMULSP, 0xF0000080) \
/* VSX Scalar Negative Multiply-Add Type-A Double-Precision */ \
V(xsnmaddadp, XSNMADDADP, 0xF0000508) \
/* VSX Scalar Negative Multiply-Add Type-A Single-Precision */ \
V(xsnmaddasp, XSNMADDASP, 0xF0000408) \
/* VSX Scalar Negative Multiply-Add Type-M Double-Precision */ \
V(xsnmaddmdp, XSNMADDMDP, 0xF0000548) \
/* VSX Scalar Negative Multiply-Add Type-M Single-Precision */ \
V(xsnmaddmsp, XSNMADDMSP, 0xF0000448) \
/* VSX Scalar Negative Multiply-Subtract Type-A Double-Precision */ \
V(xsnmsubadp, XSNMSUBADP, 0xF0000588) \
/* VSX Scalar Negative Multiply-Subtract Type-A Single-Precision */ \
V(xsnmsubasp, XSNMSUBASP, 0xF0000488) \
/* VSX Scalar Negative Multiply-Subtract Type-M Double-Precision */ \
V(xsnmsubmdp, XSNMSUBMDP, 0xF00005C8) \
/* VSX Scalar Negative Multiply-Subtract Type-M Single-Precision */ \
V(xsnmsubmsp, XSNMSUBMSP, 0xF00004C8) \
/* VSX Scalar Reciprocal Estimate Double-Precision */ \
V(xsredp, XSREDP, 0xF0000168) \
/* VSX Scalar Subtract Double-Precision */ \
V(xssubdp, XSSUBDP, 0xF0000140) \
/* VSX Scalar Subtract Single-Precision */ \
V(xssubsp, XSSUBSP, 0xF0000040) \
/* VSX Scalar Test for software Divide Double-Precision */ \
V(xstdivdp, XSTDIVDP, 0xF00001E8) \
// X-macro table of the VSX scalar XX3 opcodes. Each entry is
// V(mnemonic, OPCODE_NAME, encoding); the caller supplies V to expand every
// row. In the visible code the table is expanded to declare assembler
// methods that take DoubleRegister operands, to build the opcode list and
// opcode switch cases, and to produce disassembler cases.
#define PPC_XX3_OPCODE_SCALAR_LIST(V) \
/* VSX Scalar Add Double-Precision */ \
V(xsadddp, XSADDDP, 0xF0000100) \
/* VSX Scalar Add Single-Precision */ \
V(xsaddsp, XSADDSP, 0xF0000000) \
/* VSX Scalar Compare Ordered Double-Precision */ \
V(xscmpodp, XSCMPODP, 0xF0000158) \
/* VSX Scalar Compare Unordered Double-Precision */ \
V(xscmpudp, XSCMPUDP, 0xF0000118) \
/* VSX Scalar Copy Sign Double-Precision */ \
V(xscpsgndp, XSCPSGNDP, 0xF0000580) \
/* VSX Scalar Divide Double-Precision */ \
V(xsdivdp, XSDIVDP, 0xF00001C0) \
/* VSX Scalar Divide Single-Precision */ \
V(xsdivsp, XSDIVSP, 0xF00000C0) \
/* VSX Scalar Multiply-Add Type-A Double-Precision */ \
V(xsmaddadp, XSMADDADP, 0xF0000108) \
/* VSX Scalar Multiply-Add Type-A Single-Precision */ \
V(xsmaddasp, XSMADDASP, 0xF0000008) \
/* VSX Scalar Multiply-Add Type-M Double-Precision */ \
V(xsmaddmdp, XSMADDMDP, 0xF0000148) \
/* VSX Scalar Multiply-Add Type-M Single-Precision */ \
V(xsmaddmsp, XSMADDMSP, 0xF0000048) \
/* VSX Scalar Maximum Double-Precision */ \
V(xsmaxdp, XSMAXDP, 0xF0000500) \
/* VSX Scalar Minimum Double-Precision */ \
V(xsmindp, XSMINDP, 0xF0000540) \
/* VSX Scalar Multiply-Subtract Type-A Double-Precision */ \
V(xsmsubadp, XSMSUBADP, 0xF0000188) \
/* VSX Scalar Multiply-Subtract Type-A Single-Precision */ \
V(xsmsubasp, XSMSUBASP, 0xF0000088) \
/* VSX Scalar Multiply-Subtract Type-M Double-Precision */ \
V(xsmsubmdp, XSMSUBMDP, 0xF00001C8) \
/* VSX Scalar Multiply-Subtract Type-M Single-Precision */ \
V(xsmsubmsp, XSMSUBMSP, 0xF00000C8) \
/* VSX Scalar Multiply Double-Precision */ \
V(xsmuldp, XSMULDP, 0xF0000180) \
/* VSX Scalar Multiply Single-Precision */ \
V(xsmulsp, XSMULSP, 0xF0000080) \
/* VSX Scalar Negative Multiply-Add Type-A Double-Precision */ \
V(xsnmaddadp, XSNMADDADP, 0xF0000508) \
/* VSX Scalar Negative Multiply-Add Type-A Single-Precision */ \
V(xsnmaddasp, XSNMADDASP, 0xF0000408) \
/* VSX Scalar Negative Multiply-Add Type-M Double-Precision */ \
V(xsnmaddmdp, XSNMADDMDP, 0xF0000548) \
/* VSX Scalar Negative Multiply-Add Type-M Single-Precision */ \
V(xsnmaddmsp, XSNMADDMSP, 0xF0000448) \
/* VSX Scalar Negative Multiply-Subtract Type-A Double-Precision */ \
V(xsnmsubadp, XSNMSUBADP, 0xF0000588) \
/* VSX Scalar Negative Multiply-Subtract Type-A Single-Precision */ \
V(xsnmsubasp, XSNMSUBASP, 0xF0000488) \
/* VSX Scalar Negative Multiply-Subtract Type-M Double-Precision */ \
V(xsnmsubmdp, XSNMSUBMDP, 0xF00005C8) \
/* VSX Scalar Negative Multiply-Subtract Type-M Single-Precision */ \
V(xsnmsubmsp, XSNMSUBMSP, 0xF00004C8) \
/* VSX Scalar Reciprocal Estimate Double-Precision */ \
V(xsredp, XSREDP, 0xF0000168) \
/* VSX Scalar Subtract Double-Precision */ \
V(xssubdp, XSSUBDP, 0xF0000140) \
/* VSX Scalar Subtract Single-Precision */ \
V(xssubsp, XSSUBSP, 0xF0000040) \
/* VSX Scalar Test for software Divide Double-Precision */ \
V(xstdivdp, XSTDIVDP, 0xF00001E8)
#define PPC_XX3_OPCODE_VECTOR_LIST(V) \
/* VSX Vector Add Double-Precision */ \
V(xvadddp, XVADDDP, 0xF0000300) \
/* VSX Vector Add Single-Precision */ \
......@@ -2679,7 +2681,8 @@ immediate-specified index */ \
PPC_VC_OPCODE_LIST(V) \
PPC_XX1_OPCODE_LIST(V) \
PPC_XX2_OPCODE_LIST(V) \
PPC_XX3_OPCODE_LIST(V) \
PPC_XX3_OPCODE_VECTOR_LIST(V) \
PPC_XX3_OPCODE_SCALAR_LIST(V) \
PPC_XX4_OPCODE_LIST(V)
enum Opcode : uint32_t {
......@@ -3030,7 +3033,8 @@ class Instruction {
opcode = extcode | BitField(10, 3);
switch (opcode) {
PPC_EVS_OPCODE_LIST(OPCODE_CASES)
PPC_XX3_OPCODE_LIST(OPCODE_CASES)
PPC_XX3_OPCODE_VECTOR_LIST(OPCODE_CASES)
PPC_XX3_OPCODE_SCALAR_LIST(OPCODE_CASES)
return static_cast<Opcode>(opcode);
}
opcode = extcode | BitField(8, 1);
......
......@@ -1809,6 +1809,10 @@ void TurboAssembler::MinF64(DoubleRegister dst, DoubleRegister lhs,
Label check_zero, return_left, return_right, return_nan, done;
fcmpu(lhs, rhs);
bunordered(&return_nan);
if (CpuFeatures::IsSupported(PPC_7_PLUS)) {
xsmindp(dst, lhs, rhs);
b(&done);
}
beq(&check_zero);
ble(&return_left);
b(&return_right);
......@@ -1853,6 +1857,10 @@ void TurboAssembler::MaxF64(DoubleRegister dst, DoubleRegister lhs,
Label check_zero, return_left, return_right, return_nan, done;
fcmpu(lhs, rhs);
bunordered(&return_nan);
if (CpuFeatures::IsSupported(PPC_7_PLUS)) {
xsmaxdp(dst, lhs, rhs);
b(&done);
}
beq(&check_zero);
bge(&return_left);
b(&return_right);
......
......@@ -1367,13 +1367,20 @@ void Decoder::DecodeExt6(Instruction* instr) {
}
}
switch (EXT6 | (instr->BitField(10, 3))) {
#define DECODE_XX3_INSTRUCTIONS(name, opcode_name, opcode_value) \
case opcode_name: { \
Format(instr, #name " 'Xt, 'Xa, 'Xb"); \
return; \
// Produces one disassembler switch case per XX3 vector opcode entry. The
// case prints the mnemonic followed by the 'Xt, 'Xa, 'Xb operand
// placeholders and then returns from the enclosing decoder function. The
// third list column (the raw encoding) is not used here.
#define DECODE_XX3_VECTOR_INSTRUCTIONS(mnemonic, opcode_id, encoding) \
  case opcode_id: {                                                   \
    Format(instr, #mnemonic " 'Xt, 'Xa, 'Xb");                        \
    return;                                                           \
  }
PPC_XX3_OPCODE_VECTOR_LIST(DECODE_XX3_VECTOR_INSTRUCTIONS)
#undef DECODE_XX3_VECTOR_INSTRUCTIONS
// Produces one disassembler switch case per XX3 scalar opcode entry. The
// case prints the mnemonic followed by the 'Dt, 'Da, 'Db operand
// placeholders and then returns from the enclosing decoder function. The
// third list column (the raw encoding) is not used here.
#define DECODE_XX3_SCALAR_INSTRUCTIONS(mnemonic, opcode_id, encoding) \
  case opcode_id: {                                                   \
    Format(instr, #mnemonic " 'Dt, 'Da, 'Db");                        \
    return;                                                           \
  }
PPC_XX3_OPCODE_LIST(DECODE_XX3_INSTRUCTIONS)
#undef DECODE_XX3_INSTRUCTIONS
PPC_XX3_OPCODE_SCALAR_LIST(DECODE_XX3_SCALAR_INSTRUCTIONS)
#undef DECODE_XX3_SCALAR_INSTRUCTIONS
}
// Some encodings have integers hard coded in the middle, handle those first.
switch (EXT6 | (instr->BitField(20, 16)) | (instr->BitField(10, 2))) {
......
......@@ -4408,6 +4408,20 @@ void Simulator::ExecuteGeneric(Instruction* instr) {
type b_val = get_simd_register_by_lane<type>(b, i); \
set_simd_register_by_lane<type>(t, i, a_val op b_val ? a_val : b_val); \
}
case XSMINDP: {
  // Scalar double minimum: d[t] = VSXFPMin(d[a], d[b]).
  // NOTE(review): DECODE_VX_INSTRUCTION presumably declares t, a and b from
  // the instruction's register fields; confirm against its definition.
  DECODE_VX_INSTRUCTION(t, a, b, T)
  double lhs = get_double_from_d_register(a);
  double rhs = get_double_from_d_register(b);
  set_d_register_from_double(t, VSXFPMin<double>(lhs, rhs));
  break;
}
case XSMAXDP: {
  // Scalar double maximum: d[t] = VSXFPMax(d[a], d[b]).
  // NOTE(review): DECODE_VX_INSTRUCTION presumably declares t, a and b from
  // the instruction's register fields; confirm against its definition.
  DECODE_VX_INSTRUCTION(t, a, b, T)
  double lhs = get_double_from_d_register(a);
  double rhs = get_double_from_d_register(b);
  set_d_register_from_double(t, VSXFPMax<double>(lhs, rhs));
  break;
}
case XVMINDP: {
DECODE_VX_INSTRUCTION(t, a, b, T)
FOR_EACH_LANE(i, double) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment