Commit 68841739 authored by Anton Khirnov

hevcdsp: split the pred functions by width

This should allow for more efficient SIMD.
parent 818bfe7f
This diff is collapsed.
...@@ -128,6 +128,18 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) ...@@ -128,6 +128,18 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
hevcdsp->put_hevc_epel[1][0][i] = FUNC(put_hevc_epel_v_ ## width, depth); \ hevcdsp->put_hevc_epel[1][0][i] = FUNC(put_hevc_epel_v_ ## width, depth); \
hevcdsp->put_hevc_epel[1][1][i] = FUNC(put_hevc_epel_hv_ ## width, depth); \ hevcdsp->put_hevc_epel[1][1][i] = FUNC(put_hevc_epel_hv_ ## width, depth); \
/*
 * Fill slot `i` of the four prediction function tables in `hevcdsp` with the
 * fixed-width variants for `width` at bit depth `depth`:
 *   put_unweighted_pred[i]     <- put_unweighted_pred_<width>
 *   put_unweighted_pred_avg[i] <- put_unweighted_pred_avg_<width>
 *   weighted_pred[i]           <- put_weighted_pred_<width>
 *   weighted_pred_avg[i]       <- put_weighted_pred_avg_<width>
 * Note that the weighted slots are named without the "put_" prefix while the
 * functions stored in them carry it.
 *
 * NOTE(review): the last line of the macro ends in a continuation backslash,
 * so whatever line follows the macro is spliced into it; that line has to be
 * blank.
 */
#define PRED_FUNC(i, width, depth) \
hevcdsp->put_unweighted_pred[i] = FUNC(put_unweighted_pred_ ## width, depth); \
hevcdsp->put_unweighted_pred_avg[i] = FUNC(put_unweighted_pred_avg_ ## width, depth); \
hevcdsp->weighted_pred[i] = FUNC(put_weighted_pred_ ## width, depth); \
hevcdsp->weighted_pred_avg[i] = FUNC(put_weighted_pred_avg_ ## width, depth); \
/*
 * Fill slot `i` of the four *_chroma prediction function tables in `hevcdsp`
 * with the fixed-width variants for `width` at bit depth `depth`.
 *
 * The functions stored here are the same put_*_pred_<width> functions that
 * the non-chroma tables use; only the destination tables differ. There are no
 * chroma-specific implementations referenced by this macro.
 *
 * NOTE(review): the last line of the macro ends in a continuation backslash,
 * so whatever line follows the macro is spliced into it; that line has to be
 * blank.
 */
#define PRED_FUNC_CHROMA(i, width, depth) \
hevcdsp->put_unweighted_pred_chroma[i] = FUNC(put_unweighted_pred_ ## width, depth); \
hevcdsp->put_unweighted_pred_avg_chroma[i] = FUNC(put_unweighted_pred_avg_ ## width, depth); \
hevcdsp->weighted_pred_chroma[i] = FUNC(put_weighted_pred_ ## width, depth); \
hevcdsp->weighted_pred_avg_chroma[i] = FUNC(put_weighted_pred_avg_ ## width, depth); \
#define HEVC_DSP(depth) \ #define HEVC_DSP(depth) \
hevcdsp->put_pcm = FUNC(put_pcm, depth); \ hevcdsp->put_pcm = FUNC(put_pcm, depth); \
hevcdsp->transquant_bypass[0] = FUNC(transquant_bypass4x4, depth); \ hevcdsp->transquant_bypass[0] = FUNC(transquant_bypass4x4, depth); \
...@@ -169,11 +181,22 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) ...@@ -169,11 +181,22 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
EPEL_FUNC(6, 24, depth); \ EPEL_FUNC(6, 24, depth); \
EPEL_FUNC(7, 32, depth); \ EPEL_FUNC(7, 32, depth); \
\ \
hevcdsp->put_unweighted_pred = FUNC(put_unweighted_pred, depth); \ PRED_FUNC(0, 4, depth); \
hevcdsp->put_unweighted_pred_avg = FUNC(put_unweighted_pred_avg, depth); \ PRED_FUNC(1, 8, depth); \
\ PRED_FUNC(2, 12, depth); \
hevcdsp->weighted_pred = FUNC(weighted_pred, depth); \ PRED_FUNC(3, 16, depth); \
hevcdsp->weighted_pred_avg = FUNC(weighted_pred_avg, depth); \ PRED_FUNC(4, 24, depth); \
PRED_FUNC(5, 32, depth); \
PRED_FUNC(6, 48, depth); \
PRED_FUNC(7, 64, depth); \
PRED_FUNC_CHROMA(0, 2, depth); \
PRED_FUNC_CHROMA(1, 4, depth); \
PRED_FUNC_CHROMA(2, 6, depth); \
PRED_FUNC_CHROMA(3, 8, depth); \
PRED_FUNC_CHROMA(4, 12, depth); \
PRED_FUNC_CHROMA(5, 16, depth); \
PRED_FUNC_CHROMA(6, 24, depth); \
PRED_FUNC_CHROMA(7, 32, depth); \
\ \
hevcdsp->hevc_h_loop_filter_luma = FUNC(hevc_h_loop_filter_luma, depth); \ hevcdsp->hevc_h_loop_filter_luma = FUNC(hevc_h_loop_filter_luma, depth); \
hevcdsp->hevc_v_loop_filter_luma = FUNC(hevc_v_loop_filter_luma, depth); \ hevcdsp->hevc_v_loop_filter_luma = FUNC(hevc_v_loop_filter_luma, depth); \
......
...@@ -65,18 +65,30 @@ typedef struct HEVCDSPContext { ...@@ -65,18 +65,30 @@ typedef struct HEVCDSPContext {
ptrdiff_t srcstride, int height, ptrdiff_t srcstride, int height,
int mx, int my, int16_t *mcbuffer); int mx, int my, int16_t *mcbuffer);
void (*put_unweighted_pred)(uint8_t *dst, ptrdiff_t dststride, int16_t *src, void (*put_unweighted_pred[8])(uint8_t *dst, ptrdiff_t dststride, int16_t *src,
ptrdiff_t srcstride, int width, int height); ptrdiff_t srcstride, int height);
void (*put_unweighted_pred_avg)(uint8_t *dst, ptrdiff_t dststride, void (*put_unweighted_pred_chroma[8])(uint8_t *dst, ptrdiff_t dststride, int16_t *src,
int16_t *src1, int16_t *src2, ptrdiff_t srcstride, int height);
ptrdiff_t srcstride, int width, int height); void (*put_unweighted_pred_avg[8])(uint8_t *dst, ptrdiff_t dststride,
void (*weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag, int16_t *src1, int16_t *src2,
uint8_t *dst, ptrdiff_t dststride, int16_t *src, ptrdiff_t srcstride, int height);
ptrdiff_t srcstride, int width, int height); void (*put_unweighted_pred_avg_chroma[8])(uint8_t *dst, ptrdiff_t dststride,
void (*weighted_pred_avg)(uint8_t denom, int16_t wl0Flag, int16_t wl1Flag, int16_t *src1, int16_t *src2,
int16_t ol0Flag, int16_t ol1Flag, uint8_t *dst, ptrdiff_t srcstride, int height);
ptrdiff_t dststride, int16_t *src1, int16_t *src2, void (*weighted_pred[8])(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
ptrdiff_t srcstride, int width, int height); uint8_t *dst, ptrdiff_t dststride, int16_t *src,
ptrdiff_t srcstride, int height);
void (*weighted_pred_chroma[8])(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
uint8_t *dst, ptrdiff_t dststride, int16_t *src,
ptrdiff_t srcstride, int height);
void (*weighted_pred_avg[8])(uint8_t denom, int16_t wl0Flag, int16_t wl1Flag,
int16_t ol0Flag, int16_t ol1Flag, uint8_t *dst,
ptrdiff_t dststride, int16_t *src1, int16_t *src2,
ptrdiff_t srcstride, int height);
void (*weighted_pred_avg_chroma[8])(uint8_t denom, int16_t wl0Flag, int16_t wl1Flag,
int16_t ol0Flag, int16_t ol1Flag, uint8_t *dst,
ptrdiff_t dststride, int16_t *src1, int16_t *src2,
ptrdiff_t srcstride, int height);
void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
int beta, int *tc, int beta, int *tc,
......
...@@ -1130,9 +1130,10 @@ EPEL(6) ...@@ -1130,9 +1130,10 @@ EPEL(6)
EPEL(4) EPEL(4)
EPEL(2) EPEL(2)
static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride, static av_always_inline void
int16_t *src, ptrdiff_t srcstride, FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
int width, int height) int16_t *src, ptrdiff_t srcstride,
int width, int height)
{ {
int x, y; int x, y;
pixel *dst = (pixel *)_dst; pixel *dst = (pixel *)_dst;
...@@ -1152,10 +1153,11 @@ static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride, ...@@ -1152,10 +1153,11 @@ static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
} }
} }
static void FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride, static av_always_inline void
int16_t *src1, int16_t *src2, FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
ptrdiff_t srcstride, int16_t *src1, int16_t *src2,
int width, int height) ptrdiff_t srcstride,
int width, int height)
{ {
int x, y; int x, y;
pixel *dst = (pixel *)_dst; pixel *dst = (pixel *)_dst;
...@@ -1177,10 +1179,11 @@ static void FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride, ...@@ -1177,10 +1179,11 @@ static void FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
} }
} }
static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag, static av_always_inline void
uint8_t *_dst, ptrdiff_t _dststride, FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
int16_t *src, ptrdiff_t srcstride, uint8_t *_dst, ptrdiff_t _dststride,
int width, int height) int16_t *src, ptrdiff_t srcstride,
int width, int height)
{ {
int shift, log2Wd, wx, ox, x, y, offset; int shift, log2Wd, wx, ox, x, y, offset;
pixel *dst = (pixel *)_dst; pixel *dst = (pixel *)_dst;
...@@ -1205,13 +1208,14 @@ static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag, ...@@ -1205,13 +1208,14 @@ static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
} }
} }
static void FUNC(weighted_pred_avg)(uint8_t denom, static av_always_inline void
int16_t wl0Flag, int16_t wl1Flag, FUNC(weighted_pred_avg)(uint8_t denom,
int16_t ol0Flag, int16_t ol1Flag, int16_t wl0Flag, int16_t wl1Flag,
uint8_t *_dst, ptrdiff_t _dststride, int16_t ol0Flag, int16_t ol1Flag,
int16_t *src1, int16_t *src2, uint8_t *_dst, ptrdiff_t _dststride,
ptrdiff_t srcstride, int16_t *src1, int16_t *src2,
int width, int height) ptrdiff_t srcstride,
int width, int height)
{ {
int shift, log2Wd, w0, w1, o0, o1, x, y; int shift, log2Wd, w0, w1, o0, o1, x, y;
pixel *dst = (pixel *)_dst; pixel *dst = (pixel *)_dst;
...@@ -1234,6 +1238,47 @@ static void FUNC(weighted_pred_avg)(uint8_t denom, ...@@ -1234,6 +1238,47 @@ static void FUNC(weighted_pred_avg)(uint8_t denom,
} }
} }
/*
 * Generate the four fixed-width prediction entry points for block width `w`:
 *   put_unweighted_pred_<w>      -> put_unweighted_pred(..., w, height)
 *   put_unweighted_pred_avg_<w>  -> put_unweighted_pred_avg(..., w, height)
 *   put_weighted_pred_<w>        -> weighted_pred(..., w, height)
 *   put_weighted_pred_avg_<w>    -> weighted_pred_avg(..., w, height)
 *
 * Each wrapper has the same parameter list as the generic function it
 * forwards to, minus the `width` argument, which is replaced by the literal
 * `w`. All other arguments are passed through unchanged.
 *
 * The generic functions are declared av_always_inline in this change,
 * presumably so that every wrapper gets its own copy specialized for the
 * constant width -- TODO confirm against the generated code.
 */
#define PUT_PRED(w) \
static void FUNC(put_unweighted_pred_ ## w)(uint8_t *dst, ptrdiff_t dststride, \
int16_t *src, ptrdiff_t srcstride, \
int height) \
{ \
FUNC(put_unweighted_pred)(dst, dststride, src, srcstride, w, height); \
} \
static void FUNC(put_unweighted_pred_avg_ ## w)(uint8_t *dst, ptrdiff_t dststride, \
int16_t *src1, int16_t *src2, \
ptrdiff_t srcstride, int height) \
{ \
FUNC(put_unweighted_pred_avg)(dst, dststride, src1, src2, srcstride, w, height); \
} \
static void FUNC(put_weighted_pred_ ## w)(uint8_t denom, int16_t weight, int16_t offset, \
uint8_t *dst, ptrdiff_t dststride, \
int16_t *src, ptrdiff_t srcstride, int height) \
{ \
FUNC(weighted_pred)(denom, weight, offset, \
dst, dststride, src, srcstride, w, height); \
} \
static void FUNC(put_weighted_pred_avg_ ## w)(uint8_t denom, int16_t weight0, int16_t weight1, \
int16_t offset0, int16_t offset1, \
uint8_t *dst, ptrdiff_t dststride, \
int16_t *src1, int16_t *src2, \
ptrdiff_t srcstride, int height) \
{ \
FUNC(weighted_pred_avg)(denom, weight0, weight1, offset0, offset1, \
dst, dststride, src1, src2, srcstride, w, height); \
}
/*
 * Instantiate the fixed-width wrappers for every width that the PRED_FUNC and
 * PRED_FUNC_CHROMA table setup refers to: 4, 8, 12, 16, 24, 32, 48, 64 for
 * the regular tables and 2, 4, 6, 8, 12, 16, 24, 32 for the *_chroma tables.
 * Widths shared by both sets are generated once.
 */
PUT_PRED(64)
PUT_PRED(48)
PUT_PRED(32)
PUT_PRED(24)
PUT_PRED(16)
PUT_PRED(12)
PUT_PRED(8)
PUT_PRED(6)
PUT_PRED(4)
PUT_PRED(2)
// line zero // line zero
#define P3 pix[-4 * xstride] #define P3 pix[-4 * xstride]
#define P2 pix[-3 * xstride] #define P2 pix[-3 * xstride]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment