Commit 4571c7c0, authored by gxw, committed by Michael Niedermayer

avcodec/mips: [loongson] mmi optimizations for VP9 put and avg functions

VP9 decoding speed improved by about 60.5% (from 38 fps to 61 fps, tested on Loongson 3A3000).
Reviewed-by: Shiyou Yin <yinshiyou-hf@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
parent 97f47fd6
...@@ -88,3 +88,4 @@ MMI-OBJS-$(CONFIG_VC1_DECODER) += mips/vc1dsp_mmi.o ...@@ -88,3 +88,4 @@ MMI-OBJS-$(CONFIG_VC1_DECODER) += mips/vc1dsp_mmi.o
MMI-OBJS-$(CONFIG_WMV2DSP) += mips/wmv2dsp_mmi.o MMI-OBJS-$(CONFIG_WMV2DSP) += mips/wmv2dsp_mmi.o
MMI-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_mmi.o MMI-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_mmi.o
MMI-OBJS-$(CONFIG_VP3DSP) += mips/vp3dsp_idct_mmi.o MMI-OBJS-$(CONFIG_VP3DSP) += mips/vp3dsp_idct_mmi.o
MMI-OBJS-$(CONFIG_VP9_DECODER) += mips/vp9_mc_mmi.o
This diff is collapsed.
...@@ -168,8 +168,50 @@ static av_cold void vp9dsp_init_msa(VP9DSPContext *dsp, int bpp) ...@@ -168,8 +168,50 @@ static av_cold void vp9dsp_init_msa(VP9DSPContext *dsp, int bpp)
} }
#endif // #if HAVE_MSA #endif // #if HAVE_MSA
#if HAVE_MMI
/**
 * Fill dsp->mc with the MMI 8-tap motion-compensation routines.
 *
 * The table is indexed as mc[size][filter][op][has_h][has_v], where
 *   size:  0..4 selects block widths 64, 32, 16, 8 and 4,
 *   op:    0 selects the "put" variants, 1 the "avg" variants,
 *   has_h / has_v: 1 when the routine filters in that direction.
 * Only the h, v and hv entries are written; mc[..][..][..][0][0] is left
 * untouched.
 */
static av_cold void vp9dsp_mc_init_mmi(VP9DSPContext *dsp)
{
/* Store a single ff_<op>_8tap_<filter>_<size><dir>_mmi pointer. */
#define MMI_MC_SLOT(size_idx, filter_idx, filter, op_idx, has_h, has_v,      \
                    size, dir, op)                                           \
    dsp->mc[size_idx][filter_idx][op_idx][has_h][has_v] =                    \
        ff_##op##_8tap_##filter##_##size##dir##_mmi

/* Store the smooth, regular and sharp variants for one size/direction. */
#define MMI_MC_FILTERS(size_idx, op_idx, has_h, has_v, size, dir, op)        \
    MMI_MC_SLOT(size_idx, FILTER_8TAP_SMOOTH,  smooth,                       \
                op_idx, has_h, has_v, size, dir, op);                        \
    MMI_MC_SLOT(size_idx, FILTER_8TAP_REGULAR, regular,                      \
                op_idx, has_h, has_v, size, dir, op);                        \
    MMI_MC_SLOT(size_idx, FILTER_8TAP_SHARP,   sharp,                        \
                op_idx, has_h, has_v, size, dir, op)

/* Cover every block width for one direction. */
#define MMI_MC_SIZES(op_idx, has_h, has_v, dir, op)                          \
    MMI_MC_FILTERS(0, op_idx, has_h, has_v, 64, dir, op);                    \
    MMI_MC_FILTERS(1, op_idx, has_h, has_v, 32, dir, op);                    \
    MMI_MC_FILTERS(2, op_idx, has_h, has_v, 16, dir, op);                    \
    MMI_MC_FILTERS(3, op_idx, has_h, has_v,  8, dir, op);                    \
    MMI_MC_FILTERS(4, op_idx, has_h, has_v,  4, dir, op)

/* Cover the hv, v and h directions for one operation. */
#define MMI_MC_DIRS(op_idx, op)                                              \
    MMI_MC_SIZES(op_idx, 1, 1, hv, op);                                      \
    MMI_MC_SIZES(op_idx, 0, 1, v,  op);                                      \
    MMI_MC_SIZES(op_idx, 1, 0, h,  op)

    MMI_MC_DIRS(0, put);
    MMI_MC_DIRS(1, avg);

#undef MMI_MC_DIRS
#undef MMI_MC_SIZES
#undef MMI_MC_FILTERS
#undef MMI_MC_SLOT
}
/**
 * Install the MMI routines into dsp.
 *
 * Nothing is installed unless bpp is 8; in that case the motion-compensation
 * table is filled by vp9dsp_mc_init_mmi().
 */
static av_cold void vp9dsp_init_mmi(VP9DSPContext *dsp, int bpp)
{
    if (bpp != 8)
        return;

    vp9dsp_mc_init_mmi(dsp);
}
#endif // #if HAVE_MMI
av_cold void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp) av_cold void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp)
{ {
#if HAVE_MMI
vp9dsp_init_mmi(dsp, bpp);
#endif // #if HAVE_MMI
#if HAVE_MSA #if HAVE_MSA
vp9dsp_init_msa(dsp, bpp); vp9dsp_init_msa(dsp, bpp);
#endif // #if HAVE_MSA #endif // #if HAVE_MSA
......
...@@ -234,4 +234,54 @@ void ff_tm_16x16_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, ...@@ -234,4 +234,54 @@ void ff_tm_16x16_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
void ff_tm_32x32_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, void ff_tm_32x32_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
const uint8_t *top); const uint8_t *top);
/*
 * Declare the six MMI 8-tap motion-compensation entry points
 * (put/avg x h/v/hv) for one block width and one filter type.
 *
 * SIZE:     block width used in the function name (64, 32, 16, 8 or 4)
 * type:     filter name used in the function name (regular, sharp, smooth)
 * type_idx: matching FILTER_8TAP_* constant; not used by the expansion and
 *           kept only so each invocation shows the filter it belongs to
 *
 * The expansion deliberately omits the final ';' so that every invocation
 * below supplies it. Ending the macro with ';' as well would leave a stray
 * empty declaration at file scope, which ISO C does not allow.
 */
#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, type, type_idx)                         \
void ff_put_8tap_##type##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride,   \
                                        const uint8_t *src,                  \
                                        ptrdiff_t srcstride,                 \
                                        int h, int mx, int my);              \
                                                                             \
void ff_put_8tap_##type##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride,   \
                                        const uint8_t *src,                  \
                                        ptrdiff_t srcstride,                 \
                                        int h, int mx, int my);              \
                                                                             \
void ff_put_8tap_##type##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride,  \
                                         const uint8_t *src,                 \
                                         ptrdiff_t srcstride,                \
                                         int h, int mx, int my);             \
                                                                             \
void ff_avg_8tap_##type##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride,   \
                                        const uint8_t *src,                  \
                                        ptrdiff_t srcstride,                 \
                                        int h, int mx, int my);              \
                                                                             \
void ff_avg_8tap_##type##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride,   \
                                        const uint8_t *src,                  \
                                        ptrdiff_t srcstride,                 \
                                        int h, int mx, int my);              \
                                                                             \
void ff_avg_8tap_##type##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride,  \
                                         const uint8_t *src,                 \
                                         ptrdiff_t srcstride,                \
                                         int h, int mx, int my)

VP9_8TAP_MIPS_MMI_FUNC(64, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_MIPS_MMI_FUNC(32, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_MIPS_MMI_FUNC(16, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_MIPS_MMI_FUNC(8, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_MIPS_MMI_FUNC(4, regular, FILTER_8TAP_REGULAR);

VP9_8TAP_MIPS_MMI_FUNC(64, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_MIPS_MMI_FUNC(32, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_MIPS_MMI_FUNC(16, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_MIPS_MMI_FUNC(8, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_MIPS_MMI_FUNC(4, sharp, FILTER_8TAP_SHARP);

VP9_8TAP_MIPS_MMI_FUNC(64, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_MIPS_MMI_FUNC(32, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_MIPS_MMI_FUNC(16, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_MIPS_MMI_FUNC(8, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_MIPS_MMI_FUNC(4, smooth, FILTER_8TAP_SMOOTH);
#undef VP9_8TAP_MIPS_MMI_FUNC
#endif // #ifndef AVCODEC_MIPS_VP9DSP_MIPS_H #endif // #ifndef AVCODEC_MIPS_VP9DSP_MIPS_H
...@@ -345,5 +345,20 @@ ...@@ -345,5 +345,20 @@
PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \ PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
PSRAH_4_MMI(fp5, fp6, fp7, fp8, shift) PSRAH_4_MMI(fp5, fp6, fp7, fp8, shift)
/**
 * brief: rounding right shift, (((value) + (1 << ((n) - 1))) >> (n)),
 *        emitted as a sequence of packed-word MMI instructions
 * fr_i0: src & dst (value); overwritten with the result
 * fr_i1: shift amount n; read but not written
 * fr_t0, fr_t1: temporary FPR (both overwritten)
 * gr_t0: temporary GPR (overwritten)
 *
 * Instruction sequence:
 *   li/dmtc1   - load the constant 1 into gr_t0 and move it to fr_t0
 *   punpcklwd  - replicate the low word of fr_t0 into both words
 *   psubw      - fr_t1 = fr_i1 - fr_t0, i.e. n - 1
 *   psllw      - fr_t1 = fr_t0 << fr_t1, i.e. the rounding bias 1 << (n - 1)
 *   paddw      - add the bias to fr_i0
 *   psraw      - arithmetic right shift of fr_i0 by fr_i1
 *
 * All arguments are pasted into the asm text with '#', so they must be the
 * operand spellings the surrounding asm block expects.
 * NOTE(review): presumably requires n >= 1 so that n - 1 is a valid shift
 * count - confirm against callers.
 */
#define ROUND_POWER_OF_TWO_MMI(fr_i0, fr_i1, fr_t0, fr_t1, gr_t0) \
"li "#gr_t0", 0x01 \n\t" \
"dmtc1 "#gr_t0", "#fr_t0" \n\t" \
"punpcklwd "#fr_t0", "#fr_t0", "#fr_t0" \n\t" \
"psubw "#fr_t1", "#fr_i1", "#fr_t0" \n\t" \
"psllw "#fr_t1", "#fr_t0", "#fr_t1" \n\t" \
"paddw "#fr_i0", "#fr_i0", "#fr_t1" \n\t" \
"psraw "#fr_i0", "#fr_i0", "#fr_i1" \n\t"
#endif /* AVUTILS_MIPS_MMIUTILS_H */ #endif /* AVUTILS_MIPS_MMIUTILS_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment