Commit a34d9023, authored by Shivraj Patil, committed by Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC idct functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC idct functions in the new file hevc_idct_msa.c.
It also adds new generic macros (needed for this patch) to libavutil/mips/generic_macros_msa.h.
Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 7131aba9
...@@ -25,7 +25,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \ ...@@ -25,7 +25,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \
mips/hevc_mc_uni_msa.o \ mips/hevc_mc_uni_msa.o \
mips/hevc_mc_uniw_msa.o \ mips/hevc_mc_uniw_msa.o \
mips/hevc_mc_bi_msa.o \ mips/hevc_mc_bi_msa.o \
mips/hevc_mc_biw_msa.o mips/hevc_mc_biw_msa.o \
mips/hevc_idct_msa.o
MSA-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_msa.o MSA-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_msa.o
LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o
LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
This diff is collapsed.
...@@ -402,6 +402,20 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, ...@@ -402,6 +402,20 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
c->put_hevc_epel_bi_w[5][1][1] = ff_hevc_put_hevc_bi_w_epel_hv16_8_msa; c->put_hevc_epel_bi_w[5][1][1] = ff_hevc_put_hevc_bi_w_epel_hv16_8_msa;
c->put_hevc_epel_bi_w[6][1][1] = ff_hevc_put_hevc_bi_w_epel_hv24_8_msa; c->put_hevc_epel_bi_w[6][1][1] = ff_hevc_put_hevc_bi_w_epel_hv24_8_msa;
c->put_hevc_epel_bi_w[7][1][1] = ff_hevc_put_hevc_bi_w_epel_hv32_8_msa; c->put_hevc_epel_bi_w[7][1][1] = ff_hevc_put_hevc_bi_w_epel_hv32_8_msa;
c->idct[0] = ff_hevc_idct_4x4_msa;
c->idct[1] = ff_hevc_idct_8x8_msa;
c->idct[2] = ff_hevc_idct_16x16_msa;
c->idct[3] = ff_hevc_idct_32x32_msa;
c->idct_dc[0] = ff_hevc_idct_dc_4x4_msa;
c->idct_dc[1] = ff_hevc_idct_dc_8x8_msa;
c->idct_dc[2] = ff_hevc_idct_dc_16x16_msa;
c->idct_dc[3] = ff_hevc_idct_dc_32x32_msa;
c->transform_add[0] = ff_hevc_addblk_4x4_msa;
c->transform_add[1] = ff_hevc_addblk_8x8_msa;
c->transform_add[2] = ff_hevc_addblk_16x16_msa;
c->transform_add[3] = ff_hevc_addblk_32x32_msa;
c->idct_4x4_luma = ff_hevc_idct_luma_4x4_msa;
} }
} }
#endif // #if HAVE_MSA #endif // #if HAVE_MSA
......
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#ifndef AVCODEC_MIPS_HEVCDSP_MIPS_H
#define AVCODEC_MIPS_HEVCDSP_MIPS_H
#include "libavcodec/hevcdsp.h" #include "libavcodec/hevcdsp.h"
#define MC(PEL, DIR, WIDTH) \ #define MC(PEL, DIR, WIDTH) \
...@@ -427,3 +430,23 @@ BI_W_MC(epel, hv, 48); ...@@ -427,3 +430,23 @@ BI_W_MC(epel, hv, 48);
BI_W_MC(epel, hv, 64); BI_W_MC(epel, hv, 64);
#undef BI_W_MC #undef BI_W_MC
/*
 * MSA (MIPS-SIMD-Arch) versions of the HEVC idct functions.
 * hevc_dsp_init_msa() stores these in the HEVCDSPContext function tables;
 * each group below notes the slot it is assigned to.
 * NOTE(review): the definitions are presumably in mips/hevc_idct_msa.c
 * (the new file named by this commit) -- confirm.
 */

/*
 * Assigned to c->idct[0..3], in the order 4x4, 8x8, 16x16, 32x32.
 * NOTE(review): the meaning of col_limit is not visible here -- confirm
 * against the implementation.
 */
void ff_hevc_idct_4x4_msa(int16_t *coeffs, int col_limit);
void ff_hevc_idct_8x8_msa(int16_t *coeffs, int col_limit);
void ff_hevc_idct_16x16_msa(int16_t *coeffs, int col_limit);
void ff_hevc_idct_32x32_msa(int16_t *coeffs, int col_limit);
/* Assigned to c->idct_dc[0..3], in the order 4x4, 8x8, 16x16, 32x32. */
void ff_hevc_idct_dc_4x4_msa(int16_t *coeffs);
void ff_hevc_idct_dc_8x8_msa(int16_t *coeffs);
void ff_hevc_idct_dc_16x16_msa(int16_t *coeffs);
void ff_hevc_idct_dc_32x32_msa(int16_t *coeffs);
/*
 * Assigned to c->transform_add[0..3], in the order 4x4, 8x8, 16x16, 32x32.
 * NOTE(review): presumably adds the coefficient block to dst, with stride
 * as the dst row pitch -- confirm against the implementation.
 */
void ff_hevc_addblk_4x4_msa(uint8_t *dst, int16_t *pi16Coeffs,
ptrdiff_t stride);
void ff_hevc_addblk_8x8_msa(uint8_t *dst, int16_t *pi16Coeffs,
ptrdiff_t stride);
void ff_hevc_addblk_16x16_msa(uint8_t *dst, int16_t *pi16Coeffs,
ptrdiff_t stride);
void ff_hevc_addblk_32x32_msa(uint8_t *dst, int16_t *pi16Coeffs,
ptrdiff_t stride);
/* Assigned to c->idct_4x4_luma. */
void ff_hevc_idct_luma_4x4_msa(int16_t *pi16Coeffs);
#endif // #ifndef AVCODEC_MIPS_HEVCDSP_MIPS_H
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment