Commit f7e9b9d2 authored by 周晓勇, committed by Michael Niedermayer

avcodec: loongson optimize h264dsp idct and loop filter with mmi

Change-Id: Ic87fb8f5cd22a502ff9dbbc5a5a8ea97cfc8a1dd
Signed-off-by: ZhouXiaoyong <zhouxiaoyong@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
parent b45ff138
...@@ -82,11 +82,33 @@ static av_cold void h264dsp_init_msa(H264DSPContext *c, ...@@ -82,11 +82,33 @@ static av_cold void h264dsp_init_msa(H264DSPContext *c,
#endif // #if HAVE_MSA #endif // #if HAVE_MSA
#if HAVE_MMI #if HAVE_MMI
static av_cold void h264dsp_init_mmi(H264DSPContext * c, static av_cold void h264dsp_init_mmi(H264DSPContext * c, const int bit_depth,
const int bit_depth,
const int chroma_format_idc) const int chroma_format_idc)
{ {
if (bit_depth == 8) { if (bit_depth == 8) {
c->h264_add_pixels4_clear = ff_h264_add_pixels4_8_mmi;
c->h264_idct_add = ff_h264_idct_add_8_mmi;
c->h264_idct8_add = ff_h264_idct8_add_8_mmi;
c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmi;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmi;
c->h264_idct_add16 = ff_h264_idct_add16_8_mmi;
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmi;
c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmi;
if (chroma_format_idc <= 1)
c->h264_idct_add8 = ff_h264_idct_add8_8_mmi;
else
c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmi;
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_mmi;
if (chroma_format_idc <= 1)
c->h264_chroma_dc_dequant_idct =
ff_h264_chroma_dc_dequant_idct_8_mmi;
else
c->h264_chroma_dc_dequant_idct =
ff_h264_chroma422_dc_dequant_idct_8_mmi;
c->weight_h264_pixels_tab[0] = ff_h264_weight_pixels16_8_mmi; c->weight_h264_pixels_tab[0] = ff_h264_weight_pixels16_8_mmi;
c->weight_h264_pixels_tab[1] = ff_h264_weight_pixels8_8_mmi; c->weight_h264_pixels_tab[1] = ff_h264_weight_pixels8_8_mmi;
c->weight_h264_pixels_tab[2] = ff_h264_weight_pixels4_8_mmi; c->weight_h264_pixels_tab[2] = ff_h264_weight_pixels4_8_mmi;
...@@ -94,6 +116,21 @@ static av_cold void h264dsp_init_mmi(H264DSPContext * c, ...@@ -94,6 +116,21 @@ static av_cold void h264dsp_init_mmi(H264DSPContext * c,
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_pixels16_8_mmi; c->biweight_h264_pixels_tab[0] = ff_h264_biweight_pixels16_8_mmi;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_pixels8_8_mmi; c->biweight_h264_pixels_tab[1] = ff_h264_biweight_pixels8_8_mmi;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_pixels4_8_mmi; c->biweight_h264_pixels_tab[2] = ff_h264_biweight_pixels4_8_mmi;
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_mmi;
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmi;
if (chroma_format_idc <= 1) {
c->h264_h_loop_filter_chroma =
ff_deblock_h_chroma_8_mmi;
c->h264_h_loop_filter_chroma_intra =
ff_deblock_h_chroma_intra_8_mmi;
}
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmi;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmi;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmi;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmi;
} }
} }
#endif /* HAVE_MMI */ #endif /* HAVE_MMI */
......
...@@ -319,6 +319,26 @@ void ff_vp8_pred8x8_129_dc_8_msa(uint8_t *src, ptrdiff_t stride); ...@@ -319,6 +319,26 @@ void ff_vp8_pred8x8_129_dc_8_msa(uint8_t *src, ptrdiff_t stride);
void ff_vp8_pred16x16_127_dc_8_msa(uint8_t *src, ptrdiff_t stride); void ff_vp8_pred16x16_127_dc_8_msa(uint8_t *src, ptrdiff_t stride);
void ff_vp8_pred16x16_129_dc_8_msa(uint8_t *src, ptrdiff_t stride); void ff_vp8_pred16x16_129_dc_8_msa(uint8_t *src, ptrdiff_t stride);
/*
 * Loongson MMI H.264 add-pixels, IDCT and DC dequant-IDCT entry points.
 * h264dsp_init_mmi() stores each of these in the H264DSPContext function
 * pointer of the matching name, and only when bit_depth == 8.
 */
void ff_h264_add_pixels4_8_mmi(uint8_t *_dst, int16_t *_src, int stride);
void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
int16_t *block, int stride, const uint8_t nnzc[15*8]);
void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
int16_t *block, int stride, const uint8_t nnzc[15*8]);
void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
int16_t *block, int stride, const uint8_t nnzc[15*8]);
/* Installed as h264_idct_add8 when chroma_format_idc <= 1. */
void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
int16_t *block, int stride, const uint8_t nnzc[15*8]);
/* Installed as h264_idct_add8 when chroma_format_idc > 1. */
void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
int16_t *block, int stride, const uint8_t nnzc[15*8]);
void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
int qmul);
/* Installed as h264_chroma_dc_dequant_idct when chroma_format_idc <= 1. */
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul);
/* Installed as h264_chroma_dc_dequant_idct when chroma_format_idc > 1. */
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul);
void ff_h264_weight_pixels16_8_mmi(uint8_t *block, int stride, int height, void ff_h264_weight_pixels16_8_mmi(uint8_t *block, int stride, int height,
int log2_denom, int weight, int offset); int log2_denom, int weight, int offset);
void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src, void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
...@@ -335,6 +355,27 @@ void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src, ...@@ -335,6 +355,27 @@ void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
int stride, int height, int log2_denom, int weightd, int weights, int stride, int height, int log2_denom, int weightd, int weights,
int offset); int offset);
/*
 * Loongson MMI H.264 loop-filter (deblocking) entry points.
 * h264dsp_init_mmi() installs them in the H264DSPContext
 * h264_{v,h}_loop_filter_{chroma,luma}[_intra] pointers when bit_depth == 8.
 * The non-intra variants take an extra tc0 argument.
 */
void ff_deblock_v_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0);
void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
int beta);
/* The two h_chroma filters are installed only when chroma_format_idc <= 1. */
void ff_deblock_h_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0);
void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
int beta);
void ff_deblock_v_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0);
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
int beta);
void ff_deblock_h_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0);
void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
int beta);
/*
 * NOTE(review): the v8 variants below are not assigned to any H264DSPContext
 * pointer in the visible part of h264dsp_init_mmi(); presumably they are
 * helpers used by the luma filters above -- confirm against the implementation.
 */
void ff_deblock_v8_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0);
void ff_deblock_v8_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
int beta);
void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t dst_stride); ptrdiff_t dst_stride);
void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment