Commit df13b75a authored by Shiyou Yin, committed by Michael Niedermayer

avcodec/mips: [loongson] reoptimize simple idct with mmi.

Performance of MPEG-4 decoding improved by about 23% (from 128 fps to 158 fps, tested on Loongson 3A3000).
Reoptimized the following functions with MMI:
1. ff_simple_idct_put_8_mmi
2. ff_simple_idct_add_8_mmi
3. ff_simple_idct_8_mmi
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
parent 1124df03
......@@ -20,6 +20,7 @@
*/
#include "idctdsp_mips.h"
#include "xvididct_mips.h"
#if HAVE_MSA
static av_cold void idctdsp_init_msa(IDCTDSPContext *c, AVCodecContext *avctx,
......@@ -48,8 +49,10 @@ static av_cold void idctdsp_init_mmi(IDCTDSPContext *c, AVCodecContext *avctx,
if ((avctx->lowres != 1) && (avctx->lowres != 2) && (avctx->lowres != 3) &&
(avctx->bits_per_raw_sample != 10) &&
(avctx->bits_per_raw_sample != 12) &&
(avctx->idct_algo == FF_IDCT_AUTO)) {
c->idct = ff_simple_idct_mmi;
((avctx->idct_algo == FF_IDCT_AUTO) || (avctx->idct_algo == FF_IDCT_SIMPLE))) {
c->idct_put = ff_simple_idct_put_8_mmi;
c->idct_add = ff_simple_idct_add_8_mmi;
c->idct = ff_simple_idct_8_mmi;
c->perm_type = FF_IDCT_PERM_NONE;
}
......
......@@ -46,8 +46,8 @@ void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
uint8_t *av_restrict pixels, ptrdiff_t line_size);
void ff_add_pixels_clamped_mmi(const int16_t *block,
uint8_t *av_restrict pixels, ptrdiff_t line_size);
void ff_simple_idct_mmi(int16_t *block);
void ff_simple_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_8_mmi(int16_t *block);
void ff_simple_idct_put_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_add_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
#endif // #ifndef AVCODEC_MIPS_IDCTDSP_MIPS_H
This diff is collapsed.
......@@ -201,6 +201,55 @@
#endif /* HAVE_LOONGSON2 */
/**
 * Back up floating-point registers into a local scratch buffer.
 *
 * Expands to a declaration of the local array temp_backup_reg (8 doubles,
 * 64 bytes) followed by an if/else that stores registers into it:
 *  - when _MIPS_SIM == _ABI64: $f24..$f31, two registers per gssqc1,
 *    at offsets 0x00, 0x10, 0x20 and 0x30;
 *  - otherwise: $f20, $f22, $f24, $f26, $f28 and $f30, two registers per
 *    gssqc1, at offsets 0x00, 0x10 and 0x20.
 * These look like the callee-saved FP registers of the respective ABI
 * (NOTE(review): confirm against the n64/o32 calling conventions).
 *
 * Each gssqc1 writes a 16-byte quad word, so the buffer is explicitly
 * aligned to 16 bytes; a plain double array only guarantees 8-byte
 * alignment, which can make the quad-word store fault depending on where
 * the compiler places the array on the stack.
 *
 * Because the expansion begins with a declaration, use this macro at block
 * scope where a declaration is allowed, and in the same scope as the
 * matching RECOVER_REG, which reads temp_backup_reg by name.
 */
#define BACKUP_REG \
double temp_backup_reg[8] __attribute__((aligned(16))); \
if (_MIPS_SIM == _ABI64) \
__asm__ volatile ( \
"gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
"gssqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
"gssqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
"gssqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
: \
: [temp]"r"(temp_backup_reg) \
: "memory" \
); \
else \
__asm__ volatile ( \
"gssqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
"gssqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
"gssqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
: \
: [temp]"r"(temp_backup_reg) \
: "memory" \
);
/**
 * Restore the floating-point registers from temp_backup_reg.
 *
 * Mirrors the layout used when the buffer was filled:
 *  - when _MIPS_SIM == _ABI64: reloads $f24..$f31, two registers per
 *    gslqc1, from offsets 0x00, 0x10, 0x20 and 0x30;
 *  - otherwise: reloads $f20, $f22, $f24, $f26, $f28 and $f30, two
 *    registers per gslqc1, from offsets 0x00, 0x10 and 0x20.
 *
 * The macro does not declare temp_backup_reg itself; it must be expanded
 * in a scope where that array is visible (BACKUP_REG declares it).
 * The "memory" clobber keeps the compiler from reordering memory accesses
 * across the asm statement.
 */
#define RECOVER_REG \
if (_MIPS_SIM == _ABI64) \
__asm__ volatile ( \
"gslqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
"gslqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
"gslqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
"gslqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
: \
: [temp]"r"(temp_backup_reg) \
: "memory" \
); \
else \
__asm__ volatile ( \
"gslqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
"gslqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
"gslqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
: \
: [temp]"r"(temp_backup_reg) \
: "memory" \
);
#define TRANSPOSE_4H(m1, m2, m3, m4, t1, t2, t3, t4, t5, r1, zero, shift) \
"li "#r1", 0x93 \n\t" \
"xor "#zero","#zero","#zero" \n\t" \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment