Commit 090647da, authored by gxw, committed by Michael Niedermayer

avcodec/mips: [loongson] optimize vp8 decoding in vp8dsp.

Optimize vp8 loop filter with mmi, four functions optimized:
1. ff_vp8_h_loop_filter8uv_mmi.
2. ff_vp8_v_loop_filter8uv_mmi.
3. ff_vp8_h_loop_filter16_mmi.
4. ff_vp8_v_loop_filter16_mmi.

VP8 decoding speed improved by about 50% (from 73 fps to 110 fps, tested on Loongson 3A3000).
Signed-off-by: Shiyou Yin <yinshiyou-hf@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
parent 8ef7fb86
This diff is collapsed.
......@@ -275,6 +275,34 @@
"punpcklwd "#m3", "#t2", "#t4" \n\t" \
"punpckhwd "#m4", "#t2", "#t4" \n\t"
/**
 * PSRAB_MMI: per-byte arithmetic right shift of 8 packed bytes.
 *
 * The macro expands to five instructions: the source bytes are widened
 * into two registers of halfwords (low four bytes via punpcklbh, high
 * four bytes via punpckhbh), both are shifted with psrah, and the two
 * results are packed back into one register of bytes with packsshb.
 *
 * src:    register holding the 8 input bytes
 * count:  register holding the halfword shift count, i.e. the wanted
 *         per-byte shift amount plus 8
 * lo, hi: scratch registers; both are overwritten. lo is written before
 *         src is read for the second time, so lo must be a different
 *         register from src.
 * dst:    register receiving the 8 result bytes; it is only written by
 *         the final instruction.
 */
#define PSRAB_MMI(src, count, lo, hi, dst)                     \
    "punpcklbh " #lo ", " #lo ", " #src " \n\t"                \
    "punpckhbh " #hi ", " #hi ", " #src " \n\t"                \
    "psrah " #lo ", " #lo ", " #count " \n\t"                  \
    "psrah " #hi ", " #hi ", " #count " \n\t"                  \
    "packsshb " #dst ", " #lo ", " #hi " \n\t"
/**
 * PSRLB_MMI: per-byte logical right shift of 8 packed bytes.
 *
 * The macro expands to five instructions: the source bytes are widened
 * into two registers of halfwords (low four bytes via punpcklbh, high
 * four bytes via punpckhbh), both are shifted with psrlh, and the two
 * results are packed back into one register of bytes with packsshb.
 *
 * src:    register holding the 8 input bytes
 * count:  register holding the halfword shift count, i.e. the wanted
 *         per-byte shift amount plus 8
 * lo, hi: scratch registers; both are overwritten. lo is written before
 *         src is read for the second time, so lo must be a different
 *         register from src.
 * dst:    register receiving the 8 result bytes; it is only written by
 *         the final instruction.
 *
 * NOTE(review): the final pack is the signed-saturating packsshb. That
 * looks lossless only when the per-byte shift is at least 1 (count >= 9),
 * since the shifted halfwords then fit in 0..127 -- confirm that no
 * caller passes count == 8.
 */
#define PSRLB_MMI(src, count, lo, hi, dst)                     \
    "punpcklbh " #lo ", " #lo ", " #src " \n\t"                \
    "punpckhbh " #hi ", " #hi ", " #src " \n\t"                \
    "psrlh " #lo ", " #lo ", " #count " \n\t"                  \
    "psrlh " #hi ", " #hi ", " #count " \n\t"                  \
    "packsshb " #dst ", " #lo ", " #hi " \n\t"
#define PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
"psrah "#fp1", "#fp1", "#shift" \n\t" \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment