Commit c0942b7a authored by Shiyou Yin, committed by Michael Niedermayer

avcodec/mips: [loongson] optimize put_hevc_epel_bi_hv_8 with mmi.

Optimize put_hevc_epel_bi_hv_8 with mmi in the case width=4/8/12/16/24/32.
This optimization improved HEVC decoding performance by 1.7% (2.30x to 2.34x, tested on Loongson 3A3000).
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
parent 0c434292
......@@ -55,6 +55,13 @@ static av_cold void hevc_dsp_init_mmi(HEVCDSPContext *c,
c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_pel_bi_pixels24_8_mmi;
c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_pel_bi_pixels32_8_mmi;
c->put_hevc_epel_bi[1][1][1] = ff_hevc_put_hevc_epel_bi_hv4_8_mmi;
c->put_hevc_epel_bi[3][1][1] = ff_hevc_put_hevc_epel_bi_hv8_8_mmi;
c->put_hevc_epel_bi[4][1][1] = ff_hevc_put_hevc_epel_bi_hv12_8_mmi;
c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_epel_bi_hv16_8_mmi;
c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_epel_bi_hv24_8_mmi;
c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_epel_bi_hv32_8_mmi;
c->put_hevc_qpel_uni[1][1][1] = ff_hevc_put_hevc_qpel_uni_hv4_8_mmi;
c->put_hevc_qpel_uni[3][1][1] = ff_hevc_put_hevc_qpel_uni_hv8_8_mmi;
c->put_hevc_qpel_uni[4][1][1] = ff_hevc_put_hevc_qpel_uni_hv12_8_mmi;
......
......@@ -524,6 +524,12 @@ L_BI_MC(qpel, hv, 32, mmi);
L_BI_MC(qpel, hv, 48, mmi);
L_BI_MC(qpel, hv, 64, mmi);
L_BI_MC(epel, hv, 4, mmi);
L_BI_MC(epel, hv, 8, mmi);
L_BI_MC(epel, hv, 12, mmi);
L_BI_MC(epel, hv, 16, mmi);
L_BI_MC(epel, hv, 24, mmi);
L_BI_MC(epel, hv, 32, mmi);
#undef L_BI_MC
#define L_UNI_MC(PEL, DIR, WIDTH, TYPE) \
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment