Commit 7993ec19 authored by Alexandra Hájková, committed by Martin Storsjö

hevc: Add hevc_get_pixel_4/8/12/16/24/32/48/64

Checkasm timings:
block size bitdepth  C       NEON
4           8 bit:    146.7   48.7
           10 bit:    146.7   52.7
8           8 bit:    430.3   84.4
           10 bit:    430.4  119.5
12          8 bit:    812.8  141.0
           10 bit:    812.8  195.0
16          8 bit:   1499.1  268.0
           10 bit:   1498.9  368.4
24          8 bit:   4394.2  574.8
           10 bit:   3696.3  804.8
32          8 bit:   5108.6  568.9
           10 bit:   4249.6  918.8
48          8 bit:  16819.6 2304.9
           10 bit:  13882.0 3178.5
64          8 bit:  13490.8 1799.5
           10 bit:  11018.5 2519.4
Signed-off-by: Martin Storsjö <martin@martin.st>
parent 3ff8fbbf
......@@ -135,7 +135,8 @@ NEON-OBJS-$(CONFIG_AAC_DECODER) += arm/aacpsdsp_neon.o \
NEON-OBJS-$(CONFIG_APE_DECODER) += arm/apedsp_neon.o
NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_neon.o \
arm/synth_filter_neon.o
NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevc_idct.o
NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevc_idct.o \
arm/hevc_mc.o
NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o
NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \
arm/rv40dsp_neon.o
......
This diff is collapsed.
......@@ -61,6 +61,55 @@ void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_32x32_10_neon(int16_t *coeffs, int col_limit);
/*
 * Prototypes for the ff_hevc_get_pixels_<size>_<depth>_neon routines:
 * one per block size (4, 8, 12, 16, 24, 32, 48, 64) and bit depth (8, 10).
 * Only the declarations appear here; the implementations are not visible
 * in this section. All variants share one signature, so the declarations
 * are generated from a single template.
 */
#define HEVC_GET_PIXELS_PROTO(size, depth)                                    \
void ff_hevc_get_pixels_ ## size ## _ ## depth ## _neon(                      \
    int16_t *dst, ptrdiff_t dststride, uint8_t *src,                          \
    ptrdiff_t srcstride, int height, int mx, int my,                          \
    int16_t *mcbuffer)

/* Declares the 8-bit and then the 10-bit variant for one block size. */
#define HEVC_GET_PIXELS_PROTOS(size)                                          \
    HEVC_GET_PIXELS_PROTO(size, 8);                                           \
    HEVC_GET_PIXELS_PROTO(size, 10)

HEVC_GET_PIXELS_PROTOS(4);
HEVC_GET_PIXELS_PROTOS(8);
HEVC_GET_PIXELS_PROTOS(12);
HEVC_GET_PIXELS_PROTOS(16);
HEVC_GET_PIXELS_PROTOS(24);
HEVC_GET_PIXELS_PROTOS(32);
HEVC_GET_PIXELS_PROTOS(48);
HEVC_GET_PIXELS_PROTOS(64);

/* The helper macros are only needed for the declarations above. */
#undef HEVC_GET_PIXELS_PROTOS
#undef HEVC_GET_PIXELS_PROTO
av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
{
int cpu_flags = av_get_cpu_flags();
......@@ -81,6 +130,15 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
c->idct[1] = ff_hevc_idct_8x8_8_neon;
c->idct[2] = ff_hevc_idct_16x16_8_neon;
c->idct[3] = ff_hevc_idct_32x32_8_neon;
c->put_hevc_qpel[0][0][0] = ff_hevc_get_pixels_4_8_neon;
c->put_hevc_qpel[0][0][1] = ff_hevc_get_pixels_8_8_neon;
c->put_hevc_qpel[0][0][2] = ff_hevc_get_pixels_12_8_neon;
c->put_hevc_qpel[0][0][3] = ff_hevc_get_pixels_16_8_neon;
c->put_hevc_qpel[0][0][4] = ff_hevc_get_pixels_24_8_neon;
c->put_hevc_qpel[0][0][5] = ff_hevc_get_pixels_32_8_neon;
c->put_hevc_qpel[0][0][6] = ff_hevc_get_pixels_48_8_neon;
c->put_hevc_qpel[0][0][7] = ff_hevc_get_pixels_64_8_neon;
}
if (bit_depth == 10) {
c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
......@@ -97,6 +155,15 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
c->idct[1] = ff_hevc_idct_8x8_10_neon;
c->idct[2] = ff_hevc_idct_16x16_10_neon;
c->idct[3] = ff_hevc_idct_32x32_10_neon;
c->put_hevc_qpel[0][0][0] = ff_hevc_get_pixels_4_10_neon;
c->put_hevc_qpel[0][0][1] = ff_hevc_get_pixels_8_10_neon;
c->put_hevc_qpel[0][0][2] = ff_hevc_get_pixels_12_10_neon;
c->put_hevc_qpel[0][0][3] = ff_hevc_get_pixels_16_10_neon;
c->put_hevc_qpel[0][0][4] = ff_hevc_get_pixels_24_10_neon;
c->put_hevc_qpel[0][0][5] = ff_hevc_get_pixels_32_10_neon;
c->put_hevc_qpel[0][0][6] = ff_hevc_get_pixels_48_10_neon;
c->put_hevc_qpel[0][0][7] = ff_hevc_get_pixels_64_10_neon;
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment