Commit c0683dce authored by James Almer's avatar James Almer

Merge commit '0b9a237b'

* commit '0b9a237b':
  hevc: Add NEON 4x4 and 8x8 IDCT

[15:12:59] <@ubitux> hevc_idct_4x4_8_c: 389.1
[15:13:00] <@ubitux> hevc_idct_4x4_8_neon: 126.6
[15:13:02] <@ubitux> our ^
[15:13:06] <@ubitux> hevc_idct_4x4_8_c: 389.3
[15:13:08] <@ubitux> hevc_idct_4x4_8_neon: 107.8
[15:13:10] <@ubitux> hevc_idct_4x4_10_c: 418.6
[15:13:12] <@ubitux> hevc_idct_4x4_10_neon: 108.1
[15:13:14] <@ubitux> libav ^
[15:13:30] <@ubitux> so yeah, we can probably trash our versions here
Merged-by: 's avatarJames Almer <jamrial@gmail.com>
parents 59b00ffe 0b9a237b
...@@ -21,6 +21,6 @@ ...@@ -21,6 +21,6 @@
#include "libavcodec/hevcdsp.h" #include "libavcodec/hevcdsp.h"
void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_neon(HEVCDSPContext *c, const int bit_depth);
#endif /* AVCODEC_ARM_HEVCDSP_ARM_H */ #endif /* AVCODEC_ARM_HEVCDSP_ARM_H */
This diff is collapsed.
...@@ -19,14 +19,16 @@ ...@@ -19,14 +19,16 @@
*/ */
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/arm/cpu.h" #include "libavutil/arm/cpu.h"
#include "libavcodec/hevcdsp.h" #include "libavcodec/hevcdsp.h"
#include "hevcdsp_arm.h" #include "hevcdsp_arm.h"
av_cold void ff_hevcdsp_init_arm(HEVCDSPContext *c, const int bit_depth) av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, const int bit_depth)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (have_neon(cpu_flags)) if (have_neon(cpu_flags))
ff_hevcdsp_init_neon(c, bit_depth); ff_hevc_dsp_init_neon(c, bit_depth);
} }
...@@ -27,8 +27,10 @@ void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta ...@@ -27,8 +27,10 @@ void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta
void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
void ff_hevc_v_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q); void ff_hevc_v_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
void ff_hevc_h_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q); void ff_hevc_h_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
void ff_hevc_transform_4x4_neon_8(int16_t *coeffs, int col_limit); void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_transform_8x8_neon_8(int16_t *coeffs, int col_limit); void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_4x4_dc_neon_8(int16_t *coeffs); void ff_hevc_idct_4x4_dc_neon_8(int16_t *coeffs);
void ff_hevc_idct_8x8_dc_neon_8(int16_t *coeffs); void ff_hevc_idct_8x8_dc_neon_8(int16_t *coeffs);
void ff_hevc_idct_16x16_dc_neon_8(int16_t *coeffs); void ff_hevc_idct_16x16_dc_neon_8(int16_t *coeffs);
...@@ -142,7 +144,7 @@ void ff_hevc_put_qpel_bi_neon_wrapper(uint8_t *dst, ptrdiff_t dststride, uint8_t ...@@ -142,7 +144,7 @@ void ff_hevc_put_qpel_bi_neon_wrapper(uint8_t *dst, ptrdiff_t dststride, uint8_t
put_hevc_qpel_uw_neon[my][mx](dst, dststride, src, srcstride, width, height, src2, MAX_PB_SIZE); put_hevc_qpel_uw_neon[my][mx](dst, dststride, src, srcstride, width, height, src2, MAX_PB_SIZE);
} }
av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) av_cold void ff_hevc_dsp_init_neon(HEVCDSPContext *c, const int bit_depth)
{ {
if (bit_depth == 8) { if (bit_depth == 8) {
int x; int x;
...@@ -150,8 +152,8 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ...@@ -150,8 +152,8 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth)
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_neon; c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_neon;
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_neon; c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_neon;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_neon; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_neon;
c->idct[0] = ff_hevc_transform_4x4_neon_8; c->idct[0] = ff_hevc_idct_4x4_8_neon;
c->idct[1] = ff_hevc_transform_8x8_neon_8; c->idct[1] = ff_hevc_idct_8x8_8_neon;
c->idct_dc[0] = ff_hevc_idct_4x4_dc_neon_8; c->idct_dc[0] = ff_hevc_idct_4x4_dc_neon_8;
c->idct_dc[1] = ff_hevc_idct_8x8_dc_neon_8; c->idct_dc[1] = ff_hevc_idct_8x8_dc_neon_8;
c->idct_dc[2] = ff_hevc_idct_16x16_dc_neon_8; c->idct_dc[2] = ff_hevc_idct_16x16_dc_neon_8;
...@@ -221,4 +223,9 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ...@@ -221,4 +223,9 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth)
c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_qpel_uw_pixels_w48_neon_8; c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_qpel_uw_pixels_w48_neon_8;
c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_qpel_uw_pixels_w64_neon_8; c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_qpel_uw_pixels_w64_neon_8;
} }
if (bit_depth == 10) {
c->idct[0] = ff_hevc_idct_4x4_10_neon;
c->idct[1] = ff_hevc_idct_8x8_10_neon;
}
} }
...@@ -257,12 +257,12 @@ int i = 0; ...@@ -257,12 +257,12 @@ int i = 0;
break; break;
} }
if (ARCH_ARM)
ff_hevc_dsp_init_arm(hevcdsp, bit_depth);
if (ARCH_PPC) if (ARCH_PPC)
ff_hevc_dsp_init_ppc(hevcdsp, bit_depth); ff_hevc_dsp_init_ppc(hevcdsp, bit_depth);
if (ARCH_X86) if (ARCH_X86)
ff_hevc_dsp_init_x86(hevcdsp, bit_depth); ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
if (ARCH_ARM)
ff_hevcdsp_init_arm(hevcdsp, bit_depth);
if (ARCH_MIPS) if (ARCH_MIPS)
ff_hevc_dsp_init_mips(hevcdsp, bit_depth); ff_hevc_dsp_init_mips(hevcdsp, bit_depth);
} }
...@@ -127,8 +127,9 @@ void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth); ...@@ -127,8 +127,9 @@ void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
extern const int8_t ff_hevc_epel_filters[7][4]; extern const int8_t ff_hevc_epel_filters[7][4];
extern const int8_t ff_hevc_qpel_filters[3][16]; extern const int8_t ff_hevc_qpel_filters[3][16];
void ff_hevc_dsp_init_arm(HEVCDSPContext *c, const int bit_depth);
void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth);
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
void ff_hevcdsp_init_arm(HEVCDSPContext *c, const int bit_depth);
void ff_hevc_dsp_init_mips(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_mips(HEVCDSPContext *c, const int bit_depth);
#endif /* AVCODEC_HEVCDSP_H */ #endif /* AVCODEC_HEVCDSP_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment