Commit a6af4bf6 authored by Pierre Edouard Lepere, committed by Michael Niedermayer

x86: hevc: adding transform_add

Reviewed-by: James Almer <jamrial@gmail.com>
Approved-by: Ronald S. Bultje
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 5a22877e
...@@ -131,7 +131,8 @@ YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o ...@@ -131,7 +131,8 @@ YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o
YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o
YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_mc.o \ YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_mc.o \
x86/hevc_deblock.o \ x86/hevc_deblock.o \
x86/hevc_idct.o x86/hevc_idct.o \
x86/hevc_res_add.o
YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
......
This diff is collapsed.
...@@ -131,4 +131,20 @@ WEIGHTING_PROTOTYPES(8, sse4); ...@@ -131,4 +131,20 @@ WEIGHTING_PROTOTYPES(8, sse4);
WEIGHTING_PROTOTYPES(10, sse4); WEIGHTING_PROTOTYPES(10, sse4);
WEIGHTING_PROTOTYPES(12, sse4); WEIGHTING_PROTOTYPES(12, sse4);
///////////////////////////////////////////////////////////////////////////////
// TRANSFORM_ADD
///////////////////////////////////////////////////////////////////////////////
void ff_hevc_transform_add4_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
void ff_hevc_transform_add8_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
void ff_hevc_transform_add32_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
void ff_hevc_transform_add4_10_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
void ff_hevc_transform_add8_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
void ff_hevc_transform_add16_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
void ff_hevc_transform_add32_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
void ff_hevc_transform_add16_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
void ff_hevc_transform_add32_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
#endif // AVCODEC_X86_HEVCDSP_H #endif // AVCODEC_X86_HEVCDSP_H
...@@ -469,6 +469,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) ...@@ -469,6 +469,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (EXTERNAL_MMXEXT(cpu_flags)) { if (EXTERNAL_MMXEXT(cpu_flags)) {
c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext; c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext;
c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext; c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext;
c->transform_add[0] = ff_hevc_transform_add4_8_mmxext;
} }
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2; c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
...@@ -476,11 +477,15 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) ...@@ -476,11 +477,15 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (ARCH_X86_64) { if (ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
}
c->transform_add[2] = ff_hevc_transform_add16_8_sse2;
c->transform_add[3] = ff_hevc_transform_add32_8_sse2;
}
c->idct_dc[1] = ff_hevc_idct8x8_dc_8_sse2; c->idct_dc[1] = ff_hevc_idct8x8_dc_8_sse2;
c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2; c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2; c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;
c->transform_add[1] = ff_hevc_transform_add8_8_sse2;
} }
if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
...@@ -512,6 +517,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) ...@@ -512,6 +517,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
} }
} else if (bit_depth == 10) { } else if (bit_depth == 10) {
if (EXTERNAL_MMXEXT(cpu_flags)) { if (EXTERNAL_MMXEXT(cpu_flags)) {
c->transform_add[0] = ff_hevc_transform_add4_10_mmxext;
c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext; c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext;
c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext; c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext;
} }
...@@ -526,6 +532,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) ...@@ -526,6 +532,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[1] = ff_hevc_idct8x8_dc_10_sse2; c->idct_dc[1] = ff_hevc_idct8x8_dc_10_sse2;
c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2; c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2; c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2;
c->transform_add[1] = ff_hevc_transform_add8_10_sse2;
c->transform_add[2] = ff_hevc_transform_add16_10_sse2;
c->transform_add[3] = ff_hevc_transform_add32_10_sse2;
} }
if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3; c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
...@@ -551,9 +561,13 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) ...@@ -551,9 +561,13 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
} }
} }
if (EXTERNAL_AVX2(cpu_flags)) { if (EXTERNAL_AVX2(cpu_flags)) {
c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2; c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2; c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;
c->transform_add[2] = ff_hevc_transform_add16_10_avx2;
c->transform_add[3] = ff_hevc_transform_add32_10_avx2;
} }
} else if (bit_depth == 12) { } else if (bit_depth == 12) {
if (EXTERNAL_MMXEXT(cpu_flags)) { if (EXTERNAL_MMXEXT(cpu_flags)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment