Commit 276bef53, authored by James Almer, committed by Michael Niedermayer

x86/hevc_deblock: add ff_hevc_[hv]_loop_filter_luma_{8, 10}_sse2

Signed-off-by: James Almer <jamrial@gmail.com>
Reviewed-by: Kieran Kunhya <kierank@obe.tv>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent ebd1c505
...@@ -728,7 +728,7 @@ cglobal hevc_h_loop_filter_chroma_10, 3, 4, 7, pix, stride, tc, pix0 ...@@ -728,7 +728,7 @@ cglobal hevc_h_loop_filter_chroma_10, 3, 4, 7, pix, stride, tc, pix0
RET RET
%if ARCH_X86_64 %if ARCH_X86_64
INIT_XMM ssse3 %macro LOOP_FILTER_LUMA 0
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_hevc_v_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int *_beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ; void ff_hevc_v_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int *_beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
...@@ -828,4 +828,10 @@ cglobal hevc_h_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc, count, pix ...@@ -828,4 +828,10 @@ cglobal hevc_h_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc, count, pix
movdqu [pixq+2*strideq], m6; q2 movdqu [pixq+2*strideq], m6; q2
.bypassluma: .bypassluma:
RET RET
%endmacro
INIT_XMM sse2
LOOP_FILTER_LUMA
INIT_XMM ssse3
LOOP_FILTER_LUMA
%endif %endif
...@@ -36,18 +36,20 @@ void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ...@@ -36,18 +36,20 @@ void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *_pix,
void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t stride, int *_beta, int *_tc, \ void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t stride, int *_beta, int *_tc, \
uint8_t *_no_p, uint8_t *_no_q); uint8_t *_no_p, uint8_t *_no_q);
#define LFC_FUNCS(type, depth) \ #define LFC_FUNCS(type, depth, opt) \
LFC_FUNC(h, depth, sse2) \ LFC_FUNC(h, depth, opt) \
LFC_FUNC(v, depth, sse2) LFC_FUNC(v, depth, opt)
#define LFL_FUNCS(type, depth) \ #define LFL_FUNCS(type, depth, opt) \
LFL_FUNC(h, depth, ssse3) \ LFL_FUNC(h, depth, opt) \
LFL_FUNC(v, depth, ssse3) LFL_FUNC(v, depth, opt)
LFC_FUNCS(uint8_t, 8) LFC_FUNCS(uint8_t, 8, sse2)
LFC_FUNCS(uint8_t, 10) LFC_FUNCS(uint8_t, 10, sse2)
LFL_FUNCS(uint8_t, 8) LFL_FUNCS(uint8_t, 8, sse2)
LFL_FUNCS(uint8_t, 10) LFL_FUNCS(uint8_t, 10, sse2)
LFL_FUNCS(uint8_t, 8, ssse3)
LFL_FUNCS(uint8_t, 10, ssse3)
#if HAVE_SSE2_EXTERNAL #if HAVE_SSE2_EXTERNAL
void ff_hevc_idct32_dc_add_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) void ff_hevc_idct32_dc_add_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
...@@ -429,6 +431,10 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth) ...@@ -429,6 +431,10 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (EXTERNAL_SSE2(mm_flags)) { if (EXTERNAL_SSE2(mm_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2; c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
if (ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
}
c->transform_dc_add[2] = ff_hevc_idct16_dc_add_8_sse2; c->transform_dc_add[2] = ff_hevc_idct16_dc_add_8_sse2;
c->transform_dc_add[3] = ff_hevc_idct32_dc_add_8_sse2; c->transform_dc_add[3] = ff_hevc_idct32_dc_add_8_sse2;
...@@ -460,7 +466,10 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth) ...@@ -460,7 +466,10 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (EXTERNAL_SSE2(mm_flags)) { if (EXTERNAL_SSE2(mm_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2; c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
if (ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
}
c->transform_dc_add[1] = ff_hevc_idct8_dc_add_10_sse2; c->transform_dc_add[1] = ff_hevc_idct8_dc_add_10_sse2;
c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_sse2; c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_sse2;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment