Commit b94cd551 authored by Martin Vignali

avfilter/x86/vf_interlace : add AVX2 version

parent ef21033c
...@@ -39,6 +39,20 @@ SECTION .text ...@@ -39,6 +39,20 @@ SECTION .text
pcmpeq%1 m6, m6 pcmpeq%1 m6, m6
test hq, mmsize
je .loop
;process 1 * mmsize
movu m0, [mrefq+hq]
pavg%1 m0, [prefq+hq]
pxor m0, m6
pxor m2, m6, [srcq+hq]
pavg%1 m0, m2
pxor m0, m6
mova [dstq+hq], m0
add hq, mmsize
jge .end
.loop: .loop:
movu m0, [mrefq+hq] movu m0, [mrefq+hq]
movu m1, [mrefq+hq+mmsize] movu m1, [mrefq+hq+mmsize]
...@@ -57,7 +71,9 @@ SECTION .text ...@@ -57,7 +71,9 @@ SECTION .text
add hq, 2*mmsize add hq, 2*mmsize
jl .loop jl .loop
REP_RET
.end:
REP_RET
%endmacro %endmacro
%macro LOWPASS_LINE 0 %macro LOWPASS_LINE 0
...@@ -201,5 +217,10 @@ LOWPASS_LINE ...@@ -201,5 +217,10 @@ LOWPASS_LINE
INIT_XMM avx INIT_XMM avx
LOWPASS_LINE LOWPASS_LINE
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
LOWPASS_LINE
%endif
INIT_XMM sse2 INIT_XMM sse2
LOWPASS_LINE_COMPLEX LOWPASS_LINE_COMPLEX
...@@ -32,6 +32,9 @@ void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize, ...@@ -32,6 +32,9 @@ void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref, const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max); ptrdiff_t pref, int clip_max);
void ff_lowpass_line_avx2 (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max);
void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref, const uint8_t *srcp, ptrdiff_t mref,
...@@ -39,6 +42,9 @@ void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize, ...@@ -39,6 +42,9 @@ void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref, const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max); ptrdiff_t pref, int clip_max);
void ff_lowpass_line_16_avx2 (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max);
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref, const uint8_t *srcp, ptrdiff_t mref,
...@@ -62,6 +68,9 @@ av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth) ...@@ -62,6 +68,9 @@ av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth)
if (EXTERNAL_AVX(cpu_flags)) if (EXTERNAL_AVX(cpu_flags))
if (s->lowpass == VLPF_LIN) if (s->lowpass == VLPF_LIN)
s->lowpass_line = ff_lowpass_line_16_avx; s->lowpass_line = ff_lowpass_line_16_avx;
if (EXTERNAL_AVX2_FAST(cpu_flags))
if (s->lowpass == VLPF_LIN)
s->lowpass_line = ff_lowpass_line_16_avx2;
} else { } else {
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
if (s->lowpass == VLPF_LIN) if (s->lowpass == VLPF_LIN)
...@@ -72,5 +81,8 @@ av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth) ...@@ -72,5 +81,8 @@ av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth)
if (EXTERNAL_AVX(cpu_flags)) if (EXTERNAL_AVX(cpu_flags))
if (s->lowpass == VLPF_LIN) if (s->lowpass == VLPF_LIN)
s->lowpass_line = ff_lowpass_line_avx; s->lowpass_line = ff_lowpass_line_avx;
if (EXTERNAL_AVX2_FAST(cpu_flags))
if (s->lowpass == VLPF_LIN)
s->lowpass_line = ff_lowpass_line_avx2;
} }
} }
...@@ -33,6 +33,9 @@ void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize, ...@@ -33,6 +33,9 @@ void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref, const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max); ptrdiff_t pref, int clip_max);
void ff_lowpass_line_avx2 (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max);
void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref, const uint8_t *srcp, ptrdiff_t mref,
...@@ -40,6 +43,9 @@ void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize, ...@@ -40,6 +43,9 @@ void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref, const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max); ptrdiff_t pref, int clip_max);
void ff_lowpass_line_16_avx2 (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max);
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref, const uint8_t *srcp, ptrdiff_t mref,
...@@ -63,6 +69,11 @@ av_cold void ff_tinterlace_init_x86(TInterlaceContext *s) ...@@ -63,6 +69,11 @@ av_cold void ff_tinterlace_init_x86(TInterlaceContext *s)
if (EXTERNAL_AVX(cpu_flags)) if (EXTERNAL_AVX(cpu_flags))
if (!(s->flags & TINTERLACE_FLAG_CVLPF)) if (!(s->flags & TINTERLACE_FLAG_CVLPF))
s->lowpass_line = ff_lowpass_line_16_avx; s->lowpass_line = ff_lowpass_line_16_avx;
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
if (!(s->flags & TINTERLACE_FLAG_CVLPF)) {
s->lowpass_line = ff_lowpass_line_16_avx2;
}
}
} else { } else {
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
if (!(s->flags & TINTERLACE_FLAG_CVLPF)) if (!(s->flags & TINTERLACE_FLAG_CVLPF))
...@@ -73,5 +84,10 @@ av_cold void ff_tinterlace_init_x86(TInterlaceContext *s) ...@@ -73,5 +84,10 @@ av_cold void ff_tinterlace_init_x86(TInterlaceContext *s)
if (EXTERNAL_AVX(cpu_flags)) if (EXTERNAL_AVX(cpu_flags))
if (!(s->flags & TINTERLACE_FLAG_CVLPF)) if (!(s->flags & TINTERLACE_FLAG_CVLPF))
s->lowpass_line = ff_lowpass_line_avx; s->lowpass_line = ff_lowpass_line_avx;
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
if (!(s->flags & TINTERLACE_FLAG_CVLPF)) {
s->lowpass_line = ff_lowpass_line_avx2;
}
}
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment