Commit 51fd962c authored by Michael Niedermayer

Merge commit 'c74b8669'

* commit 'c74b8669':
  x86/synth_filter: add synth_filter_fma3
  x86/synth_filter: add synth_filter_avx
  x86/synth_filter: add synth_filter_sse

Conflicts:
	libavcodec/x86/dcadsp.asm
	libavcodec/x86/dcadsp_init.c

See: 64672098
See: 68c3ed93
See: 7fd64e3e
See: aa1f3801
See: dfd865e5
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents b00f6bb9 c74b8669
...@@ -292,7 +292,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ ...@@ -292,7 +292,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
%define scale m0 %define scale m0
%if ARCH_X86_32 || WIN64 %if ARCH_X86_32 || WIN64
%if cpuflag(sse2) && notcpuflag(avx) %if cpuflag(sse2) && notcpuflag(avx)
movd m0, scalem movd scale, scalem
SPLATD m0 SPLATD m0
%else %else
VBROADCASTSS m0, scalem VBROADCASTSS m0, scalem
...@@ -311,7 +311,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ ...@@ -311,7 +311,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
sub r5q, offmp sub r5q, offmp
and r5q, -64 and r5q, -64
shl r5q, 2 shl r5q, 2
%if ARCH_X86_32 || mmsize < 32 %if ARCH_X86_32 || notcpuflag(avx)
mov OFFQ, r5q mov OFFQ, r5q
%define i r5q %define i r5q
mov i, 16 * 4 - (ARCH_X86_64 + 1) * mmsize ; main loop counter mov i, 16 * 4 - (ARCH_X86_64 + 1) * mmsize ; main loop counter
...@@ -337,7 +337,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ ...@@ -337,7 +337,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
%define j r3q %define j r3q
mov win, windowm mov win, windowm
mov ptr1, synth_bufm mov ptr1, synth_bufm
%if ARCH_X86_32 || mmsize < 32 %if ARCH_X86_32 || notcpuflag(avx)
add win, i add win, i
add ptr1, i add ptr1, i
%endif %endif
...@@ -356,7 +356,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ ...@@ -356,7 +356,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
mov ptr2, synth_bufmp mov ptr2, synth_bufmp
; prepare the inner loop counter ; prepare the inner loop counter
mov j, OFFQ mov j, OFFQ
%if ARCH_X86_32 || mmsize < 32 %if ARCH_X86_32 || notcpuflag(avx)
sub ptr2, i sub ptr2, i
%endif %endif
.loop1: .loop1:
...@@ -403,7 +403,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ ...@@ -403,7 +403,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
mova [outq + i + 0 * 4 + mmsize], m7 mova [outq + i + 0 * 4 + mmsize], m7
mova [outq + i + 16 * 4 + mmsize], m8 mova [outq + i + 16 * 4 + mmsize], m8
%endif %endif
%if ARCH_X86_32 || mmsize < 32 %if ARCH_X86_32 || notcpuflag(avx)
sub i, (ARCH_X86_64 + 1) * mmsize sub i, (ARCH_X86_64 + 1) * mmsize
jge .mainloop jge .mainloop
%endif %endif
......
...@@ -82,7 +82,9 @@ SYNTH_FILTER_FUNC(sse) ...@@ -82,7 +82,9 @@ SYNTH_FILTER_FUNC(sse)
#endif #endif
SYNTH_FILTER_FUNC(sse2) SYNTH_FILTER_FUNC(sse2)
SYNTH_FILTER_FUNC(avx) SYNTH_FILTER_FUNC(avx)
#if HAVE_FMA3_EXTERNAL
SYNTH_FILTER_FUNC(fma3) SYNTH_FILTER_FUNC(fma3)
#endif
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment