Commit 6c104826 authored by Diego Biurrun

x86: vc1dsp: port to cpuflags

parent d578f947
...@@ -34,7 +34,13 @@ section .text ...@@ -34,7 +34,13 @@ section .text
punpckl%1 m%2, m%4 punpckl%1 m%2, m%4
%endmacro %endmacro
%macro STORE_4_WORDS_MMX 6 %macro STORE_4_WORDS 6
%if cpuflag(sse4)
pextrw %1, %5, %6+0
pextrw %2, %5, %6+1
pextrw %3, %5, %6+2
pextrw %4, %5, %6+3
%else
movd %6d, %5 movd %6d, %5
%if mmsize==16 %if mmsize==16
psrldq %5, 4 psrldq %5, 4
...@@ -48,13 +54,7 @@ section .text ...@@ -48,13 +54,7 @@ section .text
mov %3, %6w mov %3, %6w
shr %6, 16 shr %6, 16
mov %4, %6w mov %4, %6w
%endmacro %endif
%macro STORE_4_WORDS_SSE4 6
pextrw %1, %5, %6+0
pextrw %2, %5, %6+1
pextrw %3, %5, %6+2
pextrw %4, %5, %6+3
%endmacro %endmacro
; in: p1 p0 q0 q1, clobbers p0 ; in: p1 p0 q0 q1, clobbers p0
...@@ -200,14 +200,14 @@ section .text ...@@ -200,14 +200,14 @@ section .text
VC1_FILTER %1 VC1_FILTER %1
punpcklbw m0, m1 punpcklbw m0, m1
%if %0 > 1 %if %0 > 1
STORE_4_WORDS_MMX [r0-1], [r0+r1-1], [r0+2*r1-1], [r0+r3-1], m0, %2 STORE_4_WORDS [r0-1], [r0+r1-1], [r0+2*r1-1], [r0+r3-1], m0, %2
%if %1 > 4 %if %1 > 4
psrldq m0, 4 psrldq m0, 4
STORE_4_WORDS_MMX [r4-1], [r4+r1-1], [r4+2*r1-1], [r4+r3-1], m0, %2 STORE_4_WORDS [r4-1], [r4+r1-1], [r4+2*r1-1], [r4+r3-1], m0, %2
%endif %endif
%else %else
STORE_4_WORDS_SSE4 [r0-1], [r0+r1-1], [r0+2*r1-1], [r0+r3-1], m0, 0 STORE_4_WORDS [r0-1], [r0+r1-1], [r0+2*r1-1], [r0+r3-1], m0, 0
STORE_4_WORDS_SSE4 [r4-1], [r4+r1-1], [r4+2*r1-1], [r4+r3-1], m0, 4 STORE_4_WORDS [r4-1], [r4+r1-1], [r4+2*r1-1], [r4+r3-1], m0, 4
%endif %endif
%endmacro %endmacro
...@@ -228,92 +228,93 @@ section .text ...@@ -228,92 +228,93 @@ section .text
imul r2, 0x01010101 imul r2, 0x01010101
%endmacro %endmacro
%macro VC1_LF_MMX 1 %macro VC1_LF 0
INIT_MMX cglobal vc1_v_loop_filter_internal
cglobal vc1_v_loop_filter_internal_%1
VC1_V_LOOP_FILTER 4, d VC1_V_LOOP_FILTER 4, d
ret ret
cglobal vc1_h_loop_filter_internal_%1 cglobal vc1_h_loop_filter_internal
VC1_H_LOOP_FILTER 4, r4 VC1_H_LOOP_FILTER 4, r4
ret ret
; void ff_vc1_v_loop_filter4_mmx2(uint8_t *src, int stride, int pq) ; void ff_vc1_v_loop_filter4_mmxext(uint8_t *src, int stride, int pq)
cglobal vc1_v_loop_filter4_%1, 3,5,0 cglobal vc1_v_loop_filter4, 3,5,0
START_V_FILTER START_V_FILTER
call vc1_v_loop_filter_internal_%1 call vc1_v_loop_filter_internal
RET RET
; void ff_vc1_h_loop_filter4_mmx2(uint8_t *src, int stride, int pq) ; void ff_vc1_h_loop_filter4_mmxext(uint8_t *src, int stride, int pq)
cglobal vc1_h_loop_filter4_%1, 3,5,0 cglobal vc1_h_loop_filter4, 3,5,0
START_H_FILTER 4 START_H_FILTER 4
call vc1_h_loop_filter_internal_%1 call vc1_h_loop_filter_internal
RET RET
; void ff_vc1_v_loop_filter8_mmx2(uint8_t *src, int stride, int pq) ; void ff_vc1_v_loop_filter8_mmxext(uint8_t *src, int stride, int pq)
cglobal vc1_v_loop_filter8_%1, 3,5,0 cglobal vc1_v_loop_filter8, 3,5,0
START_V_FILTER START_V_FILTER
call vc1_v_loop_filter_internal_%1 call vc1_v_loop_filter_internal
add r4, 4 add r4, 4
add r0, 4 add r0, 4
call vc1_v_loop_filter_internal_%1 call vc1_v_loop_filter_internal
RET RET
; void ff_vc1_h_loop_filter8_mmx2(uint8_t *src, int stride, int pq) ; void ff_vc1_h_loop_filter8_mmxext(uint8_t *src, int stride, int pq)
cglobal vc1_h_loop_filter8_%1, 3,5,0 cglobal vc1_h_loop_filter8, 3,5,0
START_H_FILTER 4 START_H_FILTER 4
call vc1_h_loop_filter_internal_%1 call vc1_h_loop_filter_internal
lea r0, [r0+4*r1] lea r0, [r0+4*r1]
call vc1_h_loop_filter_internal_%1 call vc1_h_loop_filter_internal
RET RET
%endmacro %endmacro
INIT_MMX mmxext
%define PABSW PABSW_MMXEXT %define PABSW PABSW_MMXEXT
VC1_LF_MMX mmx2 VC1_LF
INIT_XMM INIT_XMM sse2
; void ff_vc1_v_loop_filter8_sse2(uint8_t *src, int stride, int pq) ; void ff_vc1_v_loop_filter8_sse2(uint8_t *src, int stride, int pq)
cglobal vc1_v_loop_filter8_sse2, 3,5,8 cglobal vc1_v_loop_filter8, 3,5,8
START_V_FILTER START_V_FILTER
VC1_V_LOOP_FILTER 8, q VC1_V_LOOP_FILTER 8, q
RET RET
; void ff_vc1_h_loop_filter8_sse2(uint8_t *src, int stride, int pq) ; void ff_vc1_h_loop_filter8_sse2(uint8_t *src, int stride, int pq)
cglobal vc1_h_loop_filter8_sse2, 3,6,8 cglobal vc1_h_loop_filter8, 3,6,8
START_H_FILTER 8 START_H_FILTER 8
VC1_H_LOOP_FILTER 8, r5 VC1_H_LOOP_FILTER 8, r5
RET RET
%define PABSW PABSW_SSSE3 %define PABSW PABSW_SSSE3
INIT_MMX INIT_MMX ssse3
; void ff_vc1_v_loop_filter4_ssse3(uint8_t *src, int stride, int pq) ; void ff_vc1_v_loop_filter4_ssse3(uint8_t *src, int stride, int pq)
cglobal vc1_v_loop_filter4_ssse3, 3,5,0 cglobal vc1_v_loop_filter4, 3,5,0
START_V_FILTER START_V_FILTER
VC1_V_LOOP_FILTER 4, d VC1_V_LOOP_FILTER 4, d
RET RET
; void ff_vc1_h_loop_filter4_ssse3(uint8_t *src, int stride, int pq) ; void ff_vc1_h_loop_filter4_ssse3(uint8_t *src, int stride, int pq)
cglobal vc1_h_loop_filter4_ssse3, 3,5,0 cglobal vc1_h_loop_filter4, 3,5,0
START_H_FILTER 4 START_H_FILTER 4
VC1_H_LOOP_FILTER 4, r4 VC1_H_LOOP_FILTER 4, r4
RET RET
INIT_XMM INIT_XMM ssse3
; void ff_vc1_v_loop_filter8_ssse3(uint8_t *src, int stride, int pq) ; void ff_vc1_v_loop_filter8_ssse3(uint8_t *src, int stride, int pq)
cglobal vc1_v_loop_filter8_ssse3, 3,5,8 cglobal vc1_v_loop_filter8, 3,5,8
START_V_FILTER START_V_FILTER
VC1_V_LOOP_FILTER 8, q VC1_V_LOOP_FILTER 8, q
RET RET
; void ff_vc1_h_loop_filter8_ssse3(uint8_t *src, int stride, int pq) ; void ff_vc1_h_loop_filter8_ssse3(uint8_t *src, int stride, int pq)
cglobal vc1_h_loop_filter8_ssse3, 3,6,8 cglobal vc1_h_loop_filter8, 3,6,8
START_H_FILTER 8 START_H_FILTER 8
VC1_H_LOOP_FILTER 8, r5 VC1_H_LOOP_FILTER 8, r5
RET RET
INIT_XMM sse4
; void ff_vc1_h_loop_filter8_sse4(uint8_t *src, int stride, int pq) ; void ff_vc1_h_loop_filter8_sse4(uint8_t *src, int stride, int pq)
cglobal vc1_h_loop_filter8_sse4, 3,5,8 cglobal vc1_h_loop_filter8, 3,5,8
START_H_FILTER 8 START_H_FILTER 8
VC1_H_LOOP_FILTER 8 VC1_H_LOOP_FILTER 8
RET RET
...@@ -49,7 +49,7 @@ static void vc1_h_loop_filter16_ ## EXT(uint8_t *src, int stride, int pq) \ ...@@ -49,7 +49,7 @@ static void vc1_h_loop_filter16_ ## EXT(uint8_t *src, int stride, int pq) \
} }
#if HAVE_YASM #if HAVE_YASM
LOOP_FILTER(mmx2) LOOP_FILTER(mmxext)
LOOP_FILTER(sse2) LOOP_FILTER(sse2)
LOOP_FILTER(ssse3) LOOP_FILTER(ssse3)
...@@ -98,7 +98,7 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) ...@@ -98,7 +98,7 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
} }
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (mm_flags & AV_CPU_FLAG_MMXEXT) {
ASSIGN_LF(mmx2); ASSIGN_LF(mmxext);
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmx2; dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmx2;
} else if (mm_flags & AV_CPU_FLAG_3DNOW) { } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow; dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment