Commit 738bc3e7 authored by James Almer

x86/vf_transpose: make ff_transpose_8x8_16_sse2 work on x86_32

Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
parent 2a75006d
...@@ -56,10 +56,7 @@ cglobal transpose_8x8_8, 4,5,8, src, src_linesize, dst, dst_linesize, linesize3 ...@@ -56,10 +56,7 @@ cglobal transpose_8x8_8, 4,5,8, src, src_linesize, dst, dst_linesize, linesize3
movq [dstq + linesize3q], m7 movq [dstq + linesize3q], m7
RET RET
%if ARCH_X86_64 cglobal transpose_8x8_16, 4,5,9, ARCH_X86_32 * 32, src, src_linesize, dst, dst_linesize, linesize3
INIT_XMM sse2
cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3
lea linesize3q, [src_linesizeq * 3] lea linesize3q, [src_linesizeq * 3]
movu m0, [srcq + src_linesizeq * 0] movu m0, [srcq + src_linesizeq * 0]
movu m1, [srcq + src_linesizeq * 1] movu m1, [srcq + src_linesizeq * 1]
...@@ -71,7 +68,11 @@ cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3 ...@@ -71,7 +68,11 @@ cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3
movu m6, [srcq + src_linesizeq * 2] movu m6, [srcq + src_linesizeq * 2]
movu m7, [srcq + linesize3q] movu m7, [srcq + linesize3q]
%if ARCH_X86_64
TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
%else
TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp + 16]
%endif
lea linesize3q, [dst_linesizeq * 3] lea linesize3q, [dst_linesizeq * 3]
movu [dstq + dst_linesizeq * 0], m0 movu [dstq + dst_linesizeq * 0], m0
...@@ -84,5 +85,3 @@ cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3 ...@@ -84,5 +85,3 @@ cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3
movu [dstq + dst_linesizeq * 2], m6 movu [dstq + dst_linesizeq * 2], m6
movu [dstq + linesize3q], m7 movu [dstq + linesize3q], m7
RET RET
%endif
...@@ -43,7 +43,7 @@ av_cold void ff_transpose_init_x86(TransVtable *v, int pixstep) ...@@ -43,7 +43,7 @@ av_cold void ff_transpose_init_x86(TransVtable *v, int pixstep)
v->transpose_8x8 = ff_transpose_8x8_8_sse2; v->transpose_8x8 = ff_transpose_8x8_8_sse2;
} }
if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && pixstep == 2) { if (EXTERNAL_SSE2(cpu_flags) && pixstep == 2) {
v->transpose_8x8 = ff_transpose_8x8_16_sse2; v->transpose_8x8 = ff_transpose_8x8_16_sse2;
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment