Commit 004357a1, authored by Daniel Kang and committed by Carl Eugen Hoyos

Fix compilation on x86-32 with --disable-optimizations,

fixes issue 2127.

Patch by Daniel Kang, daniel.d.kang at gmail

Originally committed as revision 26204 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent ba763783
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <stdint.h> #include <stdint.h>
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavutil/x86_cpu.h"
typedef struct { uint64_t a, b; } xmm_reg; typedef struct { uint64_t a, b; } xmm_reg;
...@@ -94,32 +95,31 @@ extern const double ff_pd_2[2]; ...@@ -94,32 +95,31 @@ extern const double ff_pd_2[2];
SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\ SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */ SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ static inline void transpose4x4(uint8_t *dst, uint8_t *src, x86_reg dst_stride, x86_reg src_stride){
__asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
"movd %4, %%mm0 \n\t" "movd (%1), %%mm0 \n\t"
"movd %5, %%mm1 \n\t" "add %3, %1 \n\t"
"movd %6, %%mm2 \n\t" "movd (%1), %%mm1 \n\t"
"movd %7, %%mm3 \n\t" "movd (%1,%3,1), %%mm2 \n\t"
"movd (%1,%3,2), %%mm3 \n\t"
"punpcklbw %%mm1, %%mm0 \n\t" "punpcklbw %%mm1, %%mm0 \n\t"
"punpcklbw %%mm3, %%mm2 \n\t" "punpcklbw %%mm3, %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
"punpcklwd %%mm2, %%mm0 \n\t" "punpcklwd %%mm2, %%mm0 \n\t"
"punpckhwd %%mm2, %%mm1 \n\t" "punpckhwd %%mm2, %%mm1 \n\t"
"movd %%mm0, %0 \n\t" "movd %%mm0, (%0) \n\t"
"add %2, %0 \n\t"
"punpckhdq %%mm0, %%mm0 \n\t" "punpckhdq %%mm0, %%mm0 \n\t"
"movd %%mm0, %1 \n\t" "movd %%mm0, (%0) \n\t"
"movd %%mm1, %2 \n\t" "movd %%mm1, (%0,%2,1) \n\t"
"punpckhdq %%mm1, %%mm1 \n\t" "punpckhdq %%mm1, %%mm1 \n\t"
"movd %%mm1, %3 \n\t" "movd %%mm1, (%0,%2,2) \n\t"
: "=m" (*(uint32_t*)(dst + 0*dst_stride)), : "+&r" (dst),
"=m" (*(uint32_t*)(dst + 1*dst_stride)), "+&r" (src)
"=m" (*(uint32_t*)(dst + 2*dst_stride)), : "r" (dst_stride),
"=m" (*(uint32_t*)(dst + 3*dst_stride)) "r" (src_stride)
: "m" (*(uint32_t*)(src + 0*src_stride)), : "memory"
"m" (*(uint32_t*)(src + 1*src_stride)),
"m" (*(uint32_t*)(src + 2*src_stride)),
"m" (*(uint32_t*)(src + 3*src_stride))
); );
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment