Commit 395f2e70 authored by Justin Ruggles

dsputil: use movups instead of movdqu in ff_emu_edge_core_sse()

This allows emulated_edge_mc_sse() and gmc_sse() to be used under
AV_CPU_FLAG_SSE.
parent 05d1e45d
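
Background for the one-instruction substitution below (context added here, not part of the commit): movdqu is an SSE2 instruction, while movups dates back to the original SSE instruction set. As plain unaligned 16-byte loads and stores, the two move identical bits, which is why the edge-emulation copies can switch to movups and drop the SSE2 requirement; the only caveat is a possible float/int domain-bypass penalty on some microarchitectures, which does not apply to straight memory-to-memory copies like these. A minimal C sketch using the intrinsics that compile to these two instructions (illustrative only, not code from this repository):

    #include <stdio.h>
    #include <string.h>
    #include <emmintrin.h> /* SSE2 intrinsics; includes xmmintrin.h (SSE) */

    int main(void)
    {
        /* deliberately misaligned 16-byte source, as in edge emulation */
        unsigned char src[17], a[16], b[16];
        for (int i = 0; i < 17; i++)
            src[i] = (unsigned char)i;

        /* movups path: unaligned 16-byte load/store, available since SSE */
        __m128 f = _mm_loadu_ps((const float *)(src + 1));
        _mm_storeu_ps((float *)a, f);

        /* movdqu path: unaligned 16-byte load/store, requires SSE2 */
        __m128i x = _mm_loadu_si128((const __m128i *)(src + 1));
        _mm_storeu_si128((__m128i *)b, x);

        /* both copy the same 16 bytes */
        printf("%s\n", memcmp(a, b, 16) ? "different" : "identical");
        return 0;
    }

On x86-64, where SSE2 is baseline, this compiles without extra flags and prints "identical".
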
@@ -2874,6 +2874,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 #if HAVE_YASM
         c->scalarproduct_float          = ff_scalarproduct_float_sse;
         c->butterflies_float_interleave = ff_butterflies_float_interleave_sse;
+
+        if (!high_bit_depth)
+            c->emulated_edge_mc = emulated_edge_mc_sse;
+        c->gmc = gmc_sse;
 #endif
     }
     if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW))
@@ -2894,10 +2898,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->apply_window_int16 = ff_apply_window_int16_sse2;
         }
     }
-
-    if (!high_bit_depth)
-        c->emulated_edge_mc = emulated_edge_mc_sse;
-    c->gmc= gmc_sse;
 #endif
     }
     if (mm_flags & AV_CPU_FLAG_SSSE3) {
...
@@ -637,7 +637,7 @@ cglobal emu_edge_core_%1, 2, 7, 0
 %ifnidn %3, mmx
 %rep %2/16
-        movdqu   xmm %+ %%sxidx, [r1+%%src_off]
+        movups   xmm %+ %%sxidx, [r1+%%src_off]
 %assign %%src_off %%src_off+16
 %assign %%sxidx   %%sxidx+1
 %endrep ; %2/16
@@ -686,7 +686,7 @@ cglobal emu_edge_core_%1, 2, 7, 0
 %ifnidn %3, mmx
 %rep %2/16
-        movdqu   [r0+%%dst_off], xmm %+ %%dxidx
+        movups   [r0+%%dst_off], xmm %+ %%dxidx
 %assign %%dst_off %%dst_off+16
 %assign %%dxidx   %%dxidx+1
 %endrep ; %2/16
@@ -915,7 +915,7 @@ ALIGN 64
 %define linesize r2m
         V_COPY_NPX %1, mm0, movq, 8, 0xFFFFFFF8
 %else ; !mmx
-        V_COPY_NPX %1, xmm0, movdqu, 16, 0xFFFFFFF0
+        V_COPY_NPX %1, xmm0, movups, 16, 0xFFFFFFF0
 %ifdef ARCH_X86_64
 %define linesize r2
         V_COPY_NPX %1, rax , mov, 8
...