Commit f91c4b78, authored by Ronald S. Bultje

png: add SSE2 version for add_bytes_l2.

parent 59f474b4
...@@ -29,16 +29,22 @@ void ff_add_png_paeth_prediction_ssse3(uint8_t *dst, uint8_t *src, ...@@ -29,16 +29,22 @@ void ff_add_png_paeth_prediction_ssse3(uint8_t *dst, uint8_t *src,
uint8_t *top, int w, int bpp); uint8_t *top, int w, int bpp);
void ff_add_bytes_l2_mmx (uint8_t *dst, uint8_t *src1, void ff_add_bytes_l2_mmx (uint8_t *dst, uint8_t *src1,
uint8_t *src2, int w); uint8_t *src2, int w);
void ff_add_bytes_l2_sse2(uint8_t *dst, uint8_t *src1,
uint8_t *src2, int w);
void ff_pngdsp_init_x86(PNGDSPContext *dsp) void ff_pngdsp_init_x86(PNGDSPContext *dsp)
{ {
#if HAVE_YASM #if HAVE_YASM
int flags = av_get_cpu_flags(); int flags = av_get_cpu_flags();
#if ARCH_X86_32
if (flags & AV_CPU_FLAG_MMX) if (flags & AV_CPU_FLAG_MMX)
dsp->add_bytes_l2 = ff_add_bytes_l2_mmx; dsp->add_bytes_l2 = ff_add_bytes_l2_mmx;
#endif
if (flags & AV_CPU_FLAG_MMX2) if (flags & AV_CPU_FLAG_MMX2)
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2; dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
if (flags & AV_CPU_FLAG_SSE2)
dsp->add_bytes_l2 = ff_add_bytes_l2_sse2;
if (flags & AV_CPU_FLAG_SSSE3) if (flags & AV_CPU_FLAG_SSSE3)
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3; dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
#endif #endif
......
...@@ -53,6 +53,21 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i ...@@ -53,6 +53,21 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
cmp iq, waq cmp iq, waq
jl .loop_v jl .loop_v
%if mmsize == 16
; vector loop
mov wq, waq
and waq, ~7
jmp .end_l
.loop_l:
movq mm0, [src1q+iq]
paddb mm0, [src2q+iq]
movq [dstq+iq ], mm0
add iq, 8
.end_l:
cmp iq, waq
jl .loop_l
%endif
; scalar loop for leftover ; scalar loop for leftover
jmp .end_s jmp .end_s
.loop_s: .loop_s:
...@@ -66,8 +81,13 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i ...@@ -66,8 +81,13 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
REP_RET REP_RET
%endmacro %endmacro
%if ARCH_X86_32
INIT_MMX mmx INIT_MMX mmx
ADD_BYTES_FN 0 ADD_BYTES_FN 0
%endif
INIT_XMM sse2
ADD_BYTES_FN 2
%macro ADD_PAETH_PRED_FN 1 %macro ADD_PAETH_PRED_FN 1
cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment