Commit ca5c3ff9, authored by Michael Niedermayer, committed by Vittorio Giovara

vf_interlace: x86: improve asm performance

4775 decicycles -> 3688 decicycles
parent 57ed5a64
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
;* x86-optimized functions for interlace filter ;* x86-optimized functions for interlace filter
;* ;*
;* Copyright (C) 2014 Kieran Kunhya <kierank@obe.tv> ;* Copyright (C) 2014 Kieran Kunhya <kierank@obe.tv>
;* Copyright (c) 2014 Michael Niedermayer <michaelni@gmx.at>
;* ;*
;* This file is part of Libav. ;* This file is part of Libav.
;* ;*
...@@ -34,36 +35,27 @@ cglobal lowpass_line, 5, 5, 7 ...@@ -34,36 +35,27 @@ cglobal lowpass_line, 5, 5, 7
add r4, r1 add r4, r1
neg r1 neg r1
pxor m6, m6 pcmpeqb m6, m6
.loop .loop
mova m0, [r2+r1] mova m0, [r3+r1]
punpcklbw m1, m0, m6 mova m1, [r3+r1+mmsize]
punpckhbw m0, m6 pavgb m0, [r4+r1]
paddw m0, m0 pavgb m1, [r4+r1+mmsize]
paddw m1, m1 mova m2, [r2+r1]
mova m3, [r2+r1+mmsize]
pxor m0, m6
pxor m1, m6
pxor m2, m6, [r2+r1]
pxor m3, m6, [r2+r1+mmsize]
pavgb m0, m2
pavgb m1, m3
pxor m0, m6
pxor m1, m6
mova [r0+r1], m0
mova [r0+r1+mmsize], m1
mova m2, [r3+r1] add r1, 2*mmsize
punpcklbw m3, m2, m6
punpckhbw m2, m6
mova m4, [r4+r1]
punpcklbw m5, m4, m6
punpckhbw m4, m6
paddw m1, m3
pavgw m1, m5
paddw m0, m2
pavgw m0, m4
psrlw m0, 1
psrlw m1, 1
packuswb m1, m0
mova [r0+r1], m1
add r1, mmsize
jl .loop jl .loop
REP_RET REP_RET
%endmacro %endmacro
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment