Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
59f474b4
Commit
59f474b4
authored
Jan 27, 2012
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
png: convert DSP functions to yasm.
parent
3715d841
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
156 additions
and
103 deletions
+156
-103
Makefile
libavcodec/x86/Makefile
+1
-0
pngdsp-init.c
libavcodec/x86/pngdsp-init.c
+13
-103
pngdsp.asm
libavcodec/x86/pngdsp.asm
+142
-0
No files found.
libavcodec/x86/Makefile
View file @
59f474b4
...
...
@@ -42,6 +42,7 @@ MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o
YASM-OBJS-$(CONFIG_ENCODERS)
+=
x86/dsputilenc_yasm.o
MMX-OBJS-$(CONFIG_GPL)
+=
x86/idct_mmx.o
MMX-OBJS-$(CONFIG_LPC)
+=
x86/lpc_mmx.o
YASM-OBJS-$(CONFIG_PNG_DECODER)
+=
x86/pngdsp.o
MMX-OBJS-$(CONFIG_PNG_DECODER)
+=
x86/pngdsp-init.o
YASM-OBJS-$(CONFIG_PRORES_DECODER)
+=
x86/proresdsp.o
MMX-OBJS-$(CONFIG_PRORES_DECODER)
+=
x86/proresdsp-init.o
...
...
libavcodec/x86/pngdsp-init.c
View file @
59f474b4
...
...
@@ -19,117 +19,27 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/common.h"
#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/pngdsp.h"
#include "dsputil_mmx.h"
/*
 * PAETH(cpu, abs3): defines
 *   static void add_png_paeth_prediction_<cpu>(dst, src, top, w, bpp)
 * implemented with MMX inline assembly (AT&T operand order: op src, dst).
 *
 * abs3 is an inline-asm string fragment pasted into the loop body. At that
 * point mm3, mm4 and mm5 hold signed word differences; the fragment must
 * replace them with their absolute values and leave mm7 equal to zero,
 * because mm7 is used afterwards as the zero operand of punpcklbw.
 *
 * Asm operands:
 *   %0 = i    byte offset, starts at -bpp and advances by bpp per iteration
 *   %1 = dst  %2 = top  %3 = src
 *   %4 = bpp  %5 = end (w-3)  %6 = ff_pw_255
 *
 * Before the loop, mm0 and mm1 are loaded from dst and top at offset -bpp
 * (the left and top-left neighbours) and widened to words.
 * Each iteration:
 *   - mm2 = previous top (top-left), mm1 = 4 bytes of top at offset i
 *   - mm3 = mm2 - mm1, mm4 = mm2 - mm0, mm5 = mm3 + mm4, then abs3
 *   - compare masks select one of mm0 (left), mm1 (top), mm2 (top-left)
 *   - the selected value is added to 4 bytes of src, masked with ff_pw_255,
 *     kept in mm0 for the next iteration, packed and stored as 4 bytes at
 *     dst + i
 * The loop repeats while i <= end (signed compare, "jle").
 */
#define PAETH(cpu, abs3)\
static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
{\
x86_reg i = -bpp;\
x86_reg end = w-3;\
__asm__ volatile(\
"pxor %%mm7, %%mm7 \n"\
"movd (%1,%0), %%mm0 \n"\
"movd (%2,%0), %%mm1 \n"\
"punpcklbw %%mm7, %%mm0 \n"\
"punpcklbw %%mm7, %%mm1 \n"\
"add %4, %0 \n"\
"1: \n"\
"movq %%mm1, %%mm2 \n"\
"movd (%2,%0), %%mm1 \n"\
"movq %%mm2, %%mm3 \n"\
"punpcklbw %%mm7, %%mm1 \n"\
"movq %%mm2, %%mm4 \n"\
"psubw %%mm1, %%mm3 \n"\
"psubw %%mm0, %%mm4 \n"\
"movq %%mm3, %%mm5 \n"\
"paddw %%mm4, %%mm5 \n"\
abs3\
"movq %%mm4, %%mm6 \n"\
"pminsw %%mm5, %%mm6 \n"\
"pcmpgtw %%mm6, %%mm3 \n"\
"pcmpgtw %%mm5, %%mm4 \n"\
"movq %%mm4, %%mm6 \n"\
"pand %%mm3, %%mm4 \n"\
"pandn %%mm3, %%mm6 \n"\
"pandn %%mm0, %%mm3 \n"\
"movd (%3,%0), %%mm0 \n"\
"pand %%mm1, %%mm6 \n"\
"pand %%mm4, %%mm2 \n"\
"punpcklbw %%mm7, %%mm0 \n"\
"movq %6, %%mm5 \n"\
"paddw %%mm6, %%mm0 \n"\
"paddw %%mm2, %%mm3 \n"\
"paddw %%mm3, %%mm0 \n"\
"pand %%mm5, %%mm0 \n"\
"movq %%mm0, %%mm3 \n"\
"packuswb %%mm3, %%mm3 \n"\
"movd %%mm3, (%1,%0) \n"\
"add %4, %0 \n"\
"cmp %5, %0 \n"\
"jle 1b \n"\
:"+r"(i)\
:"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
"m"(ff_pw_255)\
:"memory"\
);\
}
/*
 * ABS3_MMX2: asm fragment that replaces mm3, mm4 and mm5 (signed words) by
 * their absolute values without pabsw, using |x| = max(x, 0 - x).
 * Expects mm7 == 0 on entry (it is used as the zero for the first
 * subtraction), clobbers mm6, and re-zeroes mm7 before finishing.
 */
#define ABS3_MMX2\
"psubw %%mm5, %%mm7 \n"\
"pmaxsw %%mm7, %%mm5 \n"\
"pxor %%mm6, %%mm6 \n"\
"pxor %%mm7, %%mm7 \n"\
"psubw %%mm3, %%mm6 \n"\
"psubw %%mm4, %%mm7 \n"\
"pmaxsw %%mm6, %%mm3 \n"\
"pmaxsw %%mm7, %%mm4 \n"\
"pxor %%mm7, %%mm7 \n"
/*
 * ABS3_SSSE3: same result as ABS3_MMX2 using pabsw directly on mm3, mm4
 * and mm5; mm6 and mm7 are left untouched.
 */
#define ABS3_SSSE3\
"pabsw %%mm3, %%mm3 \n"\
"pabsw %%mm4, %%mm4 \n"\
"pabsw %%mm5, %%mm5 \n"
/* Instantiates add_png_paeth_prediction_mmx2 with the pmaxsw-based abs. */
PAETH
(
mmx2
,
ABS3_MMX2
)
/* add_png_paeth_prediction_ssse3 is only built when HAVE_SSSE3 is set. */
#if HAVE_SSSE3
PAETH
(
ssse3
,
ABS3_SSSE3
)
#endif
static
void
add_bytes_l2_mmx
(
uint8_t
*
dst
,
uint8_t
*
src1
,
uint8_t
*
src2
,
int
w
)
{
x86_reg
i
=
0
;
__asm__
volatile
(
"jmp 2f
\n\t
"
"1:
\n\t
"
"movq (%2, %0), %%mm0
\n\t
"
"movq 8(%2, %0), %%mm1
\n\t
"
"paddb (%3, %0), %%mm0
\n\t
"
"paddb 8(%3, %0), %%mm1
\n\t
"
"movq %%mm0, (%1, %0)
\n\t
"
"movq %%mm1, 8(%1, %0)
\n\t
"
"add $16, %0
\n\t
"
"2:
\n\t
"
"cmp %4, %0
\n\t
"
" js 1b
\n\t
"
:
"+r"
(
i
)
:
"r"
(
dst
),
"r"
(
src1
),
"r"
(
src2
),
"r"
((
x86_reg
)
w
-
15
)
);
for
(;
i
<
w
;
i
++
)
dst
[
i
]
=
src1
[
i
]
+
src2
[
i
];
}
void
ff_add_png_paeth_prediction_mmx2
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint8_t
*
top
,
int
w
,
int
bpp
);
void
ff_add_png_paeth_prediction_ssse3
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint8_t
*
top
,
int
w
,
int
bpp
);
void
ff_add_bytes_l2_mmx
(
uint8_t
*
dst
,
uint8_t
*
src1
,
uint8_t
*
src2
,
int
w
);
/**
 * Install the x86 assembly implementations into a PNGDSPContext.
 *
 * Nothing is installed unless HAVE_YASM is set. Each function pointer is
 * replaced only when av_get_cpu_flags() reports the matching capability,
 * so a CPU lacking MMX/MMX2/SSSE3 keeps whatever the caller stored in dsp
 * beforehand. SSSE3 is checked last so it takes precedence over MMX2 for
 * add_paeth_prediction.
 *
 * @param dsp context whose add_bytes_l2 / add_paeth_prediction pointers
 *            may be overwritten
 */
void ff_pngdsp_init_x86(PNGDSPContext *dsp)
{
#if HAVE_YASM
    int flags = av_get_cpu_flags();

    if (flags & AV_CPU_FLAG_MMX)
        dsp->add_bytes_l2         = ff_add_bytes_l2_mmx;
    if (flags & AV_CPU_FLAG_MMX2)
        dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
    if (flags & AV_CPU_FLAG_SSSE3)
        dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
#endif
}
libavcodec/x86/pngdsp.asm
0 → 100644
View file @
59f474b4
;******************************************************************************
;* x86 optimizations for PNG decoding
;*
;* Copyright (c) 2008 Loren Merritt <lorenm@u.washington.edu>
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"x86inc.asm"
%include
"x86util.asm"
SECTION_RODATA
cextern
pw_255
section
.
text
align
=
16
;------------------------------------------------------------------------------
; ADD_BYTES_FN: emits add_bytes_l2(dst, src1, src2, w)
;   dst[k] = src1[k] + src2[k] (byte-wise, wrapping) for k in [0, w)
;
; %1 = nr. of xmm registers used
;
; Register roles:
;   iq  = current byte offset
;   wq  = full byte count
;   waq = byte count rounded down to a multiple of 2*mmsize during the
;         vector pass; its low byte (wab) is reused as scratch in the tail
;------------------------------------------------------------------------------
%macro ADD_BYTES_FN 1
cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
%if ARCH_X86_64
    movsxd             waq, wad             ; w arrives as a 32-bit int
%endif
    xor                 iq, iq

    ; vector pass: two registers (2*mmsize bytes) per iteration
    mov                 wq, waq
    and                waq, ~(mmsize*2-1)
    jmp .vec_check
.vec_body:
    mova                m0, [src1q+iq]
    mova                m1, [src1q+iq+mmsize]
    paddb               m0, [src2q+iq]
    paddb               m1, [src2q+iq+mmsize]
    mova  [dstq+iq       ], m0
    mova  [dstq+iq+mmsize], m1
    add                 iq, mmsize*2
.vec_check:
    cmp                 iq, waq
    jl .vec_body

    ; byte pass: whatever the vector pass left over, up to wq
    jmp .byte_check
.byte_body:
    mov                wab, [src1q+iq]
    add                wab, [src2q+iq]
    mov          [dstq+iq], wab
    inc                 iq
.byte_check:
    cmp                 iq, wq
    jl .byte_body
    REP_RET
%endmacro

INIT_MMX mmx
ADD_BYTES_FN 0
;------------------------------------------------------------------------------
; ADD_PAETH_PRED_FN: emits
;   add_png_paeth_prediction(dst, src, top, w, bpp)
;
; %1 = nr. of xmm registers used
;
; Each iteration handles mmsize/2 bytes (widened to words) at dstq, then
; advances dstq by bpp. The loop body always runs once and repeats while
; dstq <= endq, where endq = dst + w - (mmsize/2 - 1).
;
; Register roles:
;   topq, srcq = offsets of top/src relative to dst, so [topq+dstq] and
;                [srcq+dstq] follow dstq as it advances
;   m7 = zero (for byte -> word unpacking)
;   m0 = left neighbour (initially dst[-bpp], afterwards the previous output)
;   m1 = top row at the current position
;   m2 = previous m1, i.e. the top-left neighbour
;   m3, m4, m5 = (m2 - m1), (m2 - m0) and their sum, then absolute values
;   m6 = scratch
;------------------------------------------------------------------------------
%macro ADD_PAETH_PRED_FN 1
cglobal add_png_paeth_prediction, 5, 6, %1, dst, src, top, w, bpp, end
%if ARCH_X86_64
    movsxd            bppq, bppd
    movsxd              wq, wd
%endif
    lea               endq, [dstq+wq-(mmsize/2-1)]
    sub               topq, dstq
    sub               srcq, dstq
    sub               dstq, bppq            ; step back to the left neighbour
    pxor                m7, m7

    movh                m0, [dstq]          ; left
    movh                m1, [topq+dstq]     ; top-left
    punpcklbw           m0, m7
    punpcklbw           m1, m7
    add               dstq, bppq

.loop:
    mova                m2, m1              ; top-left = previous top
    movh                m1, [topq+dstq]     ; top
    mova                m3, m2
    punpcklbw           m1, m7
    mova                m4, m2
    psubw               m3, m1              ; top-left - top
    psubw               m4, m0              ; top-left - left
    mova                m5, m3
    paddw               m5, m4              ; 2*top-left - top - left
%if cpuflag(ssse3)
    pabsw               m3, m3
    pabsw               m4, m4
    pabsw               m5, m5
%else ; !cpuflag(ssse3)
    ; |x| = max(x, 0 - x); m7 is zero on entry and restored to zero at the end
    psubw               m7, m5
    pmaxsw              m5, m7
    pxor                m6, m6
    pxor                m7, m7
    psubw               m6, m3
    psubw               m7, m4
    pmaxsw              m3, m6
    pmaxsw              m4, m7
    pxor                m7, m7
%endif ; cpuflag(ssse3)
    mova                m6, m4
    pminsw              m6, m5
    pcmpgtw             m3, m6              ; m3 > min(m4, m5): left not chosen
    pcmpgtw             m4, m5
    mova                m6, m4
    pand                m4, m3              ; mask selecting top-left
    pandn               m6, m3              ; mask selecting top
    pandn               m3, m0              ; left where it is chosen, else 0
    movh                m0, [srcq+dstq]
    pand                m6, m1
    pand                m2, m4
    punpcklbw           m0, m7
    paddw               m0, m6
    paddw               m3, m2
    paddw               m0, m3              ; src + selected predictor
    pand                m0, [pw_255]        ; keep the low byte of each word
    mova                m3, m0
    packuswb            m3, m3
    movh            [dstq], m3
    add               dstq, bppq
    cmp               dstq, endq
    jbe .loop                               ; unsigned: both operands are addresses
    REP_RET
%endmacro

INIT_MMX mmx2
ADD_PAETH_PRED_FN 0

INIT_MMX ssse3
ADD_PAETH_PRED_FN 0
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment