Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
3c172a41
Commit
3c172a41
authored
Jan 13, 2012
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale: change yuv2yuvX code to use cpuflag().
parent
57facb73
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
59 additions
and
60 deletions
+59
-60
output.asm
libswscale/x86/output.asm
+59
-60
No files found.
libswscale/x86/output.asm
View file @
3c172a41
...
@@ -56,7 +56,7 @@ SECTION .text
...
@@ -56,7 +56,7 @@ SECTION .text
; of 2. $offset is either 0 or 3. $dither holds 8 values.
; of 2. $offset is either 0 or 3. $dither holds 8 values.
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
%macro
yuv2planeX_fn
4
%macro
yuv2planeX_fn
3
%ifdef
ARCH_X86_32
%ifdef
ARCH_X86_32
%define
cntr_reg
r1
%define
cntr_reg
r1
...
@@ -66,12 +66,12 @@ SECTION .text
...
@@ -66,12 +66,12 @@ SECTION .text
%define
movsx
movsxd
%define
movsx
movsxd
%endif
%endif
cglobal
yuv2planeX_
%
2
_
%1
,
%4
,
7
,
%3
cglobal
yuv2planeX_
%
1
,
%3
,
7
,
%2
%if
%
2
==
8
||
%2
==
9
||
%2
==
10
%if
%
1
==
8
||
%1
==
9
||
%1
==
10
pxor
m6
,
m6
pxor
m6
,
m6
%endif
; %
2
== 8/9/10
%endif
; %
1
== 8/9/10
%if
%
2
==
8
%if
%
1
==
8
%ifdef
ARCH_X86_32
%ifdef
ARCH_X86_32
%assign
pad
0x2c
-
(
stack_offset
&
15
)
%assign
pad
0x2c
-
(
stack_offset
&
15
)
SUB
rsp
,
pad
SUB
rsp
,
pad
...
@@ -120,7 +120,7 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
...
@@ -120,7 +120,7 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
mova
[
rsp
+
16
]
,
m3
mova
[
rsp
+
16
]
,
m3
mova
[
rsp
+
24
]
,
m_dith
mova
[
rsp
+
24
]
,
m_dith
%endif
; mmsize == 8/16
%endif
; mmsize == 8/16
%endif
; %
2
== 8
%endif
; %
1
== 8
xor
r5
,
r5
xor
r5
,
r5
...
@@ -130,11 +130,11 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
...
@@ -130,11 +130,11 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
; 8 pixels but we can only handle 2 pixels per register, and thus 4
; 8 pixels but we can only handle 2 pixels per register, and thus 4
; pixels per iteration. In order to not have to keep track of where
; pixels per iteration. In order to not have to keep track of where
; we are w.r.t. dithering, we unroll the mmx/8bit loop x2.
; we are w.r.t. dithering, we unroll the mmx/8bit loop x2.
%if
%
2
==
8
%if
%
1
==
8
%rep
16
/
mmsize
%rep
16
/
mmsize
%endif
; %
2
== 8
%endif
; %
1
== 8
%if
%
2
==
8
%if
%
1
==
8
%ifdef
ARCH_X86_32
%ifdef
ARCH_X86_32
mova
m2
,
[
rsp
+
mmsize
*
(
0
+
%%
i
)
]
mova
m2
,
[
rsp
+
mmsize
*
(
0
+
%%
i
)
]
mova
m1
,
[
rsp
+
mmsize
*
(
1
+
%%
i
)
]
mova
m1
,
[
rsp
+
mmsize
*
(
1
+
%%
i
)
]
...
@@ -142,31 +142,31 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
...
@@ -142,31 +142,31 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
mova
m2
,
m8
mova
m2
,
m8
mova
m1
,
m_dith
mova
m1
,
m_dith
%endif
; x86-32/64
%endif
; x86-32/64
%else
; %
2
== 9/10/16
%else
; %
1
== 9/10/16
mova
m1
,
[
yuv2yuvX_
%
2
_start
]
mova
m1
,
[
yuv2yuvX_
%
1
_start
]
mova
m2
,
m1
mova
m2
,
m1
%endif
; %
2
== 8/9/10/16
%endif
; %
1
== 8/9/10/16
movsx
cntr_reg
,
r1m
movsx
cntr_reg
,
r1m
.
filterloop_
%
+
%%
i
:
.
filterloop_
%
+
%%
i
:
; input pixels
; input pixels
mov
r6
,
[
r2
+
gprsize
*
cntr_reg
-
2
*
gprsize
]
mov
r6
,
[
r2
+
gprsize
*
cntr_reg
-
2
*
gprsize
]
%if
%
2
==
16
%if
%
1
==
16
mova
m3
,
[
r6
+
r5
*
4
]
mova
m3
,
[
r6
+
r5
*
4
]
mova
m5
,
[
r6
+
r5
*
4
+
mmsize
]
mova
m5
,
[
r6
+
r5
*
4
+
mmsize
]
%else
; %
2
== 8/9/10
%else
; %
1
== 8/9/10
mova
m3
,
[
r6
+
r5
*
2
]
mova
m3
,
[
r6
+
r5
*
2
]
%endif
; %
2
== 8/9/10/16
%endif
; %
1
== 8/9/10/16
mov
r6
,
[
r2
+
gprsize
*
cntr_reg
-
gprsize
]
mov
r6
,
[
r2
+
gprsize
*
cntr_reg
-
gprsize
]
%if
%
2
==
16
%if
%
1
==
16
mova
m4
,
[
r6
+
r5
*
4
]
mova
m4
,
[
r6
+
r5
*
4
]
mova
m6
,
[
r6
+
r5
*
4
+
mmsize
]
mova
m6
,
[
r6
+
r5
*
4
+
mmsize
]
%else
; %
2
== 8/9/10
%else
; %
1
== 8/9/10
mova
m4
,
[
r6
+
r5
*
2
]
mova
m4
,
[
r6
+
r5
*
2
]
%endif
; %
2
== 8/9/10/16
%endif
; %
1
== 8/9/10/16
; coefficients
; coefficients
movd
m0
,
[
r0
+
2
*
cntr_reg
-
4
]
; coeff[0], coeff[1]
movd
m0
,
[
r0
+
2
*
cntr_reg
-
4
]
; coeff[0], coeff[1]
%if
%
2
==
16
%if
%
1
==
16
pshuflw
m7
,
m0
,
0
; coeff[0]
pshuflw
m7
,
m0
,
0
; coeff[0]
pshuflw
m0
,
m0
,
0x55
; coeff[1]
pshuflw
m0
,
m0
,
0x55
; coeff[1]
pmovsxwd
m7
,
m7
; word -> dword
pmovsxwd
m7
,
m7
; word -> dword
...
@@ -181,7 +181,7 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
...
@@ -181,7 +181,7 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
paddd
m1
,
m5
paddd
m1
,
m5
paddd
m2
,
m4
paddd
m2
,
m4
paddd
m1
,
m6
paddd
m1
,
m6
%else
; %
2
== 10/9/8
%else
; %
1
== 10/9/8
punpcklwd
m5
,
m3
,
m4
punpcklwd
m5
,
m3
,
m4
punpckhwd
m3
,
m4
punpckhwd
m3
,
m4
SPLATD
m0
,
m0
SPLATD
m0
,
m0
...
@@ -191,84 +191,83 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
...
@@ -191,84 +191,83 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
paddd
m2
,
m5
paddd
m2
,
m5
paddd
m1
,
m3
paddd
m1
,
m3
%endif
; %
2
== 8/9/10/16
%endif
; %
1
== 8/9/10/16
sub
cntr_reg
,
2
sub
cntr_reg
,
2
jg
.
filterloop_
%
+
%%
i
jg
.
filterloop_
%
+
%%
i
%if
%
2
==
16
%if
%
1
==
16
psrad
m2
,
31
-
%
2
psrad
m2
,
31
-
%
1
psrad
m1
,
31
-
%
2
psrad
m1
,
31
-
%
1
%else
; %
2
== 10/9/8
%else
; %
1
== 10/9/8
psrad
m2
,
27
-
%
2
psrad
m2
,
27
-
%
1
psrad
m1
,
27
-
%
2
psrad
m1
,
27
-
%
1
%endif
; %
2
== 8/9/10/16
%endif
; %
1
== 8/9/10/16
%if
%
2
==
8
%if
%
1
==
8
packssdw
m2
,
m1
packssdw
m2
,
m1
packuswb
m2
,
m2
packuswb
m2
,
m2
movh
[
r3
+
r5
*
1
]
,
m2
movh
[
r3
+
r5
*
1
]
,
m2
%else
; %
2
== 9/10/16
%else
; %
1
== 9/10/16
%if
%
2
==
16
%if
%
1
==
16
packssdw
m2
,
m1
packssdw
m2
,
m1
paddw
m2
,
[minshort]
paddw
m2
,
[minshort]
%else
; %2 == 9/10
%else
; %1 == 9/10
%ifidn
%1
,
sse4
%if
cpuflag
(
sse4
)
packusdw
m2
,
m1
%elifidn
%1
,
avx
packusdw
m2
,
m1
packusdw
m2
,
m1
%else
; mmx2/sse2
%else
; mmx2/sse2
packssdw
m2
,
m1
packssdw
m2
,
m1
pmaxsw
m2
,
m6
pmaxsw
m2
,
m6
%endif
; mmx2/sse2/sse4/avx
%endif
; mmx2/sse2/sse4/avx
pminsw
m2
,
[
yuv2yuvX_
%
2
_upper
]
pminsw
m2
,
[
yuv2yuvX_
%
1
_upper
]
%endif
; %
2
== 9/10/16
%endif
; %
1
== 9/10/16
mova
[
r3
+
r5
*
2
]
,
m2
mova
[
r3
+
r5
*
2
]
,
m2
%endif
; %
2
== 8/9/10/16
%endif
; %
1
== 8/9/10/16
add
r5
,
mmsize
/
2
add
r5
,
mmsize
/
2
sub
r4d
,
mmsize
/
2
sub
r4d
,
mmsize
/
2
%if
%
2
==
8
%if
%
1
==
8
%assign
%%
i
%%
i
+
2
%assign
%%
i
%%
i
+
2
%endrep
%endrep
%endif
; %
2
== 8
%endif
; %
1
== 8
jg
.
pixelloop
jg
.
pixelloop
%if
%
2
==
8
%if
%
1
==
8
%ifdef
ARCH_X86_32
%ifdef
ARCH_X86_32
ADD
rsp
,
pad
ADD
rsp
,
pad
RET
RET
%else
; x86-64
%else
; x86-64
REP_RET
REP_RET
%endif
; x86-32/64
%endif
; x86-32/64
%else
; %
2
== 9/10/16
%else
; %
1
== 9/10/16
REP_RET
REP_RET
%endif
; %
2
== 8/9/10/16
%endif
; %
1
== 8/9/10/16
%endmacro
%endmacro
%define
PALIGNR
PALIGNR_MMX
%define
PALIGNR
PALIGNR_MMX
%ifdef
ARCH_X86_32
%ifdef
ARCH_X86_32
INIT_MMX
INIT_MMX
mmx2
yuv2planeX_fn
mmx2
,
8
,
0
,
7
yuv2planeX_fn
8
,
0
,
7
yuv2planeX_fn
mmx2
,
9
,
0
,
5
yuv2planeX_fn
9
,
0
,
5
yuv2planeX_fn
mmx2
,
10
,
0
,
5
yuv2planeX_fn
10
,
0
,
5
%endif
%endif
INIT_XMM
INIT_XMM
sse2
yuv2planeX_fn
sse2
,
8
,
10
,
7
yuv2planeX_fn
8
,
10
,
7
yuv2planeX_fn
sse2
,
9
,
7
,
5
yuv2planeX_fn
9
,
7
,
5
yuv2planeX_fn
sse2
,
10
,
7
,
5
yuv2planeX_fn
10
,
7
,
5
%define
PALIGNR
PALIGNR_SSSE3
%define
PALIGNR
PALIGNR_SSSE3
yuv2planeX_fn
sse4
,
8
,
10
,
7
INIT_XMM
sse4
yuv2planeX_fn
sse4
,
9
,
7
,
5
yuv2planeX_fn
8
,
10
,
7
yuv2planeX_fn
sse4
,
10
,
7
,
5
yuv2planeX_fn
9
,
7
,
5
yuv2planeX_fn
sse4
,
16
,
8
,
5
yuv2planeX_fn
10
,
7
,
5
yuv2planeX_fn
16
,
8
,
5
INIT_AVX
yuv2planeX_fn
avx
,
8
,
10
,
7
INIT_XMM
avx
yuv2planeX_fn
avx
,
9
,
7
,
5
yuv2planeX_fn
8
,
10
,
7
yuv2planeX_fn
avx
,
10
,
7
,
5
yuv2planeX_fn
9
,
7
,
5
yuv2planeX_fn
10
,
7
,
5
; %1=output-bpc, %2=alignment (u/a)
; %1=output-bpc, %2=alignment (u/a)
%macro
yuv2plane1_mainloop
2
%macro
yuv2plane1_mainloop
2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment