Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
681a86ab
Commit
681a86ab
authored
Jul 15, 2012
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: fft: Port to cpuflags
parent
e9bb77fb
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
43 additions
and
51 deletions
+43
-51
fft.asm
libavcodec/x86/fft.asm
+43
-51
No files found.
libavcodec/x86/fft.asm
View file @
681a86ab
...
...
@@ -190,6 +190,23 @@ SECTION .text
addps
%2
,
%2
,
%5
; {i0,i1,i2,i3}
%endmacro
%macro
INTERL
5
%if
cpuflag
(
avx
)
vunpckhps
%3
,
%2
,
%1
vunpcklps
%2
,
%2
,
%1
vextractf128
%4
(
%5
),
%2
,
0
vextractf128
%4
%
+
H
(
%5
),
%3
,
0
vextractf128
%4
(
%5
+
1
),
%2
,
1
vextractf128
%4
%
+
H
(
%5
+
1
),
%3
,
1
%elif
cpuflag
(
sse
)
mova
%3
,
%2
unpcklps
%2
,
%1
unpckhps
%3
,
%1
mova
%4
(
%5
),
%2
mova
%4
(
%5
+
1
),
%3
%endif
%endmacro
; scheduled for cpu-bound sizes
%macro
PASS_SMALL
3
; (to load m4-m7), wre, wim
IF%1
mova
m4
,
Z
(
4
)
...
...
@@ -536,17 +553,6 @@ DEFINE_ARGS zc, w, n, o1, o3
INIT_YMM
avx
%macro
INTERL_AVX
5
vunpckhps
%3
,
%2
,
%1
vunpcklps
%2
,
%2
,
%1
vextractf128
%4
(
%5
),
%2
,
0
vextractf128
%4
%
+
H
(
%5
),
%3
,
0
vextractf128
%4
(
%5
+
1
),
%2
,
1
vextractf128
%4
%
+
H
(
%5
+
1
),
%3
,
1
%endmacro
%define
INTERL
INTERL_AVX
DECL_PASS
pass_avx
,
PASS_BIG
1
DECL_PASS
pass_interleave_avx
,
PASS_BIG
0
...
...
@@ -560,16 +566,6 @@ cglobal fft_calc, 2,5,8
INIT_XMM
sse
%macro
INTERL_SSE
5
mova
%3
,
%2
unpcklps
%2
,
%1
unpckhps
%3
,
%1
mova
%4
(
%5
),
%2
mova
%4
(
%5
+
1
),
%3
%endmacro
%define
INTERL
INTERL_SSE
DECL_PASS
pass_sse
,
PASS_BIG
1
DECL_PASS
pass_interleave_sse
,
PASS_BIG
0
...
...
@@ -855,16 +851,30 @@ INIT_XMM sse
%endmacro
%macro
CMUL
6
;j, xmm0, xmm1, 3, 4, 5
%if
cpuflag
(
sse
)
mulps
m6
,
%3
,
[
%5
+
%1
]
mulps
m7
,
%2
,
[
%5
+
%1
]
mulps
%2
,
%2
,
[
%6
+
%1
]
mulps
%3
,
%3
,
[
%6
+
%1
]
subps
%2
,
%2
,
m6
addps
%3
,
%3
,
m7
%elif
cpuflag
(
3
dnow
)
mova
m6
,
[
%1
+
%2
*
2
]
mova
%3
,
[
%1
+
%2
*
2
+
8
]
mova
%4
,
m6
mova
m7
,
%3
pfmul
m6
,
[
%5
+
%2
]
pfmul
%3
,
[
%6
+
%2
]
pfmul
%4
,
[
%6
+
%2
]
pfmul
m7
,
[
%5
+
%2
]
pfsub
%3
,
m6
pfadd
%4
,
m7
%endif
%endmacro
%macro
POSROTATESHUF
_AVX
5
;j, k, z+n8, tcos+n8, tsin+n8
%macro
POSROTATESHUF
5
;j, k, z+n8, tcos+n8, tsin+n8
.
post
:
%if
cpuflag
(
avx
)
vmovaps
ymm1
,
[
%3
+
%1
*
2
]
vmovaps
ymm0
,
[
%3
+
%1
*
2
+
0x20
]
vmovaps
ymm3
,
[
%3
+
%2
*
2
]
...
...
@@ -893,10 +903,7 @@ INIT_XMM sse
sub
%2
,
0x20
add
%1
,
0x20
jl
.
post
%endmacro
%macro
POSROTATESHUF
5
;j, k, z+n8, tcos+n8, tsin+n8
.
post
:
%elif
cpuflag
(
sse
)
movaps
xmm1
,
[
%3
+
%1
*
2
]
movaps
xmm0
,
[
%3
+
%1
*
2
+
0x10
]
CMUL
%1
,
xmm0
,
xmm1
,
%3
,
%4
,
%5
...
...
@@ -918,25 +925,9 @@ INIT_XMM sse
sub
%2
,
0x10
add
%1
,
0x10
jl
.
post
%endmacro
%macro
CMUL_3DNOW
6
mova
m6
,
[
%1
+
%2
*
2
]
mova
%3
,
[
%1
+
%2
*
2
+
8
]
mova
%4
,
m6
mova
m7
,
%3
pfmul
m6
,
[
%5
+
%2
]
pfmul
%3
,
[
%6
+
%2
]
pfmul
%4
,
[
%6
+
%2
]
pfmul
m7
,
[
%5
+
%2
]
pfsub
%3
,
m6
pfadd
%4
,
m7
%endmacro
%macro
POSROTATESHUF_3DNOW
5
;j, k, z+n8, tcos+n8, tsin+n8
.
post
:
CMUL_3DNOW
%3
,
%1
,
m0
,
m1
,
%4
,
%5
CMUL_3DNOW
%3
,
%2
,
m2
,
m3
,
%4
,
%5
%elif
cpuflag
(
3
dnow
)
CMUL
%3
,
%1
,
m0
,
m1
,
%4
,
%5
CMUL
%3
,
%2
,
m2
,
m3
,
%4
,
%5
movd
[
%3
+
%1
*
2
+
0
]
,
m0
movd
[
%3
+
%2
*
2
+
12
]
,
m1
movd
[
%3
+
%2
*
2
+
0
]
,
m2
...
...
@@ -952,9 +943,10 @@ INIT_XMM sse
sub
%2
,
8
add
%1
,
8
jl
.
post
%endif
%endmacro
%macro
DECL_IMDCT
1
%macro
DECL_IMDCT
0
cglobal
imdct_half
,
3
,
12
,
8
; FFTContext *s, FFTSample *output, const FFTSample *input
%if
ARCH_X86_64
%define
rrevtab
r7
...
...
@@ -1060,7 +1052,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
neg
r0
mov
r1
,
-
mmsize
sub
r1
,
r0
%1
r0
,
r1
,
r6
,
rtcos
,
rtsin
POSROTATESHUF
r0
,
r1
,
r6
,
rtcos
,
rtsin
%if
ARCH_X86_64
==
0
add
esp
,
12
%endif
...
...
@@ -1070,17 +1062,17 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
RET
%endmacro
DECL_IMDCT
POSROTATESHUF
DECL_IMDCT
%if
ARCH_X86_32
INIT_MMX
3
dnow
DECL_IMDCT
POSROTATESHUF_3DNOW
DECL_IMDCT
INIT_MMX
3
dnowext
DECL_IMDCT
POSROTATESHUF_3DNOW
DECL_IMDCT
%endif
INIT_YMM
avx
DECL_IMDCT
POSROTATESHUF_AVX
DECL_IMDCT
%endif
; CONFIG_MDCT
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment