Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
bac0729d
Commit
bac0729d
authored
May 27, 2012
by
Vitor Sessak
Committed by
Janne Grunau
May 29, 2012
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: use new schema for ASM macros
Signed-off-by:
Janne Grunau
<
janne-libav@jannau.net
>
parent
7263cd55
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
82 additions
and
76 deletions
+82
-76
fft.c
libavcodec/x86/fft.c
+6
-6
fft.h
libavcodec/x86/fft.h
+6
-6
fft_3dn2.c
libavcodec/x86/fft_3dn2.c
+13
-13
fft_mmx.asm
libavcodec/x86/fft_mmx.asm
+57
-51
No files found.
libavcodec/x86/fft.c
View file @
bac0729d
...
@@ -27,15 +27,15 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
...
@@ -27,15 +27,15 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
int
has_vectors
=
av_get_cpu_flags
();
int
has_vectors
=
av_get_cpu_flags
();
if
(
has_vectors
&
AV_CPU_FLAG_3DNOW
&&
HAVE_AMD3DNOW
)
{
if
(
has_vectors
&
AV_CPU_FLAG_3DNOW
&&
HAVE_AMD3DNOW
)
{
/* 3DNow! for K6-2/3 */
/* 3DNow! for K6-2/3 */
s
->
imdct_calc
=
ff_imdct_calc_3dn
;
s
->
imdct_calc
=
ff_imdct_calc_3dn
ow
;
s
->
imdct_half
=
ff_imdct_half_3dn
;
s
->
imdct_half
=
ff_imdct_half_3dn
ow
;
s
->
fft_calc
=
ff_fft_calc_3dn
;
s
->
fft_calc
=
ff_fft_calc_3dn
ow
;
}
}
if
(
has_vectors
&
AV_CPU_FLAG_3DNOWEXT
&&
HAVE_AMD3DNOWEXT
)
{
if
(
has_vectors
&
AV_CPU_FLAG_3DNOWEXT
&&
HAVE_AMD3DNOWEXT
)
{
/* 3DNowEx for K7 */
/* 3DNowEx for K7 */
s
->
imdct_calc
=
ff_imdct_calc_3dn2
;
s
->
imdct_calc
=
ff_imdct_calc_3dn
ow
2
;
s
->
imdct_half
=
ff_imdct_half_3dn2
;
s
->
imdct_half
=
ff_imdct_half_3dn
ow
2
;
s
->
fft_calc
=
ff_fft_calc_3dn2
;
s
->
fft_calc
=
ff_fft_calc_3dn
ow
2
;
}
}
if
(
has_vectors
&
AV_CPU_FLAG_SSE
&&
HAVE_SSE
)
{
if
(
has_vectors
&
AV_CPU_FLAG_SSE
&&
HAVE_SSE
)
{
/* SSE for P3/P4/K8 */
/* SSE for P3/P4/K8 */
...
...
libavcodec/x86/fft.h
View file @
bac0729d
...
@@ -24,13 +24,13 @@
...
@@ -24,13 +24,13 @@
void
ff_fft_permute_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_permute_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_avx
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_avx
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_sse
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn
ow
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn2
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_imdct_calc_3dn
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dn
ow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn
ow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_avx
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_avx
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
...
...
libavcodec/x86/fft_3dn2.c
View file @
bac0729d
...
@@ -30,30 +30,30 @@ DECLARE_ALIGNED(8, static const unsigned int, m1m1)[2] = { 1U<<31, 1U<<31 };
...
@@ -30,30 +30,30 @@ DECLARE_ALIGNED(8, static const unsigned int, m1m1)[2] = { 1U<<31, 1U<<31 };
"movq "#s","#d"\n"\
"movq "#s","#d"\n"\
"psrlq $32,"#d"\n"\
"psrlq $32,"#d"\n"\
"punpckldq "#s","#d"\n"
"punpckldq "#s","#d"\n"
#define ff_fft_calc_3dn
2 ff_fft_calc_3dn
#define ff_fft_calc_3dn
ow2 ff_fft_calc_3dnow
#define ff_fft_dispatch_3dn
2 ff_fft_dispatch_3dn
#define ff_fft_dispatch_3dn
ow2 ff_fft_dispatch_3dnow
#define ff_fft_dispatch_interleave_3dn
2 ff_fft_dispatch_interleave_3dn
#define ff_fft_dispatch_interleave_3dn
ow2 ff_fft_dispatch_interleave_3dnow
#define ff_imdct_calc_3dn
2 ff_imdct_calc_3dn
#define ff_imdct_calc_3dn
ow2 ff_imdct_calc_3dnow
#define ff_imdct_half_3dn
2 ff_imdct_half_3dn
#define ff_imdct_half_3dn
ow2 ff_imdct_half_3dnow
#else
#else
#define PSWAPD(s,d) "pswapd "#s","#d"\n"
#define PSWAPD(s,d) "pswapd "#s","#d"\n"
#endif
#endif
void
ff_fft_dispatch_3dn2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_dispatch_3dn
ow
2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_dispatch_interleave_3dn2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_dispatch_interleave_3dn
ow
2
(
FFTComplex
*
z
,
int
nbits
);
void
ff_fft_calc_3dn2
(
FFTContext
*
s
,
FFTComplex
*
z
)
void
ff_fft_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTComplex
*
z
)
{
{
int
n
=
1
<<
s
->
nbits
;
int
n
=
1
<<
s
->
nbits
;
int
i
;
int
i
;
ff_fft_dispatch_interleave_3dn2
(
z
,
s
->
nbits
);
ff_fft_dispatch_interleave_3dn
ow
2
(
z
,
s
->
nbits
);
__asm__
volatile
(
"femms"
);
__asm__
volatile
(
"femms"
);
if
(
n
<=
8
)
if
(
n
<=
8
)
for
(
i
=
0
;
i
<
n
;
i
+=
2
)
for
(
i
=
0
;
i
<
n
;
i
+=
2
)
FFSWAP
(
FFTSample
,
z
[
i
].
im
,
z
[
i
+
1
].
re
);
FFSWAP
(
FFTSample
,
z
[
i
].
im
,
z
[
i
+
1
].
re
);
}
}
void
ff_imdct_half_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
void
ff_imdct_half_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
{
{
x86_reg
j
,
k
;
x86_reg
j
,
k
;
long
n
=
s
->
mdct_size
;
long
n
=
s
->
mdct_size
;
...
@@ -101,7 +101,7 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
...
@@ -101,7 +101,7 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
);
);
}
}
ff_fft_dispatch_3dn2
(
z
,
s
->
nbits
);
ff_fft_dispatch_3dn
ow
2
(
z
,
s
->
nbits
);
#define CMUL(j,mm0,mm1)\
#define CMUL(j,mm0,mm1)\
"movq (%2,"#j",2), %%mm6 \n"\
"movq (%2,"#j",2), %%mm6 \n"\
...
@@ -144,13 +144,13 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
...
@@ -144,13 +144,13 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
__asm__
volatile
(
"femms"
);
__asm__
volatile
(
"femms"
);
}
}
void
ff_imdct_calc_3dn2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
void
ff_imdct_calc_3dn
ow
2
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
{
{
x86_reg
j
,
k
;
x86_reg
j
,
k
;
long
n
=
s
->
mdct_size
;
long
n
=
s
->
mdct_size
;
long
n4
=
n
>>
2
;
long
n4
=
n
>>
2
;
ff_imdct_half_3dn2
(
s
,
output
+
n4
,
input
);
ff_imdct_half_3dn
ow
2
(
s
,
output
+
n4
,
input
);
j
=
-
n
;
j
=
-
n
;
k
=
n
-
8
;
k
=
n
-
8
;
...
...
libavcodec/x86/fft_mmx.asm
View file @
bac0729d
...
@@ -297,7 +297,7 @@ IF%1 mova Z(1), m5
...
@@ -297,7 +297,7 @@ IF%1 mova Z(1), m5
%define
Z2
(
x
)
[
r0
+
mmsize
*
x
]
%define
Z2
(
x
)
[
r0
+
mmsize
*
x
]
%define
ZH
(
x
)
[
r0
+
mmsize
*
x
+
mmsize
/
2
]
%define
ZH
(
x
)
[
r0
+
mmsize
*
x
+
mmsize
/
2
]
INIT_YMM
INIT_YMM
avx
%if
HAVE_AVX
%if
HAVE_AVX
align
16
align
16
...
@@ -390,7 +390,7 @@ fft32_interleave_avx:
...
@@ -390,7 +390,7 @@ fft32_interleave_avx:
ret
ret
%endif
%endif
INIT_XMM
INIT_XMM
sse
%define
movdqa
movaps
%define
movdqa
movaps
align
16
align
16
...
@@ -439,11 +439,9 @@ fft16_sse:
...
@@ -439,11 +439,9 @@ fft16_sse:
ret
ret
INIT_MMX
%macro
FFT48_3DN
0
%macro
FFT48_3DN
1
align
16
align
16
fft4
%1
:
fft4
%
+
SUFFIX
:
T2_3DN
m0
,
m1
,
Z
(
0
),
Z
(
1
)
T2_3DN
m0
,
m1
,
Z
(
0
),
Z
(
1
)
mova
m2
,
Z
(
2
)
mova
m2
,
Z
(
2
)
mova
m3
,
Z
(
3
)
mova
m3
,
Z
(
3
)
...
@@ -457,7 +455,7 @@ fft4%1:
...
@@ -457,7 +455,7 @@ fft4%1:
ret
ret
align
16
align
16
fft8
%1
:
fft8
%
+
SUFFIX
:
T2_3DN
m0
,
m1
,
Z
(
0
),
Z
(
1
)
T2_3DN
m0
,
m1
,
Z
(
0
),
Z
(
1
)
mova
m2
,
Z
(
2
)
mova
m2
,
Z
(
2
)
mova
m3
,
Z
(
3
)
mova
m3
,
Z
(
3
)
...
@@ -495,7 +493,8 @@ fft8%1:
...
@@ -495,7 +493,8 @@ fft8%1:
ret
ret
%endmacro
%endmacro
FFT48_3DN
_3dn2
INIT_MMX
3
dnow2
FFT48_3DN
%macro
pswapd
2
%macro
pswapd
2
%ifidn
%1
,
%2
%ifidn
%1
,
%2
...
@@ -508,7 +507,8 @@ FFT48_3DN _3dn2
...
@@ -508,7 +507,8 @@ FFT48_3DN _3dn2
%endif
%endif
%endmacro
%endmacro
FFT48_3DN
_3dn
INIT_MMX
3
dnow
FFT48_3DN
%define
Z
(
x
)
[
zq
+
o1q
*
(
x
&
6
)
+
mmsize
*
(
x
&
1
)
]
%define
Z
(
x
)
[
zq
+
o1q
*
(
x
&
6
)
+
mmsize
*
(
x
&
1
)
]
...
@@ -532,7 +532,7 @@ DEFINE_ARGS z, w, n, o1, o3
...
@@ -532,7 +532,7 @@ DEFINE_ARGS z, w, n, o1, o3
rep
ret
rep
ret
%endmacro
%endmacro
INIT_YMM
INIT_YMM
avx
%if
HAVE_AVX
%if
HAVE_AVX
%macro
INTERL_AVX
5
%macro
INTERL_AVX
5
...
@@ -550,7 +550,7 @@ DECL_PASS pass_avx, PASS_BIG 1
...
@@ -550,7 +550,7 @@ DECL_PASS pass_avx, PASS_BIG 1
DECL_PASS
pass_interleave_avx
,
PASS_BIG
0
DECL_PASS
pass_interleave_avx
,
PASS_BIG
0
%endif
%endif
INIT_XMM
INIT_XMM
sse
%macro
INTERL_SSE
5
%macro
INTERL_SSE
5
mova
%3
,
%2
mova
%3
,
%2
...
@@ -565,16 +565,16 @@ INIT_XMM
...
@@ -565,16 +565,16 @@ INIT_XMM
DECL_PASS
pass_sse
,
PASS_BIG
1
DECL_PASS
pass_sse
,
PASS_BIG
1
DECL_PASS
pass_interleave_sse
,
PASS_BIG
0
DECL_PASS
pass_interleave_sse
,
PASS_BIG
0
INIT_MMX
INIT_MMX
3
dnow
%define
mulps
pfmul
%define
mulps
pfmul
%define
addps
pfadd
%define
addps
pfadd
%define
subps
pfsub
%define
subps
pfsub
%define
unpcklps
punpckldq
%define
unpcklps
punpckldq
%define
unpckhps
punpckhdq
%define
unpckhps
punpckhdq
DECL_PASS
pass_3dn
,
PASS_SMALL
1
,
[wq],
[
wq
+
o1q
]
DECL_PASS
pass_3dn
ow
,
PASS_SMALL
1
,
[wq],
[
wq
+
o1q
]
DECL_PASS
pass_interleave_3dn
,
PASS_BIG
0
DECL_PASS
pass_interleave_3dn
ow
,
PASS_BIG
0
%define
pass_3dn
2
pass_3dn
%define
pass_3dn
ow2
pass_3dnow
%define
pass_interleave_3dn
2
pass_interleave_3dn
%define
pass_interleave_3dn
ow2
pass_interleave_3dnow
%ifdef
PIC
%ifdef
PIC
%define
SECTION_REL
-
$$
%define
SECTION_REL
-
$$
...
@@ -592,67 +592,73 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0
...
@@ -592,67 +592,73 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0
call
r2
call
r2
%endmacro
; FFT_DISPATCH
%endmacro
; FFT_DISPATCH
%macro
DECL_FFT
2
-
3
; nbits, cpu, suffix
%macro
DECL_FFT
1
-
2
; nbits, suffix
%xdefine
list_of_fft
fft4
%2
SECTION_REL
,
fft8
%2
SECTION_REL
%ifidn
%0
,
1
%xdefine
fullsuffix
SUFFIX
%else
%xdefine
fullsuffix
%2
%
+
SUFFIX
%endif
%xdefine
list_of_fft
fft4
%
+
SUFFIX
SECTION_REL
,
fft8
%
+
SUFFIX
SECTION_REL
%if
%1
>=
5
%if
%1
>=
5
%xdefine
list_of_fft
list_of_fft
,
fft16
%2
SECTION_REL
%xdefine
list_of_fft
list_of_fft
,
fft16
%
+
SUFFIX
SECTION_REL
%endif
%endif
%if
%1
>=
6
%if
%1
>=
6
%xdefine
list_of_fft
list_of_fft
,
fft32
%3%2
SECTION_REL
%xdefine
list_of_fft
list_of_fft
,
fft32
%
+
fullsuffix
SECTION_REL
%endif
%endif
%assign
n
1
<<
%1
%assign
n
1
<<
%1
%rep
17
-
%1
%rep
17
-
%1
%assign
n2
n
/
2
%assign
n2
n
/
2
%assign
n4
n
/
4
%assign
n4
n
/
4
%xdefine
list_of_fft
list_of_fft
,
fft
%
+
n
%
+
%3%2
SECTION_REL
%xdefine
list_of_fft
list_of_fft
,
fft
%
+
n
%
+
fullsuffix
SECTION_REL
align
16
align
16
fft
%
+
n
%
+
%3%2
:
fft
%
+
n
%
+
fullsuffix
:
call
fft
%
+
n2
%
+
%2
call
fft
%
+
n2
%
+
SUFFIX
add
r0
,
n
*
4
-
(
n
&
(
-
2
<<
%1
))
add
r0
,
n
*
4
-
(
n
&
(
-
2
<<
%1
))
call
fft
%
+
n4
%
+
%2
call
fft
%
+
n4
%
+
SUFFIX
add
r0
,
n
*
2
-
(
n2
&
(
-
2
<<
%1
))
add
r0
,
n
*
2
-
(
n2
&
(
-
2
<<
%1
))
call
fft
%
+
n4
%
+
%2
call
fft
%
+
n4
%
+
SUFFIX
sub
r0
,
n
*
6
+
(
n2
&
(
-
2
<<
%1
))
sub
r0
,
n
*
6
+
(
n2
&
(
-
2
<<
%1
))
lea
r1
,
[
cos_
%
+
n
]
lea
r1
,
[
cos_
%
+
n
]
mov
r2d
,
n4
/
2
mov
r2d
,
n4
/
2
jmp
pass
%3
%2
jmp
pass
%
+
fullsuffix
%assign
n
n
*
2
%assign
n
n
*
2
%endrep
%endrep
%undef
n
%undef
n
align
8
align
8
dispatch_tab
%3%2
:
pointer
list_of_fft
dispatch_tab
%
+
fullsuffix
:
pointer
list_of_fft
section
.
text
section
.
text
; On x86_32, this function does the register saving and restoring for all of fft.
; On x86_32, this function does the register saving and restoring for all of fft.
; The others pass args in registers and don't spill anything.
; The others pass args in registers and don't spill anything.
cglobal
fft_dispatch
%
3%
2
,
2
,
5
,
8
,
z
,
nbits
cglobal
fft_dispatch
%2
,
2
,
5
,
8
,
z
,
nbits
FFT_DISPATCH
%3%2
,
nbits
FFT_DISPATCH
fullsuffix
,
nbits
%if
idn
%2
,
_avx
%if
mmsize
==
32
vzeroupper
vzeroupper
%endif
%endif
RET
RET
%endmacro
; DECL_FFT
%endmacro
; DECL_FFT
%if
HAVE_AVX
%if
HAVE_AVX
INIT_YMM
INIT_YMM
avx
DECL_FFT
6
,
_avx
DECL_FFT
6
DECL_FFT
6
,
_
avx
,
_
interleave
DECL_FFT
6
,
_interleave
%endif
%endif
INIT_XMM
INIT_XMM
sse
DECL_FFT
5
,
_sse
DECL_FFT
5
DECL_FFT
5
,
_sse
,
_interleave
DECL_FFT
5
,
_interleave
INIT_MMX
INIT_MMX
3
dnow
DECL_FFT
4
,
_3dn
DECL_FFT
4
DECL_FFT
4
,
_3dn
,
_interleave
DECL_FFT
4
,
_interleave
DECL_FFT
4
,
_3dn2
INIT_MMX
3
dnow2
DECL_FFT
4
,
_3dn2
,
_interleave
DECL_FFT
4
DECL_FFT
4
,
_interleave
INIT_XMM
INIT_XMM
sse
%undef
mulps
%undef
mulps
%undef
addps
%undef
addps
%undef
subps
%undef
subps
...
@@ -748,8 +754,8 @@ INIT_XMM
...
@@ -748,8 +754,8 @@ INIT_XMM
jl
.
post
jl
.
post
%endmacro
%endmacro
%macro
DECL_IMDCT
2
%macro
DECL_IMDCT
1
cglobal
imdct_half
%1
,
3
,
12
,
8
; FFTContext *s, FFTSample *output, const FFTSample *input
cglobal
imdct_half
,
3
,
12
,
8
; FFTContext *s, FFTSample *output, const FFTSample *input
%if
ARCH_X86_64
%if
ARCH_X86_64
%define
rrevtab
r7
%define
rrevtab
r7
%define
rtcos
r8
%define
rtcos
r8
...
@@ -821,7 +827,7 @@ cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample
...
@@ -821,7 +827,7 @@ cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample
mov
r0
,
r1
mov
r0
,
r1
mov
r1d
,
[
r5
+
FFTContext
.
nbits
]
mov
r1d
,
[
r5
+
FFTContext
.
nbits
]
FFT_DISPATCH
%1
,
r1
FFT_DISPATCH
SUFFIX
,
r1
mov
r0d
,
[
r5
+
FFTContext
.
mdctsize
]
mov
r0d
,
[
r5
+
FFTContext
.
mdctsize
]
add
r6
,
r0
add
r6
,
r0
...
@@ -835,20 +841,20 @@ cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample
...
@@ -835,20 +841,20 @@ cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample
neg
r0
neg
r0
mov
r1
,
-
mmsize
mov
r1
,
-
mmsize
sub
r1
,
r0
sub
r1
,
r0
%
2
r0
,
r1
,
r6
,
rtcos
,
rtsin
%
1
r0
,
r1
,
r6
,
rtcos
,
rtsin
%if
ARCH_X86_64
==
0
%if
ARCH_X86_64
==
0
add
esp
,
12
add
esp
,
12
%endif
%endif
%if
idn
avx_enabled
,
1
%if
mmsize
==
32
vzeroupper
vzeroupper
%endif
%endif
RET
RET
%endmacro
%endmacro
DECL_IMDCT
_sse
,
POSROTATESHUF
DECL_IMDCT
POSROTATESHUF
INIT_YMM
INIT_YMM
avx
%if
HAVE_AVX
%if
HAVE_AVX
DECL_IMDCT
_avx
,
POSROTATESHUF_AVX
DECL_IMDCT
POSROTATESHUF_AVX
%endif
%endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment