Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
18b131de
Commit
18b131de
authored
Jul 30, 2011
by
Vitor Sessak
Committed by
Ronald S. Bultje
Aug 02, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
dct32: Add SSE2 ASM optimizations
Signed-off-by:
Ronald S. Bultje
<
rsbultje@gmail.com
>
parent
6f7fe472
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
33 additions
and
9 deletions
+33
-9
dct32_sse.asm
libavcodec/x86/dct32_sse.asm
+30
-9
fft.c
libavcodec/x86/fft.c
+2
-0
fft.h
libavcodec/x86/fft.h
+1
-0
No files found.
libavcodec/x86/dct32_sse.asm
View file @
18b131de
...
...
@@ -63,6 +63,13 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
mulps
%1
,
%3
%endmacro
%macro
BUTTERFLY0_SSE2
5
pshufd
%4
,
%1
,
%5
xorps
%1
,
%2
addps
%1
,
%4
mulps
%1
,
%3
%endmacro
%macro
BUTTERFLY0_AVX
5
vshufps
%4
,
%1
,
%1
,
%5
vxorps
%1
,
%1
,
%2
...
...
@@ -405,18 +412,17 @@ INIT_XMM
INIT_XMM
%macro
DCT32_FUNC
1
; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
cglobal
dct32_float_
sse
,
2
,
3
,
16
,
out
,
in
,
tmp
cglobal
dct32_float_
%1
,
2
,
3
,
16
,
out
,
in
,
tmp
; pass 1
movaps
m0
,
[
inq
+
0
]
movaps
m1
,
[
inq
+
112
]
shufps
m1
,
m1
,
0x1b
LOAD_INV
m1
,
[
inq
+
112
]
BUTTERFLY
m0
,
m1
,
[
ps_cos_vec
]
,
m3
movaps
m7
,
[
inq
+
64
]
movaps
m4
,
[
inq
+
48
]
shufps
m4
,
m4
,
0x1b
LOAD_INV
m4
,
[
inq
+
48
]
BUTTERFLY
m7
,
m4
,
[
ps_cos_vec
+
32
]
,
m3
; pass 2
...
...
@@ -427,13 +433,11 @@ cglobal dct32_float_sse, 2,3,16, out, in, tmp
; pass 1
movaps
m1
,
[
inq
+
16
]
movaps
m6
,
[
inq
+
96
]
shufps
m6
,
m6
,
0x1b
LOAD_INV
m6
,
[
inq
+
96
]
BUTTERFLY
m1
,
m6
,
[
ps_cos_vec
+
16
]
,
m3
movaps
m4
,
[
inq
+
80
]
movaps
m5
,
[
inq
+
32
]
shufps
m5
,
m5
,
0x1b
LOAD_INV
m5
,
[
inq
+
32
]
BUTTERFLY
m4
,
m5
,
[
ps_cos_vec
+
48
]
,
m3
; pass 2
...
...
@@ -492,3 +496,20 @@ cglobal dct32_float_sse, 2,3,16, out, in, tmp
PASS5
PASS6
RET
%endmacro
%macro
LOAD_INV_SSE
2
movaps
%1
,
%2
shufps
%1
,
%1
,
0x1b
%endmacro
%define
LOAD_INV
LOAD_INV_SSE
DCT32_FUNC
sse
%macro
LOAD_INV_SSE2
2
pshufd
%1
,
%2
,
0x1b
%endmacro
%define
LOAD_INV
LOAD_INV_SSE2
%define
BUTTERFLY0
BUTTERFLY0_SSE2
DCT32_FUNC
sse2
libavcodec/x86/fft.c
View file @
18b131de
...
...
@@ -60,6 +60,8 @@ av_cold void ff_dct_init_mmx(DCTContext *s)
int
has_vectors
=
av_get_cpu_flags
();
if
(
has_vectors
&
AV_CPU_FLAG_AVX
&&
HAVE_AVX
)
s
->
dct32
=
ff_dct32_float_avx
;
else
if
(
has_vectors
&
AV_CPU_FLAG_SSE2
&&
HAVE_SSE
)
s
->
dct32
=
ff_dct32_float_sse2
;
else
if
(
has_vectors
&
AV_CPU_FLAG_SSE
&&
HAVE_SSE
)
s
->
dct32
=
ff_dct32_float_sse
;
#endif
...
...
libavcodec/x86/fft.h
View file @
18b131de
...
...
@@ -35,6 +35,7 @@ void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
void
ff_imdct_half_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_avx
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_dct32_float_sse
(
FFTSample
*
out
,
const
FFTSample
*
in
);
void
ff_dct32_float_sse2
(
FFTSample
*
out
,
const
FFTSample
*
in
);
void
ff_dct32_float_avx
(
FFTSample
*
out
,
const
FFTSample
*
in
);
#endif
/* AVCODEC_X86_FFT_H */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment