Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
8bb59e67
Commit
8bb59e67
authored
Jun 12, 2017
by
James Almer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86/aacpsdsp: add ff_ps_hybrid_analysis_ileave_sse
About 2x faster than the c version.
parent
e229df94
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
109 additions
and
0 deletions
+109
-0
aacpsdsp.asm
libavcodec/x86/aacpsdsp.asm
+106
-0
aacpsdsp_init.c
libavcodec/x86/aacpsdsp_init.c
+3
-0
No files found.
libavcodec/x86/aacpsdsp.asm
View file @
8bb59e67
...
...
@@ -166,6 +166,112 @@ align 16
jl
.
loop
REP_RET
;**********************************************************
;void ps_hybrid_analysis_ileave_sse(float (*out)[32][2],
; float in[2][38][64],
; int i, int len)
;**********************************************************
; ps_hybrid_analysis_ileave (SSE): gathers single floats from two input
; planes that lie 38*64*4 bytes apart and writes them interleaved into out.
; Addressing as implemented below:
;   in  is advanced by i*4 bytes, then read at a row stride of len (256) bytes
;   out is advanced by i*256 bytes, one 256-byte output row per input column
; Three paths handle 1, 2 or 4 input columns per outer iteration.
; NOTE(review): cglobal, movsxdifnidn, TRANSPOSE4x4PS, SBUTTERFLYPS/PD,
; mmsize, q2020 and RET come from x86inc/x86util, which are outside this
; view; remarks about them below are assumptions to confirm.
INIT_XMM
sse
; Named registers: out, in, i, len, in0, in1, tmp.
; Presumably 3 arguments loaded, 7 GPRs and 5 XMM registers used - confirm
; against the cglobal definition.
cglobal
ps_hybrid_analysis_ileave
,
3
,
7
,
5
,
out
,
in
,
i
,
len
,
in0
,
in1
,
tmp
; Presumably sign-extends the 32-bit i to pointer width where required.
movsxdifnidn
iq
,
id
; The len register is overwritten with the constant 32 << 3 = 256 before it
; is ever read, so the incoming len value is not used by this routine.
mov
lend
,
32
<<
3
; in += i * 4 bytes.
lea
inq
,
[
inq
+
iq
*
4
]
; out += i << 8, i.e. i * 256 bytes.
mov
tmpd
,
id
shl
tmpd
,
8
add
outq
,
tmpq
; i = 64 - i: from here on i is the number of columns still to process.
mov
tmpd
,
64
sub
tmpd
,
id
mov
id
,
tmpd
; Odd count: handle one column first in .loop4.
test
id
,
1
jne
.
loop4
; Even count with bit 1 set: handle two columns first in .loop8.
test
id
,
2
jne
.
loop8
align
16
; .loop16: four input columns (16 bytes) per outer iteration.
.
loop16
:
; in0 = current column in the first plane, in1 = in0 + 38*64*4 bytes.
mov
in0q
,
inq
mov
in1q
,
38
*
64
*
4
add
in1q
,
in0q
; tmp counts down from len (256) in steps of mmsize.
mov
tmpd
,
lend
.
inner_loop16
:
; Load four vectors: rows r and r+1 (len bytes apart) from both planes.
movaps
m0
,
[in0q]
movaps
m1
,
[in1q]
movaps
m2
,
[
in0q
+
lenq
]
movaps
m3
,
[
in1q
+
lenq
]
; 4x4 transpose of m0..m3 with m4 as scratch (macro defined elsewhere).
TRANSPOSE4x4PS
0
,
1
,
2
,
3
,
4
; Store the four results to four consecutive 256-byte output rows:
; out, out+len, out+2*len and out+3*32*2*4 (= out+768).
movaps
[outq],
m0
movaps
[
outq
+
lenq
]
,
m1
movaps
[
outq
+
lenq
*
2
]
,
m2
movaps
[
outq
+
3
*
32
*
2
*
4
]
,
m3
; Step both input pointers down two rows (2*len bytes).
lea
in0q
,
[
in0q
+
lenq
*
2
]
lea
in1q
,
[
in1q
+
lenq
*
2
]
; Advance within the output row and loop while tmp > 0.
add
outq
,
mmsize
sub
tmpd
,
mmsize
jg
.
inner_loop16
; Next four columns: in += 16 bytes; out skips the three extra rows that
; were written (3*32*2*4 bytes) on top of the row walked by the inner loop.
add
inq
,
16
add
outq
,
3
*
32
*
2
*
4
sub
id
,
4
jg
.
loop16
RET
align
16
; .loop8: two input columns (8 bytes) in a single pass.
.
loop8
:
mov
in0q
,
inq
mov
in1q
,
38
*
64
*
4
add
in1q
,
in0q
mov
tmpd
,
lend
.
inner_loop8
:
; Low halves come from row r, high halves from row r+1, for each plane.
movlps
m0
,
[in0q]
movlps
m1
,
[in1q]
movhps
m0
,
[
in0q
+
lenq
]
movhps
m1
,
[
in1q
+
lenq
]
; Interleave m0/m1 using m2 as scratch (macros defined elsewhere).
SBUTTERFLYPS
0
,
1
,
2
SBUTTERFLYPD
0
,
1
,
2
; Store to two consecutive output rows.
movaps
[outq],
m0
movaps
[
outq
+
lenq
]
,
m1
lea
in0q
,
[
in0q
+
lenq
*
2
]
lea
in1q
,
[
in1q
+
lenq
*
2
]
add
outq
,
mmsize
sub
tmpd
,
mmsize
jg
.
inner_loop8
; in += 8 bytes; out skips the second row that was written.
add
inq
,
8
add
outq
,
lenq
; The count was even with bit 1 set on entry, so after subtracting 2 it is a
; multiple of 4: continue in .loop16 if anything is left, otherwise return.
sub
id
,
2
jg
.
loop16
RET
align
16
; .loop4: one input column (4 bytes) in a single pass.
.
loop4
:
mov
in0q
,
inq
mov
in1q
,
38
*
64
*
4
add
in1q
,
in0q
mov
tmpd
,
lend
.
inner_loop4
:
; Scalar loads: rows r and r+1 from both planes.
movss
m0
,
[in0q]
movss
m1
,
[in1q]
movss
m2
,
[
in0q
+
lenq
]
movss
m3
,
[
in1q
+
lenq
]
; Pack the four scalars into m0; q2020 presumably selects elements 0 and 2
; of each source, giving {in0[r], in1[r], in0[r+1], in1[r+1]} - confirm.
movlhps
m0
,
m1
movlhps
m2
,
m3
shufps
m0
,
m2
,
q2020
movaps
[outq],
m0
lea
in0q
,
[
in0q
+
lenq
*
2
]
lea
in1q
,
[
in1q
+
lenq
*
2
]
add
outq
,
mmsize
sub
tmpd
,
mmsize
jg
.
inner_loop4
; in += 4 bytes; out already sits at the next row after the inner loop.
add
inq
,
4
; One column consumed; dispatch on what remains: bit 1 set -> .loop8,
; at least 4 left -> .loop16, otherwise done.
sub
id
,
1
test
id
,
2
jne
.
loop8
cmp
id
,
4
jge
.
loop16
RET
;***********************************************************
;void ps_hybrid_synthesis_deint_sse4(float out[2][38][64],
; float (*in)[32][2],
...
...
libavcodec/x86/aacpsdsp_init.c
View file @
8bb59e67
...
...
@@ -44,6 +44,8 @@ void ff_ps_hybrid_synthesis_deint_sse(float out[2][38][64], float (*in)[32][2],
int
i
,
int
len
);
/* Prototype only; by its name this is the SSE4 variant of
 * hybrid_synthesis_deint. Its definition and the place where it is
 * installed are outside this view. */
void
ff_ps_hybrid_synthesis_deint_sse4
(
float
out
[
2
][
38
][
64
],
float
(
*
in
)[
32
][
2
],
int
i
,
int
len
);
/* SSE version of hybrid_analysis_ileave. Only the prototype is given here;
 * ff_psdsp_init_x86() installs it as s->hybrid_analysis_ileave when
 * EXTERNAL_SSE(cpu_flags) is true. */
void
ff_ps_hybrid_analysis_ileave_sse
(
float
(
*
out
)[
32
][
2
],
float
L
[
2
][
38
][
64
],
int
i
,
int
len
);
av_cold
void
ff_psdsp_init_x86
(
PSDSPContext
*
s
)
{
...
...
@@ -52,6 +54,7 @@ av_cold void ff_psdsp_init_x86(PSDSPContext *s)
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
s
->
add_squares
=
ff_ps_add_squares_sse
;
s
->
mul_pair_single
=
ff_ps_mul_pair_single_sse
;
s
->
hybrid_analysis_ileave
=
ff_ps_hybrid_analysis_ileave_sse
;
s
->
hybrid_synthesis_deint
=
ff_ps_hybrid_synthesis_deint_sse
;
s
->
hybrid_analysis
=
ff_ps_hybrid_analysis_sse
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment