Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
22e25c00
Commit
22e25c00
authored
Nov 07, 2011
by
Vitor Sessak
Committed by
Michael Niedermayer
Nov 07, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
mpegaudiodec: add SSE-optimized imdct36()
Signed-off-by:
Michael Niedermayer
<
michaelni@gmx.at
>
parent
e32aaba3
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
385 additions
and
0 deletions
+385
-0
Makefile
libavcodec/x86/Makefile
+1
-0
imdct36_sse.asm
libavcodec/x86/imdct36_sse.asm
+361
-0
mpegaudiodec_mmx.c
libavcodec/x86/mpegaudiodec_mmx.c
+21
-0
x86inc.asm
libavutil/x86/x86inc.asm
+2
-0
No files found.
libavcodec/x86/Makefile
View file @
22e25c00
...
...
@@ -33,6 +33,7 @@ MMX-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_mmx.o
# Per-feature object lists: each line appends one object file to the list
# selected by the named CONFIG_* switch.
YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
MMX-OBJS-$(CONFIG_MPEGAUDIODSP)        += x86/mpegaudiodec_mmx.o
# Assembly implementation of the 36-point IMDCT (imdct36_sse.asm).
YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36_sse.o
MMX-OBJS-$(CONFIG_PNG_DECODER)         += x86/png_mmx.o
MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc_yasm.o
...
...
libavcodec/x86/imdct36_sse.asm
0 → 100644
View file @
22e25c00
;******************************************************************************
;* 36 point SSE-optimized IMDCT transform
;* Copyright (c) 2011 Vitor Sessak
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"libavutil/x86/x86inc.asm"
%include
"libavutil/x86/x86util.asm"
SECTION_RODATA

; Every table below is a multiple of 16 bytes, so the single "align 16"
; keeps each label 16-byte aligned for use as a packed memory operand.
align 16

; Lane masks for andps: ~0 keeps a 32-bit lane, 0 clears it.
ps_mask:        dd  0, ~0, ~0, ~0
ps_mask2:       dd  0, ~0,  0, ~0
ps_mask3:       dd  0,  0,  0, ~0
ps_mask4:       dd  0, ~0,  0,  0

; Coefficients multiplied in with mulps while DEFINE_IMDCT builds tmp[].
; NOTE(review): values look like sines/cosines of multiples of 10 degrees
; (0.8660254038 ~ sqrt(3)/2) -- confirm against the C reference.
ps_val1:        dd -0.5,          -0.5,          -0.8660254038, -0.8660254038
ps_val2:        dd  1.0,           1.0,           0.8660254038,  0.8660254038
ps_val3:        dd  0.1736481777,  0.1736481777,  0.3420201433,  0.3420201433
ps_val4:        dd -0.7660444431, -0.7660444431,  0.8660254038,  0.8660254038
ps_val5:        dd -0.9396926208, -0.9396926208, -0.9848077530, -0.9848077530
ps_val6:        dd  0.5,           0.5,          -0.6427876097, -0.6427876097
ps_val7:        dd  1.0,           1.0,          -0.6427876097, -0.6427876097

; Sign-bit masks for xorps: 0x80000000 negates a lane, 0 leaves it alone.
ps_p1p1m1m1:    dd  0,          0, 0x80000000, 0x80000000   ; + + - -
ps_p1m1p1m1:    dd  0, 0x80000000,          0, 0x80000000   ; + - + -

; Per-row scale factors for BUTTERF_SSE12 (rows at byte offsets 0/16/32/48)
; and for the tail of DEFINE_IMDCT (row at offset 64).
ps_cosh:        dd  1.0,  0.50190991877167369479,  1.0,  5.73685662283492756461
                dd  1.0,  0.51763809020504152469,  1.0,  1.93185165257813657349
                dd  1.0,  0.55168895948124587824, -1.0, -1.18310079157624925896
                dd  1.0,  0.61038729438072803416, -1.0, -0.87172339781054900991
                dd  1.0,  0.70710678118654752439,  0.0,  0.0

; Same table with lanes 1 and 3 of the first four rows negated; read by
; BUTTERF_SSE3, which finishes with addsubps instead of xorps + addps.
ps_cosh_sse3:   dd  1.0, -0.50190991877167369479,  1.0, -5.73685662283492756461
                dd  1.0, -0.51763809020504152469,  1.0, -1.93185165257813657349
                dd  1.0, -0.55168895948124587824, -1.0,  1.18310079157624925896
                dd  1.0, -0.61038729438072803416, -1.0,  0.87172339781054900991
                dd  1.0,  0.70710678118654752439,  0.0,  0.0

; Multiplier used in every output store offset (see STORE).
; NOTE(review): presumably the number of subbands -- confirm with the caller.
%define SBLIMIT 32

SECTION_TEXT
; PSHUFD_SSE_AVX dst, src, imm8
; Lane shuffle of a single source built from shufps: both shufps inputs
; are %2, so %1 receives the lanes of %2 selected by the immediate %3.
%macro PSHUFD_SSE_AVX 3
    shufps  %1, %2, %2, %3
%endmacro
; PSHUFD_SSE2 dst, src, imm8
; Same operand order as PSHUFD_SSE_AVX, implemented with pshufd.
%macro PSHUFD_SSE2 3
    pshufd  %1, %2, %3
%endmacro
; BUILDINVHIGHLOW_SSE x, y, dst
; input  %1={x1,x2,x3,x4}, %2={y1,y2,y3,y4}
; output %3={x3,x4,y1,y2}
; Both halves of %3 are overwritten; %1 and %2 are left untouched.
%macro BUILDINVHIGHLOW_SSE 3
    movlhps %3, %2              ; high half of %3 = {y1,y2}
    movhlps %3, %1              ; low half of %3  = {x3,x4}
%endmacro
; BUILDINVHIGHLOW_AVX x, y, dst
; Single-instruction form of BUILDINVHIGHLOW_SSE: selector 0x4e takes
; lanes 2,3 of %1 followed by lanes 0,1 of %2, i.e. %3={x3,x4,y1,y2}.
%macro BUILDINVHIGHLOW_AVX 3
    shufps  %3, %1, %2, 0x4e
%endmacro
; ROTLEFT_SSE x, y, dst
; input  %1={x1,x2,x3,x4}, %2={y1,y2,y3,y4}
; output %3={x4,y1,y2,y3}
%macro ROTLEFT_SSE 3
    BUILDINVHIGHLOW %1, %2, %3  ; %3 = {x3,x4,y1,y2}
    shufps  %3, %3, %2, 0x99    ; lanes 1,2 of %3 then lanes 1,2 of %2
%endmacro
; ROTLEFT_SSSE3 x, y, dst
; Same operands as ROTLEFT_SSE, done with one palignr: the 12-byte
; shift applied to the %2:%1 pair.
%macro ROTLEFT_SSSE3 3
    palignr %3, %2, %1, 12
%endmacro
; INVERTHL_SSE1 dst, src
; %1 = %2 with its 64-bit halves exchanged: {a,b,c,d} -> {c,d,a,b}.
; Both halves of %1 are written, so its previous contents do not matter.
%macro INVERTHL_SSE1 2
    movhlps %1, %2              ; low half of %1  = high half of %2
    movlhps %1, %2              ; high half of %1 = low half of %2
%endmacro
; INVERTHL_SSE2 dst, src
; Half swap via the currently selected PSHUFD with selector 0x4e
; (lane order 2,3,0,1).
%macro INVERTHL_SSE2 2
    PSHUFD  %1, %2, 0x4e
%endmacro
; BUTTERF_SSE12 data, scratch, table_offset
;   %1  in/out vector {a0,a1,a2,a3}
;   %2  scratch register (clobbered)
;   %3  byte offset of the coefficient row inside ps_cosh
; Stage 1: b = {a0+a2, a1+a3, a0-a2, a1-a3} * ps_cosh[%3]
; Stage 2: %1 = {b0+b1, b0-b1, b2+b3, b2-b3}
%macro BUTTERF_SSE12 3
    INVERTHL %2, %1                 ; %2 = {a2,a3,a0,a1}
    xorps    %1, [ps_p1p1m1m1]      ; negate lanes 2,3 of %1
    addps    %1, %2
    mulps    %1, [ps_cosh + %3]
    PSHUFD   %2, %1, 0xb1           ; %2 = {b1,b0,b3,b2}
    xorps    %1, [ps_p1m1p1m1]      ; negate lanes 1,3 of %1
    addps    %1, %2
%endmacro
; BUTTERF_SSE3 data, scratch, table_offset
; Same operands as BUTTERF_SSE12 (%1 in/out, %2 clobbered scratch,
; %3 byte offset of the row), but reading ps_cosh_sse3. In that table
; lanes 1 and 3 of rows 0-3 are already negated, so the second stage is
; a single addsubps (subtract in even lanes, add in odd lanes) instead
; of xorps + addps.
%macro BUTTERF_SSE3 3
    INVERTHL %2, %1                     ; %2 = %1 with halves swapped
    xorps    %1, %1, [ps_p1p1m1m1]      ; negate lanes 2,3 of %1
    addps    %1, %1, %2
    mulps    %1, %1, [ps_cosh_sse3 + %3]
    PSHUFD   %2, %1, 0xb1               ; swap lanes within each pair
    addsubps %1, %1, %2
%endmacro
; STORE vec, scratch, addr
; Scatters the four floats of %1 to memory: lane k is written to
; [%3 + 4*k*SBLIMIT] (k = 0..3).
;   %1  source vector; lanes end up swapped within each pair (clobbered)
;   %2  scratch register (clobbered)
;   %3  base address expression
%macro STORE 3
    movhlps %2, %1                      ; %2 lane 0 = lane 2 of %1
    movss   [%3], %1                    ; lane 0
    movss   [%3 +  8*SBLIMIT], %2       ; lane 2
    shufps  %1, %1, 0xb1                ; bring lanes 1 and 3 into 0 and 2
    movss   [%3 +  4*SBLIMIT], %1       ; lane 1
    movhlps %2, %1
    movss   [%3 + 12*SBLIMIT], %2       ; lane 3
%endmacro
; LOADA64 dst, addr
; Fills the 16-byte register %1 from [%2] using two 8-byte loads.
%macro LOADA64 2
    movlps  %1, [%2]            ; low half
    movhps  %1, [%2 + 8]        ; high half
%endmacro
; STOREA64 addr, src
; Writes the 16-byte register %2 to [%1] using two 8-byte stores.
%macro STOREA64 2
    movlps  [%1], %2            ; low half
    movhps  [%1 + 8], %2        ; high half
%endmacro
;------------------------------------------------------------------------------
; DEFINE_IMDCT suffix
;
; Emits the function imdct36_float_<suffix>(out, buf, in, win) using the
; PSHUFD / INVERTHL / BUTTERF / BUILDINVHIGHLOW / ROTLEFT variants that
; are %define'd at the point of expansion.
;
; Memory traffic, as performed by the code below:
;   in  : 18 floats read   ([inq] .. [inq + 64 + 8))
;   buf : 18 floats read, then 18 floats written back
;   win : 40 floats read   ([winq] .. [winq + 36*4 + 16)), always as a
;         packed memory operand
;   out : 18 floats written, one every 4*SBLIMIT bytes
;
; Registers: m0-m7 everywhere, plus m8 when ARCH_X86_64 is defined.
;------------------------------------------------------------------------------
%macro DEFINE_IMDCT 1
cglobal imdct36_float_%1, 4, 4, 9, out, buf, in, win

    ; for(i=17;i>=1;i--) in[i] += in[i-1];
    LOADA64 m0, inq
    LOADA64 m1, inq + 16

    ROTLEFT m0, m1, m5

    PSHUFD  m6, m0, 0x93
    andps   m6, m6, [ps_mask]           ; clear lane 0
    addps   m0, m0, m6

    LOADA64 m2, inq + 32

    ROTLEFT m1, m2, m7

    addps   m1, m1, m5
    LOADA64 m3, inq + 48

    ROTLEFT m2, m3, m5

    xorps   m4, m4, m4
    movlps  m4, [inq + 64]              ; last two inputs, high half stays zero
    BUILDINVHIGHLOW m3, m4, m6
    shufps  m6, m6, m4, 0xa9

    addps   m4, m4, m6
    addps   m2, m2, m7
    addps   m3, m3, m5

    ; for(i=17;i>=3;i-=2) in[i] += in[i-2];
    movlhps m5, m5, m0
    andps   m5, m5, [ps_mask3]          ; keep lane 3 only

    BUILDINVHIGHLOW m0, m1, m7
    andps   m7, m7, [ps_mask2]          ; keep lanes 1 and 3

    addps   m0, m0, m5

    BUILDINVHIGHLOW m1, m2, m6
    andps   m6, m6, [ps_mask2]

    addps   m1, m1, m7

    BUILDINVHIGHLOW m2, m3, m7
    andps   m7, m7, [ps_mask2]

    addps   m2, m2, m6

    movhlps m6, m6, m3
    andps   m6, m6, [ps_mask4]          ; keep lane 1 only

    addps   m3, m3, m7
    addps   m4, m4, m6

    ; Populate tmp[]
    movlhps m6, m1, m5                  ; zero out high values (low half of m5 is 0)
    subps   m6, m6, m4

    subps   m5, m0, m3

    ; With a ninth register available, m0 - m3 is parked in m8 and swapped
    ; back later; otherwise it is recomputed further down.
%ifdef ARCH_X86_64
    SWAP    m5, m8
%endif

    mulps   m7, m2, [ps_val1]

%ifdef ARCH_X86_64
    mulps   m5, m8, [ps_val2]
%else
    mulps   m5, m5, [ps_val2]
%endif
    addps   m7, m7, m5

    mulps   m5, m6, [ps_val1]
    subps   m7, m7, m5

%ifndef ARCH_X86_64
    subps   m5, m0, m3
%else
    SWAP    m5, m8
%endif

    subps   m5, m5, m6
    addps   m5, m5, m2

    shufps  m6, m4, m3, 0xe4
    subps   m6, m6, m2
    mulps   m6, m6, [ps_val3]

    addps   m4, m4, m1
    mulps   m4, m4, [ps_val4]

    shufps  m1, m1, m0, 0xe4
    addps   m1, m1, m2
    mulps   m1, m1, [ps_val5]

    mulps   m3, m3, [ps_val6]
    mulps   m0, m0, [ps_val7]
    addps   m0, m0, m3

    xorps   m2, m1, [ps_p1p1m1m1]
    subps   m2, m2, m4
    addps   m2, m2, m0

    addps   m3, m4, m0
    subps   m3, m3, m6
    xorps   m3, m3, [ps_p1p1m1m1]

    shufps  m0, m0, m4, 0xe4
    subps   m0, m0, m1
    addps   m0, m0, m6

    BUILDINVHIGHLOW m2, m3, m4
    shufps  m3, m3, m2, 0x4e

    ; we have tmp = {SWAPLH(m0), SWAPLH(m7), m3, m4, m5}

    BUTTERF m0, m1, 0
    BUTTERF m7, m2, 16
    BUTTERF m3, m6, 32
    BUTTERF m4, m1, 48

    ; Last (half-filled) vector: scaled by the fifth ps_cosh row and
    ; combined with the xorps/addps sequence instead of BUTTERF.
    mulps   m5, m5, [ps_cosh + 64]
    PSHUFD  m1, m5, 0xe1
    xorps   m5, m5, [ps_p1m1p1m1]
    addps   m5, m5, m1

    ; permutes:
    ; m0    0  1  2  3     =>     2  6 10 14   m1
    ; m7    4  5  6  7     =>     3  7 11 15   m2
    ; m3    8  9 10 11     =>    17 13  9  5   m3
    ; m4   12 13 14 15     =>    16 12  8  4   m5
    ; m5   16 17 xx xx     =>     0  1 xx xx   m0

    unpckhps m1, m0, m7
    unpckhps m6, m3, m4
    movhlps  m2, m6, m1
    movlhps  m1, m1, m6

    unpcklps m5, m5, m4
    unpcklps m3, m3, m7
    movhlps  m4, m3, m5
    movlhps  m5, m5, m3
    SWAP     m4, m3
    ; permutation done

    ; First half: out[] = value * win[] + buf[]
    PSHUFD  m6, m2, 0xb1
    movlps  m7, [bufq + 64]
    mulps   m6, m6, [winq + 16*4]
    addps   m6, m6, m7
    movss   [outq + 64*SBLIMIT], m6
    shufps  m6, m6, m6, 0xb1
    movss   [outq + 68*SBLIMIT], m6

    mulps   m6, m3, [winq + 4*4]
    LOADA64 m4, bufq + 16
    addps   m6, m6, m4
    STORE   m6, m7, outq + 16*SBLIMIT

    shufps  m4, m0, m3, 0xb5
    mulps   m4, m4, [winq + 8*4]
    LOADA64 m7, bufq + 32
    addps   m4, m4, m7
    STORE   m4, m6, outq + 32*SBLIMIT

    shufps  m3, m3, m2, 0xb1
    mulps   m3, m3, [winq + 12*4]
    LOADA64 m7, bufq + 48
    addps   m3, m3, m7
    STORE   m3, m7, outq + 48*SBLIMIT

    mulps   m2, m2, [winq]
    LOADA64 m6, bufq
    addps   m2, m2, m6
    STORE   m2, m7, outq

    ; Second half: buf[] = value * win[]
    mulps    m4, m1, [winq + 20*4]
    STOREA64 bufq, m4

    mulps    m3, m5, [winq + 24*4]
    STOREA64 bufq + 16, m3

    shufps   m0, m0, m5, 0xb0
    mulps    m0, m0, [winq + 28*4]
    STOREA64 bufq + 32, m0

    shufps   m5, m5, m1, 0xb1
    mulps    m5, m5, [winq + 32*4]
    STOREA64 bufq + 48, m5

    shufps   m1, m1, m1, 0xb1
    mulps    m1, m1, [winq + 36*4]
    movlps   [bufq + 64], m1
    RET
%endmacro
;------------------------------------------------------------------------------
; Instantiate DEFINE_IMDCT once per instruction-set level.
;
; The helper aliases are rebound incrementally: each level keeps the
; bindings of the previous one and overrides only what it improves.
; (The former BUTTERF0 aliases were dropped: they pointed at macros that
; are never defined, and nothing in this file expands BUTTERF0.)
;------------------------------------------------------------------------------

; SSE baseline: shufps-based shuffles, two-instruction helpers.
%define PSHUFD          PSHUFD_SSE_AVX
%define INVERTHL        INVERTHL_SSE1
%define BUTTERF         BUTTERF_SSE12
%define BUILDINVHIGHLOW BUILDINVHIGHLOW_SSE
%define ROTLEFT         ROTLEFT_SSE

INIT_XMM
DEFINE_IMDCT sse

; SSE2: pshufd becomes available.
%define PSHUFD          PSHUFD_SSE2
%define INVERTHL        INVERTHL_SSE2

DEFINE_IMDCT sse2

; SSE3: addsubps-based butterfly.
%define BUTTERF         BUTTERF_SSE3

DEFINE_IMDCT sse3

; SSSE3: palignr-based rotate.
%define ROTLEFT         ROTLEFT_SSSE3

DEFINE_IMDCT ssse3

; AVX: single-shufps BUILDINVHIGHLOW, and back to the shufps form of PSHUFD.
%define BUILDINVHIGHLOW BUILDINVHIGHLOW_AVX
%define PSHUFD          PSHUFD_SSE_AVX

INIT_AVX
DEFINE_IMDCT avx
libavcodec/x86/mpegaudiodec_mmx.c
View file @
22e25c00
...
...
@@ -24,6 +24,12 @@
#include "libavcodec/dsputil.h"
#include "libavcodec/mpegaudiodsp.h"
/* 36-point float IMDCT entry points implemented in x86/imdct36_sse.asm,
 * one per instruction-set level. All five share the same signature. */
void ff_imdct36_float_sse(float *out, float *buf, float *in, float *win);
void ff_imdct36_float_sse2(float *out, float *buf, float *in, float *win);
void ff_imdct36_float_sse3(float *out, float *buf, float *in, float *win);
void ff_imdct36_float_ssse3(float *out, float *buf, float *in, float *win);
void ff_imdct36_float_avx(float *out, float *buf, float *in, float *win);
#define MACS(rt, ra, rb) rt+=(ra)*(rb)
#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
...
...
@@ -154,4 +160,19 @@ void ff_mpadsp_init_mmx(MPADSPContext *s)
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
)
{
s
->
apply_window_float
=
apply_window_mp3
;
}
if
(
HAVE_YASM
&&
mm_flags
&
AV_CPU_FLAG_AVX
&&
HAVE_AVX
)
{
s
->
imdct36_float
=
ff_imdct36_float_avx
;
}
else
if
(
HAVE_YASM
&&
mm_flags
&
AV_CPU_FLAG_SSSE3
&&
HAVE_SSE
)
{
s
->
imdct36_float
=
ff_imdct36_float_ssse3
;
}
else
if
(
HAVE_YASM
&&
mm_flags
&
AV_CPU_FLAG_SSE3
&&
HAVE_SSE
)
{
s
->
imdct36_float
=
ff_imdct36_float_sse3
;
}
else
if
(
HAVE_YASM
&&
mm_flags
&
AV_CPU_FLAG_SSE2
&&
HAVE_SSE
)
{
s
->
imdct36_float
=
ff_imdct36_float_sse2
;
}
else
if
(
HAVE_YASM
&&
mm_flags
&
AV_CPU_FLAG_SSE
&&
HAVE_SSE
)
{
s
->
imdct36_float
=
ff_imdct36_float_sse
;
}
}
libavutil/x86/x86inc.asm
View file @
22e25c00
...
...
@@ -919,6 +919,8 @@ AVX_INSTR minss, 1, 0, 1
; AVX_INSTR registrations, one instruction per line.
; NOTE(review): the three numeric arguments are presumably the float flag,
; the 4-operand flag and the commutative flag -- confirm against the
; AVX_INSTR macro definition, which is not visible in this hunk.
AVX_INSTR movsd,   1, 0, 0
AVX_INSTR movss,   1, 0, 0
AVX_INSTR mpsadbw, 0, 1, 0
; movhlps / movlhps are used in three-operand form by imdct36_sse.asm.
AVX_INSTR movhlps, 1, 0, 0
AVX_INSTR movlhps, 1, 0, 0
AVX_INSTR mulpd,   1, 0, 1
AVX_INSTR mulps,   1, 0, 1
AVX_INSTR mulsd,   1, 0, 1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment