Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
2e4bb99f
Commit
2e4bb99f
authored
Jan 16, 2013
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vorbisdsp: convert x86 simd functions from inline asm to yasm.
parent
0ee8293a
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
92 additions
and
74 deletions
+92
-74
Makefile
libavcodec/x86/Makefile
+1
-0
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+0
-3
dsputil_mmx.h
libavcodec/x86/dsputil_mmx.h
+0
-2
vorbisdsp.asm
libavcodec/x86/vorbisdsp.asm
+83
-0
vorbisdsp_init.c
libavcodec/x86/vorbisdsp_init.c
+8
-69
No files found.
libavcodec/x86/Makefile
View file @
2e4bb99f
...
...
@@ -62,6 +62,7 @@ YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \
x86/rv40dsp.o
YASM-OBJS-$(CONFIG_VC1_DECODER)
+=
x86/vc1dsp.o
YASM-OBJS-$(CONFIG_VIDEODSP)
+=
x86/videodsp.o
YASM-OBJS-$(CONFIG_VORBIS_DECODER)
+=
x86/vorbisdsp.o
YASM-OBJS-$(CONFIG_VP3DSP)
+=
x86/vp3dsp.o
YASM-OBJS-$(CONFIG_VP6_DECODER)
+=
x86/vp56dsp.o
YASM-OBJS-$(CONFIG_VP8_DECODER)
+=
x86/vp8dsp.o
...
...
libavcodec/x86/dsputil_mmx.c
View file @
2e4bb99f
...
...
@@ -38,9 +38,6 @@
DECLARE_ALIGNED
(
8
,
const
uint64_t
,
ff_bone
)
=
0x0101010101010101ULL
;
DECLARE_ALIGNED
(
8
,
const
uint64_t
,
ff_wtwo
)
=
0x0002000200020002ULL
;
DECLARE_ALIGNED
(
16
,
const
uint64_t
,
ff_pdw_80000000
)[
2
]
=
{
0x8000000080000000ULL
,
0x8000000080000000ULL
};
DECLARE_ALIGNED
(
16
,
const
xmm_reg
,
ff_pw_1
)
=
{
0x0001000100010001ULL
,
0x0001000100010001ULL
};
DECLARE_ALIGNED
(
16
,
const
xmm_reg
,
ff_pw_2
)
=
{
0x0002000200020002ULL
,
0x0002000200020002ULL
};
DECLARE_ALIGNED
(
16
,
const
xmm_reg
,
ff_pw_3
)
=
{
0x0003000300030003ULL
,
0x0003000300030003ULL
};
...
...
libavcodec/x86/dsputil_mmx.h
View file @
2e4bb99f
...
...
@@ -31,8 +31,6 @@ typedef struct xmm_reg { uint64_t a, b; } xmm_reg;
extern
const
uint64_t
ff_bone
;
extern
const
uint64_t
ff_wtwo
;
extern
const
uint64_t
ff_pdw_80000000
[
2
];
extern
const
xmm_reg
ff_pw_3
;
extern
const
xmm_reg
ff_pw_4
;
extern
const
xmm_reg
ff_pw_5
;
...
...
libavcodec/x86/vorbisdsp.asm
0 → 100644
View file @
2e4bb99f
;******************************************************************************
;* Vorbis x86 optimizations
;* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"libavutil/x86/x86util.asm"
SECTION_RODATA
; Four packed dwords of 0x80000000 (only bit 31 set in each 32-bit lane).
; The SSE routine below loads this into m5 and ANDs it with a compare mask.
pdw_80000000
:
times
4
dd
0x80000000
SECTION
.
text
; vorbis_inverse_coupling, 3DNow! variant (assembled only when ARCH_X86_32).
; Arguments: mag, ang, block_size. mag and ang are rewritten in place, two
; 32-bit floats per loop iteration. With mag/ang being the values loaded at
; the top of the iteration:
;   t    = ang with its sign bit flipped in lanes where mag >= 0.0
;   ang' = mag + (t where ang >= 0.0, else 0)
;   mag' = mag - (t where ang >= 0.0 is false, else 0)
; The loop body always executes at least once and the index steps by 2, so
; block_size is presumably a positive multiple of 2 -- TODO confirm with callers.
%if
ARCH_X86_32
INIT_MMX
3
dnow
cglobal
vorbis_inverse_coupling
,
3
,
3
,
6
,
mag
,
ang
,
block_size
pxor
m7
,
m7
; m7 = all zero bits (0.0 in both lanes); compare operand used below
lea
magq
,
[
magq
+
block_sizeq
*
4
]
lea
angq
,
[
angq
+
block_sizeq
*
4
]
; mag/ang now point block_size floats past their start
neg
block_sizeq
; index counts up from -block_size towards 0
.
loop
:
mova
m0
,
[
magq
+
block_sizeq
*
4
]
mova
m1
,
[
angq
+
block_sizeq
*
4
]
mova
m2
,
m0
mova
m3
,
m1
pfcmpge
m2
,
m7
; m2 = all-ones in lanes where mag >= 0.0 (pfcmpge tests dst >= src)
pfcmpge
m3
,
m7
; m3 = all-ones in lanes where ang >= 0.0
pslld
m2
,
31
; m2 = 0x80000000 in lanes where mag >= 0.0, else 0
pxor
m1
,
m2
; m1 = ang with its sign bit flipped where mag >= 0.0
mova
m4
,
m3
pand
m3
,
m1
; m3 = m1 in lanes where ang >= 0.0, else 0
pandn
m4
,
m1
; m4 = m1 in lanes where the ang >= 0.0 mask is clear, else 0
pfadd
m3
,
m0
; new ang = mag + ((ang >= 0) & m1)
pfsub
m0
,
m4
; new mag = mag - (~(ang >= 0) & m1)
mova
[
angq
+
block_sizeq
*
4
]
,
m3
mova
[
magq
+
block_sizeq
*
4
]
,
m0
add
block_sizeq
,
2
jl
.
loop
; loop while the index is still negative
femms
; leave MMX/3DNow! state before returning to the caller
RET
%endif
; vorbis_inverse_coupling, SSE variant.
; Arguments: mag, ang, block_size; cntr is a scratch index register.
; mag and ang are rewritten in place, four 32-bit floats per loop iteration.
; With mag/ang being the values loaded at the top of the iteration:
;   t    = ang with its sign bit flipped in lanes where 0.0 <= mag
;   ang' = mag + (t where 0.0 <= ang, else 0)
;   mag' = mag - (t where 0.0 <= ang is false, else 0)
; The loop test is at the bottom, so the body runs at least once; block_size
; is presumably a positive multiple of 4 -- TODO confirm with callers.
; NOTE(review): mova is presumably x86inc's aligned move (movaps here), which
; would require 16-byte aligned mag/ang -- confirm.
INIT_XMM
sse
cglobal
vorbis_inverse_coupling
,
3
,
4
,
6
,
mag
,
ang
,
block_size
,
cntr
mova
m5
,
[
pdw_80000000
]
; m5 = 0x80000000 in every lane (sign-bit mask)
xor
cntrq
,
cntrq
; cntr = 0, element index into mag/ang
align
16
.
loop
:
mova
m0
,
[
magq
+
cntrq
*
4
]
mova
m1
,
[
angq
+
cntrq
*
4
]
xorps
m2
,
m2
xorps
m3
,
m3
cmpleps
m2
,
m0
; m2 = all-ones in lanes where 0.0 <= mag (cmpleps tests dst <= src)
cmpleps
m3
,
m1
; m3 = all-ones in lanes where 0.0 <= ang
andps
m2
,
m5
; m2 = 0x80000000 in lanes where 0.0 <= mag, else 0
xorps
m1
,
m2
; m1 = ang with its sign bit flipped where 0.0 <= mag
mova
m4
,
m3
andps
m3
,
m1
; m3 = m1 in lanes where 0.0 <= ang, else 0
andnps
m4
,
m1
; m4 = m1 in lanes where the 0.0 <= ang mask is clear, else 0
addps
m3
,
m0
; new ang = mag + ((0 <= ang) & m1)
subps
m0
,
m4
; new mag = mag - (~(0 <= ang) & m1)
mova
[
angq
+
cntrq
*
4
]
,
m3
mova
[
magq
+
cntrq
*
4
]
,
m0
add
cntrq
,
4
cmp
cntrq
,
block_sizeq
jl
.
loop
; loop while cntr < block_size (signed compare)
RET
libavcodec/x86/vorbisdsp_init.c
View file @
2e4bb99f
...
...
@@ -21,83 +21,22 @@
#include "config.h"
#include "libavutil/cpu.h"
#include "libavcodec/vorbisdsp.h"
#include "dsputil_mmx.h" // for ff_pdw_80000000
#if HAVE_INLINE_ASM
#if ARCH_X86_32
/*
 * 3DNow! inline-asm variant of vorbis_inverse_coupling; compiled only under
 * HAVE_INLINE_ASM and ARCH_X86_32. Rewrites mag[] and ang[] in place, two
 * floats per iteration (i advances by 2 while i < blocksize). With m and a
 * being the values loaded at the top of the iteration:
 *   t   = a with its sign bit flipped in lanes where m >= 0.0
 *   ang = m + (t where a >= 0.0, else 0)
 *   mag = m - (t where a >= 0.0 is false, else 0)
 * mm7 is zeroed in a separate asm statement and read inside the loop, so the
 * code relies on mm7 staying untouched between the asm statements.
 * The asm uses AT&T operand order: source first, destination last.
 */
static
void
vorbis_inverse_coupling_3dnow
(
float
*
mag
,
float
*
ang
,
intptr_t
blocksize
)
{
int
i
;
__asm__
volatile
(
"pxor %%mm7, %%mm7"
:
);
for
(
i
=
0
;
i
<
blocksize
;
i
+=
2
)
{
__asm__
volatile
(
"movq %0, %%mm0
\n\t
"
"movq %1, %%mm1
\n\t
"
"movq %%mm0, %%mm2
\n\t
"
"movq %%mm1, %%mm3
\n\t
"
"pfcmpge %%mm7, %%mm2
\n\t
"
// mm2 = all-ones where m >= 0.0 (destination >= source)
"pfcmpge %%mm7, %%mm3
\n\t
"
// mm3 = all-ones where a >= 0.0
"pslld $31, %%mm2
\n\t
"
// mm2 = 0x80000000 where m >= 0.0, else 0
"pxor %%mm2, %%mm1
\n\t
"
"movq %%mm3, %%mm4
\n\t
"
"pand %%mm1, %%mm3
\n\t
"
"pandn %%mm1, %%mm4
\n\t
"
"pfadd %%mm0, %%mm3
\n\t
"
// new a = m + ((a >= 0) & (a with sign flipped where m >= 0))
"pfsub %%mm4, %%mm0
\n\t
"
// new m = m - (~(a >= 0) & (a with sign flipped where m >= 0))
"movq %%mm3, %1
\n\t
"
"movq %%mm0, %0
\n\t
"
:
"+m"
(
mag
[
i
]),
"+m"
(
ang
[
i
])
::
"memory"
);
}
__asm__
volatile
(
"femms"
);
}
#endif
/*
 * SSE inline-asm variant of vorbis_inverse_coupling; compiled only under
 * HAVE_INLINE_ASM. Rewrites mag[] and ang[] in place, four floats per
 * iteration (i advances by 4 while i < blocksize). With m and a being the
 * values loaded at the top of the iteration:
 *   t   = a with its sign bit flipped in lanes where 0.0 <= m
 *   ang = m + (t where 0.0 <= a, else 0)
 *   mag = m - (t where 0.0 <= a is false, else 0)
 * xmm5 is loaded from ff_pdw_80000000 in a separate asm statement and read
 * inside the loop, so the code relies on xmm5 staying untouched in between.
 * movaps is used for every load and store, so mag + i and ang + i have to be
 * 16-byte aligned. The asm uses AT&T operand order: source first,
 * destination last.
 */
static
void
vorbis_inverse_coupling_sse
(
float
*
mag
,
float
*
ang
,
intptr_t
blocksize
)
{
int
i
;
__asm__
volatile
(
"movaps %0, %%xmm5
\n\t
"
::
"m"
(
ff_pdw_80000000
[
0
])
);
for
(
i
=
0
;
i
<
blocksize
;
i
+=
4
)
{
__asm__
volatile
(
"movaps %0, %%xmm0
\n\t
"
"movaps %1, %%xmm1
\n\t
"
"xorps %%xmm2, %%xmm2
\n\t
"
"xorps %%xmm3, %%xmm3
\n\t
"
"cmpleps %%xmm0, %%xmm2
\n\t
"
// xmm2 = all-ones where 0.0 <= m (destination <= source)
"cmpleps %%xmm1, %%xmm3
\n\t
"
// xmm3 = all-ones where 0.0 <= a
"andps %%xmm5, %%xmm2
\n\t
"
// xmm2 = 0x80000000 where 0.0 <= m, else 0
"xorps %%xmm2, %%xmm1
\n\t
"
"movaps %%xmm3, %%xmm4
\n\t
"
"andps %%xmm1, %%xmm3
\n\t
"
"andnps %%xmm1, %%xmm4
\n\t
"
"addps %%xmm0, %%xmm3
\n\t
"
// new a = m + ((0 <= a) & (a with sign flipped where 0 <= m))
"subps %%xmm4, %%xmm0
\n\t
"
// new m = m - (~(0 <= a) & (a with sign flipped where 0 <= m))
"movaps %%xmm3, %1
\n\t
"
"movaps %%xmm0, %0
\n\t
"
:
"+m"
(
mag
[
i
]),
"+m"
(
ang
[
i
])
::
"memory"
);
}
}
#endif
/*
 * Prototypes for the vorbis_inverse_coupling routines that have no C body in
 * this file. Presumably these are the symbols produced by the two cglobal
 * vorbis_inverse_coupling entries (INIT_MMX 3dnow / INIT_XMM sse) in
 * libavcodec/x86/vorbisdsp.asm -- confirm the ff_ prefix and cpu suffix
 * mangling against x86inc.
 */
void
ff_vorbis_inverse_coupling_3dnow
(
float
*
mag
,
float
*
ang
,
intptr_t
blocksize
);
void
ff_vorbis_inverse_coupling_sse
(
float
*
mag
,
float
*
ang
,
intptr_t
blocksize
);
/**
 * Install the x86 implementations of vorbis_inverse_coupling into dsp.
 *
 * Only the external-assembly routines (ff_vorbis_inverse_coupling_3dnow and
 * ff_vorbis_inverse_coupling_sse) are used, and only when HAVE_YASM is set.
 * Each pointer is installed solely when av_get_cpu_flags() reports the
 * matching CPU feature; otherwise dsp is left untouched.
 *
 * @param dsp context whose vorbis_inverse_coupling pointer may be replaced
 */
void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
{
#if HAVE_YASM
    int mm_flags = av_get_cpu_flags();

#if ARCH_X86_32
    /* The 3DNow! routine is only built for 32-bit x86. */
    if (mm_flags & AV_CPU_FLAG_3DNOW)
        dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_3dnow;
#endif /* ARCH_X86_32 */
    /* Checked last so that SSE takes precedence when both flags are set. */
    if (mm_flags & AV_CPU_FLAG_SSE)
        dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_sse;
#endif /* HAVE_YASM */
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment