Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
d74a8cb7
Commit
d74a8cb7
authored
Feb 21, 2015
by
Anton Khirnov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fmtconvert: drop unused functions
parent
ee964145
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
0 additions
and
1101 deletions
+0
-1101
Makefile
libavcodec/arm/Makefile
+0
-1
fmtconvert_init_arm.c
libavcodec/arm/fmtconvert_init_arm.c
+0
-14
fmtconvert_neon.S
libavcodec/arm/fmtconvert_neon.S
+0
-341
fmtconvert_vfp_armv6.S
libavcodec/arm/fmtconvert_vfp_armv6.S
+0
-78
fmtconvert.c
libavcodec/fmtconvert.c
+0
-49
fmtconvert.h
libavcodec/fmtconvert.h
+0
-48
fmtconvert_altivec.c
libavcodec/ppc/fmtconvert_altivec.c
+0
-111
fmtconvert.asm
libavcodec/x86/fmtconvert.asm
+0
-362
fmtconvert_init.c
libavcodec/x86/fmtconvert_init.c
+0
-97
No files found.
libavcodec/arm/Makefile
View file @
d74a8cb7
...
...
@@ -91,7 +91,6 @@ VFP-OBJS += arm/fmtconvert_vfp.o
# subsystems
VFP-OBJS-$(CONFIG_FFT)
+=
arm/fft_vfp.o
VFP-OBJS-$(CONFIG_MDCT)
+=
arm/mdct_vfp.o
VFP-OBJS-$(HAVE_ARMV6)
+=
arm/fmtconvert_vfp_armv6.o
# decoders/encoders
VFP-OBJS-$(CONFIG_DCA_DECODER)
+=
arm/dcadsp_vfp.o
\
...
...
libavcodec/arm/fmtconvert_init_arm.c
View file @
d74a8cb7
...
...
@@ -34,11 +34,6 @@ void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst,
const
int32_t
*
src
,
const
float
*
mul
,
int
len
);
/* NEON float -> int16_t conversion of a single array. */
void ff_float_to_int16_neon(int16_t *dst, const float *src, long len);

/* NEON float -> int16_t conversion of several arrays into one interleaved
 * array; installed as FmtConvertContext.float_to_int16_interleave. */
void ff_float_to_int16_interleave_neon(int16_t *dst, const float **src,
                                       long len, int channels);

/* VFP (ARMv6) float -> int16_t conversion of a single array. */
void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len);
av_cold
void
ff_fmt_convert_init_arm
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
)
{
int
cpu_flags
=
av_get_cpu_flags
();
...
...
@@ -48,18 +43,9 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_vfp
;
c
->
int32_to_float_fmul_array8
=
ff_int32_to_float_fmul_array8_vfp
;
}
if
(
have_armv6
(
cpu_flags
))
{
c
->
float_to_int16
=
ff_float_to_int16_vfp
;
}
}
if
(
have_neon
(
cpu_flags
))
{
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_neon
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
float_to_int16
=
ff_float_to_int16_neon
;
c
->
float_to_int16_interleave
=
ff_float_to_int16_interleave_neon
;
}
}
}
libavcodec/arm/fmtconvert_neon.S
View file @
d74a8cb7
This diff is collapsed.
Click to expand it.
libavcodec/arm/fmtconvert_vfp_armv6.S
deleted
100644 → 0
View file @
ee964145
/*
* Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/arm/asm.S"
/**
* ARM VFP optimized float to int16 conversion.
* Assume that len is a positive number and is multiple of 8, destination
* buffer is at least 4 bytes aligned (8 bytes alignment is better for
* performance), little-endian byte order.
*
* The loop is software pipelined: the first 8 floats are loaded and converted
* before the loop; each iteration then saturates/packs/stores the 8 values
* converted previously while (if more input remains) loading and converting
* the next 8.
*/
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
@ Register use as seen in the body: r0 = dst, r1 = src, r2 = remaining len.
function ff_float_to_int16_vfp, export=1
@ Save the core registers and the VFP registers (d8-d11 overlay s16-s23)
@ that are overwritten below.
push {r4-r8,lr}
vpush {d8-d11}
@ Prologue: load the first 8 floats and convert them to 32-bit integers.
@ NOTE(review): plain vcvt (not vcvtr) should round toward zero - confirm
@ against the ARM Architecture Reference Manual.
vldmia r1!, {s16-s23}
vcvt.s32.f32 s0, s16
vcvt.s32.f32 s1, s17
vcvt.s32.f32 s2, s18
vcvt.s32.f32 s3, s19
vcvt.s32.f32 s4, s20
vcvt.s32.f32 s5, s21
vcvt.s32.f32 s6, s22
vcvt.s32.f32 s7, s23
1:
@ Consume 8 samples; the flags set here drive every "gt" predicate below and
@ the loop branch at the end.
subs r2, r2, #8
@ Move the 8 converted integers from VFP to core registers.
vmov r3, r4, s0, s1
vmov r5, r6, s2, s3
vmov r7, r8, s4, s5
vmov ip, lr, s6, s7
@ If input remains, start loading the next 8 floats.
it gt
vldmiagt r1!, {s16-s23}
@ Saturate samples 0-3 to signed 16 bits and pack them pairwise
@ (even sample in the low halfword, odd sample in the high halfword).
ssat r4, #16, r4
ssat r3, #16, r3
ssat r6, #16, r6
ssat r5, #16, r5
pkhbt r3, r3, r4, lsl #16
pkhbt r4, r5, r6, lsl #16
@ If input remains, convert the next 8 floats for the following iteration.
itttt gt
vcvtgt.s32.f32 s0, s16
vcvtgt.s32.f32 s1, s17
vcvtgt.s32.f32 s2, s18
vcvtgt.s32.f32 s3, s19
itttt gt
vcvtgt.s32.f32 s4, s20
vcvtgt.s32.f32 s5, s21
vcvtgt.s32.f32 s6, s22
vcvtgt.s32.f32 s7, s23
@ Saturate and pack samples 4-7.
ssat r8, #16, r8
ssat r7, #16, r7
ssat lr, #16, lr
ssat ip, #16, ip
pkhbt r5, r7, r8, lsl #16
pkhbt r6, ip, lr, lsl #16
@ Store 4 words (8 int16 samples) and advance dst.
stmia r0!, {r3-r6}
bgt 1b
@ Restore saved registers and return (pc is popped in place of lr).
vpop {d8-d11}
pop {r4-r8,pc}
endfunc
libavcodec/fmtconvert.c
View file @
d74a8cb7
...
...
@@ -41,59 +41,10 @@ static void int32_to_float_fmul_array8_c(FmtConvertContext *c, float *dst,
c
->
int32_to_float_fmul_scalar
(
&
dst
[
i
],
&
src
[
i
],
*
mul
++
,
8
);
}
/**
 * Convert a single float to an integer clipped to the int16_t range.
 *
 * The value is rounded with lrintf() and the result is passed through
 * av_clip_int16().
 *
 * @param src pointer to the float to convert
 * @return the converted, clipped value
 */
static av_always_inline int float_to_int16_one(const float *src)
{
    long rounded = lrintf(*src);

    return av_clip_int16(rounded);
}
/**
 * C reference implementation of FmtConvertContext.float_to_int16.
 *
 * Converts len floats from src to int16_t values in dst, one element at a
 * time, using float_to_int16_one().
 *
 * @param dst destination array of int16_t
 * @param src source array of float
 * @param len number of elements to convert
 */
static void float_to_int16_c(int16_t *dst, const float *src, long len)
{
    int n = 0;

    while (n < len) {
        dst[n] = float_to_int16_one(&src[n]);
        n++;
    }
}
/**
 * C reference implementation of FmtConvertContext.float_to_int16_interleave.
 *
 * Converts `channels` planar float arrays to one interleaved int16_t array.
 * Stereo has a dedicated loop; every other channel count is handled one
 * channel at a time with a stride of `channels` in the output.
 *
 * @param dst      destination array of interleaved int16_t
 * @param src      source array of float arrays, one per channel
 * @param len      number of elements to convert per channel
 * @param channels number of channels
 */
static void float_to_int16_interleave_c(int16_t *dst, const float **src,
                                        long len, int channels)
{
    int n, out, ch;

    if (channels == 2) {
        for (n = 0; n < len; n++) {
            dst[2 * n]     = float_to_int16_one(src[0] + n);
            dst[2 * n + 1] = float_to_int16_one(src[1] + n);
        }
        return;
    }

    for (ch = 0; ch < channels; ch++) {
        out = ch;
        for (n = 0; n < len; n++) {
            dst[out] = float_to_int16_one(src[ch] + n);
            out     += channels;
        }
    }
}
/**
 * C reference implementation of FmtConvertContext.float_interleave.
 *
 * Copies `channels` planar float arrays into one interleaved float array.
 * Stereo uses a dedicated loop, mono is a plain memcpy() as long as the byte
 * count stays below INT_MAX, and everything else goes through the generic
 * strided loop.
 *
 * @param dst      destination array of interleaved float
 * @param src      source array of float arrays, one per channel
 * @param len      number of elements per channel
 * @param channels number of channels
 */
void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
                           int channels)
{
    unsigned int n;
    int out, ch;

    if (channels == 2) {
        for (n = 0; n < len; n++) {
            dst[2 * n]     = src[0][n];
            dst[2 * n + 1] = src[1][n];
        }
        return;
    }

    if (channels == 1 && len < INT_MAX / sizeof(float)) {
        memcpy(dst, src[0], len * sizeof(float));
        return;
    }

    for (ch = 0; ch < channels; ch++) {
        out = ch;
        for (n = 0; n < len; n++) {
            dst[out] = src[ch][n];
            out     += channels;
        }
    }
}
av_cold
void
ff_fmt_convert_init
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
)
{
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_c
;
c
->
int32_to_float_fmul_array8
=
int32_to_float_fmul_array8_c
;
c
->
float_to_int16
=
float_to_int16_c
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_c
;
c
->
float_interleave
=
ff_float_interleave_c
;
if
(
ARCH_ARM
)
ff_fmt_convert_init_arm
(
c
,
avctx
);
if
(
ARCH_PPC
)
ff_fmt_convert_init_ppc
(
c
,
avctx
);
...
...
libavcodec/fmtconvert.h
View file @
d74a8cb7
...
...
@@ -54,56 +54,8 @@ typedef struct FmtConvertContext {
float
*
dst
,
const
int32_t
*
src
,
const
float
*
mul
,
int
len
);
/**
* Convert an array of float to an array of int16_t.
*
* Convert floats in the range [-32768.0, 32767.0] to ints
* without rescaling
*
* @param dst destination array of int16_t.
* constraints: 16-byte aligned
* @param src source array of float.
* constraints: 16-byte aligned
* @param len number of elements to convert.
* constraints: multiple of 8
*/
void
(
*
float_to_int16
)(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
/**
* Convert multiple arrays of float to an interleaved array of int16_t.
*
* Convert floats in the range [-32768.0, 32767.0] to ints
* without rescaling
*
* @param dst destination array of interleaved int16_t.
* constraints: 16-byte aligned
* @param src source array of float arrays, one for each channel.
* constraints: 16-byte aligned
* @param len number of elements to convert.
* constraints: multiple of 8
* @param channels number of channels
*/
void
(
*
float_to_int16_interleave
)(
int16_t
*
dst
,
const
float
**
src
,
long
len
,
int
channels
);
/**
* Convert multiple arrays of float to an array of interleaved float.
*
* @param dst destination array of interleaved float.
* constraints: 16-byte aligned
* @param src source array of float arrays, one for each channel.
* constraints: 16-byte aligned
* @param len number of elements to convert.
* constraints: multiple of 8
* @param channels number of channels
*/
void
(
*
float_interleave
)(
float
*
dst
,
const
float
**
src
,
unsigned
int
len
,
int
channels
);
}
FmtConvertContext
;
/* C implementation of float interleaving; also used as the fallback by the
 * architecture-specific wrappers. */
void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
                           int channels);

/* Fill in a FmtConvertContext with the implementations to use. */
void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx);

/* ARM-specific part of the FmtConvertContext initialisation. */
void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx);
...
...
libavcodec/ppc/fmtconvert_altivec.c
View file @
d74a8cb7
...
...
@@ -52,113 +52,6 @@ static void int32_to_float_fmul_scalar_altivec(float *dst, const int32_t *src,
}
}
static
vector
signed
short
float_to_int16_one_altivec
(
const
float
*
src
)
{
vector
float
s0
=
vec_ld
(
0
,
src
);
vector
float
s1
=
vec_ld
(
16
,
src
);
vector
signed
int
t0
=
vec_cts
(
s0
,
0
);
vector
signed
int
t1
=
vec_cts
(
s1
,
0
);
return
vec_packs
(
t0
,
t1
);
}
static
void
float_to_int16_altivec
(
int16_t
*
dst
,
const
float
*
src
,
long
len
)
{
int
i
;
vector
signed
short
d0
,
d1
,
d
;
vector
unsigned
char
align
;
if
(((
long
)
dst
)
&
15
)
{
//FIXME
for
(
i
=
0
;
i
<
len
-
7
;
i
+=
8
)
{
d0
=
vec_ld
(
0
,
dst
+
i
);
d
=
float_to_int16_one_altivec
(
src
+
i
);
d1
=
vec_ld
(
15
,
dst
+
i
);
d1
=
vec_perm
(
d1
,
d0
,
vec_lvsl
(
0
,
dst
+
i
));
align
=
vec_lvsr
(
0
,
dst
+
i
);
d0
=
vec_perm
(
d1
,
d
,
align
);
d1
=
vec_perm
(
d
,
d1
,
align
);
vec_st
(
d0
,
0
,
dst
+
i
);
vec_st
(
d1
,
15
,
dst
+
i
);
}
}
else
{
for
(
i
=
0
;
i
<
len
-
7
;
i
+=
8
)
{
d
=
float_to_int16_one_altivec
(
src
+
i
);
vec_st
(
d
,
0
,
dst
+
i
);
}
}
}
/*
 * Store element `elem` of vector `v` at `dst`, then advance `dst` by `inc`.
 * The element is first replicated with vec_splat() and then written with
 * vec_ste(). `dst` is modified by the macro, so it must be an lvalue; the
 * arguments are not parenthesised and should be simple expressions.
 */
#define VSTE_INC(dst, v, elem, inc) do { \
vector signed short s = vec_splat(v, elem); \
vec_ste(s, 0, dst); \
dst += inc; \
} while (0)
static
void
float_to_int16_stride_altivec
(
int16_t
*
dst
,
const
float
*
src
,
long
len
,
int
stride
)
{
int
i
;
vector
signed
short
d
;
for
(
i
=
0
;
i
<
len
-
7
;
i
+=
8
)
{
d
=
float_to_int16_one_altivec
(
src
+
i
);
VSTE_INC
(
dst
,
d
,
0
,
stride
);
VSTE_INC
(
dst
,
d
,
1
,
stride
);
VSTE_INC
(
dst
,
d
,
2
,
stride
);
VSTE_INC
(
dst
,
d
,
3
,
stride
);
VSTE_INC
(
dst
,
d
,
4
,
stride
);
VSTE_INC
(
dst
,
d
,
5
,
stride
);
VSTE_INC
(
dst
,
d
,
6
,
stride
);
VSTE_INC
(
dst
,
d
,
7
,
stride
);
}
}
static
void
float_to_int16_interleave_altivec
(
int16_t
*
dst
,
const
float
**
src
,
long
len
,
int
channels
)
{
int
i
;
vector
signed
short
d0
,
d1
,
d2
,
c0
,
c1
,
t0
,
t1
;
vector
unsigned
char
align
;
if
(
channels
==
1
)
float_to_int16_altivec
(
dst
,
src
[
0
],
len
);
else
{
if
(
channels
==
2
)
{
if
(((
long
)
dst
)
&
15
)
{
for
(
i
=
0
;
i
<
len
-
7
;
i
+=
8
)
{
d0
=
vec_ld
(
0
,
dst
+
i
);
t0
=
float_to_int16_one_altivec
(
src
[
0
]
+
i
);
d1
=
vec_ld
(
31
,
dst
+
i
);
t1
=
float_to_int16_one_altivec
(
src
[
1
]
+
i
);
c0
=
vec_mergeh
(
t0
,
t1
);
c1
=
vec_mergel
(
t0
,
t1
);
d2
=
vec_perm
(
d1
,
d0
,
vec_lvsl
(
0
,
dst
+
i
));
align
=
vec_lvsr
(
0
,
dst
+
i
);
d0
=
vec_perm
(
d2
,
c0
,
align
);
d1
=
vec_perm
(
c0
,
c1
,
align
);
vec_st
(
d0
,
0
,
dst
+
i
);
d0
=
vec_perm
(
c1
,
d2
,
align
);
vec_st
(
d1
,
15
,
dst
+
i
);
vec_st
(
d0
,
31
,
dst
+
i
);
dst
+=
8
;
}
}
else
{
for
(
i
=
0
;
i
<
len
-
7
;
i
+=
8
)
{
t0
=
float_to_int16_one_altivec
(
src
[
0
]
+
i
);
t1
=
float_to_int16_one_altivec
(
src
[
1
]
+
i
);
d0
=
vec_mergeh
(
t0
,
t1
);
d1
=
vec_mergel
(
t0
,
t1
);
vec_st
(
d0
,
0
,
dst
+
i
);
vec_st
(
d1
,
16
,
dst
+
i
);
dst
+=
8
;
}
}
}
else
{
for
(
i
=
0
;
i
<
channels
;
i
++
)
float_to_int16_stride_altivec
(
dst
+
i
,
src
[
i
],
len
,
channels
);
}
}
}
#endif
/* HAVE_ALTIVEC */
av_cold
void
ff_fmt_convert_init_ppc
(
FmtConvertContext
*
c
,
...
...
@@ -169,9 +62,5 @@ av_cold void ff_fmt_convert_init_ppc(FmtConvertContext *c,
return
;
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_altivec
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
float_to_int16
=
float_to_int16_altivec
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_altivec
;
}
#endif
/* HAVE_ALTIVEC */
}
libavcodec/x86/fmtconvert.asm
View file @
d74a8cb7
...
...
@@ -23,14 +23,6 @@
SECTION_TEXT
; CVTPS2PI dst, src
; Packed float -> packed int32 conversion into an MMX register, using
; whichever instruction the currently selected cpuflags provide.
%macro CVTPS2PI 2
%if cpuflag(sse)
    cvtps2pi  %1, %2
%elif cpuflag(3dnow)
    pf2id     %1, %2
%endif
%endmacro
;------------------------------------------------------------------------------
; void ff_int32_to_float_fmul_scalar(float *dst, const int32_t *src, float mul,
; int len);
...
...
@@ -76,357 +68,3 @@ INIT_XMM sse
INT32_TO_FLOAT_FMUL_SCALAR
5
INIT_XMM
sse2
INT32_TO_FLOAT_FMUL_SCALAR
3
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
; %1 = number of XMM registers used.
; Both pointers are moved to the end of their buffers and lenq becomes a
; negative byte offset (in units of output bytes) that counts up to zero.
; Every iteration produces 16 output bytes, i.e. 8 samples.
%macro FLOAT_TO_INT16 1
cglobal float_to_int16, 3, 3, %1, dst, src, len
    add          lenq, lenq                 ; len * 2 = output size in bytes
    lea          srcq, [srcq+2*lenq]        ; end of src (4 bytes per float)
    add          dstq, lenq                 ; end of dst
    neg          lenq
.loop:
%if cpuflag(sse2)
    cvtps2dq       m0, [srcq+2*lenq]
    cvtps2dq       m1, [srcq+2*lenq+16]
    packssdw       m0, m1                   ; saturate to int16
    mova  [dstq+lenq], m0
%else
    CVTPS2PI       m0, [srcq+2*lenq]
    CVTPS2PI       m1, [srcq+2*lenq+8]
    CVTPS2PI       m2, [srcq+2*lenq+16]
    CVTPS2PI       m3, [srcq+2*lenq+24]
    packssdw       m0, m1
    packssdw       m2, m3
    mova  [dstq+lenq], m0
    mova  [dstq+lenq+8], m2
%endif
    add          lenq, 16
    js .loop
%if mmsize == 8
    emms
%endif
    REP_RET
%endmacro

INIT_XMM sse2
FLOAT_TO_INT16 2
INIT_MMX sse
FLOAT_TO_INT16 0
INIT_MMX 3dnow
FLOAT_TO_INT16 0
;------------------------------------------------------------------------------
; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
;------------------------------------------------------------------------------
; %1 = number of XMM registers used.
; Converts 8 samples per iteration and writes them `step` int16 elements
; apart. Samples are extracted two at a time into v1/v2 (low word first, then
; the high word after a 16-bit shift); dst advances by 4 steps after each
; group of four stores.
%macro FLOAT_TO_INT16_STEP 1
cglobal float_to_int16_step, 4, 7, %1, dst, src, len, step, step3, v1, v2
    add          lenq, lenq
    lea          srcq, [srcq+2*lenq]        ; end of src
    lea        step3q, [stepq*3]
    neg          lenq                       ; negative offset counting up to 0
.loop:
%if cpuflag(sse2)
    cvtps2dq       m0, [srcq+2*lenq]
    cvtps2dq       m1, [srcq+2*lenq+16]
    packssdw       m0, m1
    ; samples 0-3
    movd          v1d, m0
    psrldq         m0, 4
    movd          v2d, m0
    psrldq         m0, 4
    mov        [dstq], v1w
    mov  [dstq+stepq*4], v2w
    shr           v1d, 16
    shr           v2d, 16
    mov  [dstq+stepq*2], v1w
    mov  [dstq+step3q*2], v2w
    lea          dstq, [dstq+stepq*8]
    ; samples 4-7
    movd          v1d, m0
    psrldq         m0, 4
    movd          v2d, m0
    mov        [dstq], v1w
    mov  [dstq+stepq*4], v2w
    shr           v1d, 16
    shr           v2d, 16
    mov  [dstq+stepq*2], v1w
    mov  [dstq+step3q*2], v2w
    lea          dstq, [dstq+stepq*8]
%else
    CVTPS2PI       m0, [srcq+2*lenq]
    CVTPS2PI       m1, [srcq+2*lenq+8]
    CVTPS2PI       m2, [srcq+2*lenq+16]
    CVTPS2PI       m3, [srcq+2*lenq+24]
    packssdw       m0, m1
    packssdw       m2, m3
    ; samples 0-3
    movd          v1d, m0
    psrlq          m0, 32
    movd          v2d, m0
    mov        [dstq], v1w
    mov  [dstq+stepq*4], v2w
    shr           v1d, 16
    shr           v2d, 16
    mov  [dstq+stepq*2], v1w
    mov  [dstq+step3q*2], v2w
    lea          dstq, [dstq+stepq*8]
    ; samples 4-7
    movd          v1d, m2
    psrlq          m2, 32
    movd          v2d, m2
    mov        [dstq], v1w
    mov  [dstq+stepq*4], v2w
    shr           v1d, 16
    shr           v2d, 16
    mov  [dstq+stepq*2], v1w
    mov  [dstq+step3q*2], v2w
    lea          dstq, [dstq+stepq*8]
%endif
    add          lenq, 16
    js .loop
%if mmsize == 8
    emms
%endif
    REP_RET
%endmacro

INIT_XMM sse2
FLOAT_TO_INT16_STEP 2
INIT_MMX sse
FLOAT_TO_INT16_STEP 0
INIT_MMX 3dnow
FLOAT_TO_INT16_STEP 0
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;-------------------------------------------------------------------------------
; On entry r2 holds len; it is turned into a byte count in lenq before r2 is
; reused as src1. All three pointers are moved to the end of their buffers
; and lenq runs from -4*len up to zero, 16 bytes (4 stereo frames) at a time.
%macro FLOAT_TO_INT16_INTERLEAVE2 0
cglobal float_to_int16_interleave2, 3, 4, 2, dst, src0, src1, len
    lea          lenq, [4*r2q]
    mov         src1q, [src0q+gprsize]      ; src[1]
    mov         src0q, [src0q]              ; src[0]
    add          dstq, lenq
    add         src0q, lenq
    add         src1q, lenq
    neg          lenq
.loop:
%if cpuflag(sse2)
    cvtps2dq       m0, [src0q+lenq]
    cvtps2dq       m1, [src1q+lenq]
    packssdw       m0, m1
    movhlps        m1, m0
    punpcklwd      m0, m1
    mova  [dstq+lenq], m0
%else
    CVTPS2PI       m0, [src0q+lenq]
    CVTPS2PI       m1, [src0q+lenq+8]
    CVTPS2PI       m2, [src1q+lenq]
    CVTPS2PI       m3, [src1q+lenq+8]
    packssdw       m0, m1
    packssdw       m2, m3
    mova           m1, m0
    punpcklwd      m0, m2
    punpckhwd      m1, m2
    mova  [dstq+lenq], m0
    mova  [dstq+lenq+8], m1
%endif
    add          lenq, 16
    js .loop
%if mmsize == 8
    emms
%endif
    REP_RET
%endmacro

INIT_MMX 3dnow
FLOAT_TO_INT16_INTERLEAVE2
INIT_MMX sse
FLOAT_TO_INT16_INTERLEAVE2
INIT_XMM sse2
FLOAT_TO_INT16_INTERLEAVE2
;-----------------------------------------------------------------------------
; void ff_float_to_int16_interleave6(int16_t *dst, const float **src, int len)
;-----------------------------------------------------------------------------
; src1..src5 are kept as offsets relative to src[0] so that a single pointer
; increment advances all six channels. Each iteration converts 2 samples per
; channel and writes 24 bytes.
%macro FLOAT_TO_INT16_INTERLEAVE6 0
cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64
    mov          lend, r2d
%else
    %define      lend dword r2m
%endif
    mov         src1q, [srcq+1*gprsize]
    mov         src2q, [srcq+2*gprsize]
    mov         src3q, [srcq+3*gprsize]
    mov         src4q, [srcq+4*gprsize]
    mov         src5q, [srcq+5*gprsize]
    mov          srcq, [srcq]
    sub         src1q, srcq
    sub         src2q, srcq
    sub         src3q, srcq
    sub         src4q, srcq
    sub         src5q, srcq
.loop:
    CVTPS2PI      mm0, [srcq]
    CVTPS2PI      mm1, [srcq+src1q]
    CVTPS2PI      mm2, [srcq+src2q]
    CVTPS2PI      mm3, [srcq+src3q]
    CVTPS2PI      mm4, [srcq+src4q]
    CVTPS2PI      mm5, [srcq+src5q]
    packssdw      mm0, mm3
    packssdw      mm1, mm4
    packssdw      mm2, mm5
    PSWAPD        mm3, mm0
    punpcklwd     mm0, mm1
    punpckhwd     mm1, mm2
    punpcklwd     mm2, mm3
    PSWAPD        mm3, mm0
    punpckldq     mm0, mm2
    punpckhdq     mm2, mm1
    punpckldq     mm1, mm3
    movq    [dstq   ], mm0
    movq    [dstq+16], mm2
    movq    [dstq+ 8], mm1
    add          srcq, 8
    add          dstq, 24
    sub          lend, 2
    jg .loop
    emms
    RET
%endmacro ; FLOAT_TO_INT16_INTERLEAVE6

INIT_MMX sse
FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX 3dnow
FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX 3dnowext
FLOAT_TO_INT16_INTERLEAVE6
;-----------------------------------------------------------------------------
; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
;-----------------------------------------------------------------------------
; %1 = number of XMM registers used.
; src1..src5 are kept as offsets relative to src[0]. Each iteration reads
; mmsize bytes from every channel (mmsize/4 floats) and writes mmsize*6 bytes.
%macro FLOAT_INTERLEAVE6 1
cglobal float_interleave6, 2, 8, %1, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64
    mov          lend, r2d
%else
    %define      lend dword r2m
%endif
    mov         src1q, [srcq+1*gprsize]
    mov         src2q, [srcq+2*gprsize]
    mov         src3q, [srcq+3*gprsize]
    mov         src4q, [srcq+4*gprsize]
    mov         src5q, [srcq+5*gprsize]
    mov          srcq, [srcq]
    sub         src1q, srcq
    sub         src2q, srcq
    sub         src3q, srcq
    sub         src4q, srcq
    sub         src5q, srcq
.loop:
%if cpuflag(sse)
    movaps         m0, [srcq]
    movaps         m1, [srcq+src1q]
    movaps         m2, [srcq+src2q]
    movaps         m3, [srcq+src3q]
    movaps         m4, [srcq+src4q]
    movaps         m5, [srcq+src5q]

    SBUTTERFLYPS 0, 1, 6
    SBUTTERFLYPS 2, 3, 6
    SBUTTERFLYPS 4, 5, 6

    movaps         m6, m4
    shufps         m4, m0, 0xe4
    movlhps        m0, m2
    movhlps        m6, m2
    movaps  [dstq   ], m0
    movaps  [dstq+16], m4
    movaps  [dstq+32], m6

    movaps         m6, m5
    shufps         m5, m1, 0xe4
    movlhps        m1, m3
    movhlps        m6, m3
    movaps  [dstq+48], m1
    movaps  [dstq+64], m5
    movaps  [dstq+80], m6
%else ; mmx
    movq           m0, [srcq]
    movq           m1, [srcq+src1q]
    movq           m2, [srcq+src2q]
    movq           m3, [srcq+src3q]
    movq           m4, [srcq+src4q]
    movq           m5, [srcq+src5q]

    SBUTTERFLY dq, 0, 1, 6
    SBUTTERFLY dq, 2, 3, 6
    SBUTTERFLY dq, 4, 5, 6
    movq    [dstq   ], m0
    movq    [dstq+ 8], m2
    movq    [dstq+16], m4
    movq    [dstq+24], m1
    movq    [dstq+32], m3
    movq    [dstq+40], m5
%endif
    add          srcq, mmsize
    add          dstq, mmsize*6
    sub          lend, mmsize/4
    jg .loop
%if mmsize == 8
    emms
%endif
    REP_RET
%endmacro

INIT_MMX mmx
FLOAT_INTERLEAVE6 0
INIT_XMM sse
FLOAT_INTERLEAVE6 7
;-----------------------------------------------------------------------------
; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
;-----------------------------------------------------------------------------
; %1 = number of XMM registers used.
; PUNPCKLDQ / PUNPCKHDQ must be defined before instantiation to the unpack
; instructions matching the register type. src1 is kept as an offset from
; src[0]. Each iteration reads 2*mmsize bytes per channel (mmsize/2 floats)
; and writes 4*mmsize bytes.
%macro FLOAT_INTERLEAVE2 1
cglobal float_interleave2, 3, 4, %1, dst, src, len, src1
    mov         src1q, [srcq+gprsize]
    mov          srcq, [srcq]
    sub         src1q, srcq
.loop:
    mova           m0, [srcq]
    mova           m1, [srcq+src1q]
    mova           m3, [srcq+mmsize]
    mova           m4, [srcq+src1q+mmsize]

    mova           m2, m0
    PUNPCKLDQ      m0, m1
    PUNPCKHDQ      m2, m1

    mova           m1, m3
    PUNPCKLDQ      m3, m4
    PUNPCKHDQ      m1, m4

    mova  [dstq], m0
    mova  [dstq+1*mmsize], m2
    mova  [dstq+2*mmsize], m3
    mova  [dstq+3*mmsize], m1

    add          srcq, mmsize*2
    add          dstq, mmsize*4
    sub          lend, mmsize/2
    jg .loop
%if mmsize == 8
    emms
%endif
    REP_RET
%endmacro

INIT_MMX mmx
%define PUNPCKLDQ punpckldq
%define PUNPCKHDQ punpckhdq
FLOAT_INTERLEAVE2 0
INIT_XMM sse
%define PUNPCKLDQ unpcklps
%define PUNPCKHDQ unpckhps
FLOAT_INTERLEAVE2 5
libavcodec/x86/fmtconvert_init.c
View file @
d74a8cb7
...
...
@@ -33,84 +33,6 @@
/* int32 -> float with scalar multiply */
void ff_int32_to_float_fmul_scalar_sse(float *dst, const int32_t *src,
                                       float mul, int len);
void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int32_t *src,
                                        float mul, int len);

/* float -> int16, single array */
void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse(int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse2(int16_t *dst, const float *src, long len);

/* float -> int16, output written `step` elements apart */
void ff_float_to_int16_step_3dnow(int16_t *dst, const float *src,
                                  long len, long step);
void ff_float_to_int16_step_sse(int16_t *dst, const float *src,
                                long len, long step);
void ff_float_to_int16_step_sse2(int16_t *dst, const float *src,
                                 long len, long step);

/* float -> int16, 2 channels interleaved */
void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src,
                                         long len);
void ff_float_to_int16_interleave2_sse(int16_t *dst, const float **src,
                                       long len);
void ff_float_to_int16_interleave2_sse2(int16_t *dst, const float **src,
                                        long len);

/* float -> int16, 6 channels interleaved */
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src,
                                       int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src,
                                         int len);
void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src,
                                            int len);
/* There is no dedicated SSE2 6-channel routine; reuse the SSE one. */
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse

/*
 * Generate float_to_int16_interleave_<cpu>(), which dispatches on the channel
 * count to the matching assembly routine, plus a helper for all other channel
 * counts that runs the strided routine once per channel.
 *
 * The helper is a separate av_noinline function because gcc pessimizes
 * register allocation if this is in the same function as
 * float_to_int16_interleave_sse2.
 */
#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
static av_noinline void \
float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, \
                                     long len, int channels) \
{ \
    int ch = 0; \
    while (ch < channels) { \
        ff_float_to_int16_step_##cpu(dst + ch, src[ch], len, channels); \
        ch++; \
    } \
} \
 \
static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, \
                                            long len, int channels) \
{ \
    switch (channels) { \
    case 1: \
        ff_float_to_int16_##cpu(dst, src[0], len); \
        break; \
    case 2: \
        ff_float_to_int16_interleave2_##cpu(dst, src, len); \
        break; \
    case 6: \
        ff_float_to_int16_interleave6_##cpu(dst, src, len); \
        break; \
    default: \
        float_to_int16_interleave_misc_##cpu(dst, src, len, channels); \
        break; \
    } \
}

FLOAT_TO_INT16_INTERLEAVE(3dnow)
FLOAT_TO_INT16_INTERLEAVE(sse)
FLOAT_TO_INT16_INTERLEAVE(sse2)
/**
 * 3DNow!ext interleaving dispatcher: only the 6-channel case has a dedicated
 * 3dnowext routine, every other channel count is forwarded to the plain
 * 3DNow! dispatcher.
 */
static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
                                               long len, int channels)
{
    if (channels != 6) {
        float_to_int16_interleave_3dnow(dst, src, len, channels);
        return;
    }

    ff_float_to_int16_interleave6_3dnowext(dst, src, len);
}
/* float interleaving, 2 channels */
void ff_float_interleave2_mmx(float *dst, const float **src,
                              unsigned int len);
void ff_float_interleave2_sse(float *dst, const float **src,
                              unsigned int len);

/* float interleaving, 6 channels */
void ff_float_interleave6_mmx(float *dst, const float **src,
                              unsigned int len);
void ff_float_interleave6_sse(float *dst, const float **src,
                              unsigned int len);
/**
 * MMX float interleaving dispatcher: 2 and 6 channels use the assembly
 * routines, any other channel count falls back to ff_float_interleave_c().
 */
static void float_interleave_mmx(float *dst, const float **src,
                                 unsigned int len, int channels)
{
    switch (channels) {
    case 2:
        ff_float_interleave2_mmx(dst, src, len);
        break;
    case 6:
        ff_float_interleave6_mmx(dst, src, len);
        break;
    default:
        ff_float_interleave_c(dst, src, len, channels);
        break;
    }
}
/**
 * SSE float interleaving dispatcher: 2 and 6 channels use the assembly
 * routines, any other channel count falls back to ff_float_interleave_c().
 */
static void float_interleave_sse(float *dst, const float **src,
                                 unsigned int len, int channels)
{
    switch (channels) {
    case 2:
        ff_float_interleave2_sse(dst, src, len);
        break;
    case 6:
        ff_float_interleave6_sse(dst, src, len);
        break;
    default:
        ff_float_interleave_c(dst, src, len, channels);
        break;
    }
}
#endif
/* HAVE_YASM */
av_cold
void
ff_fmt_convert_init_x86
(
FmtConvertContext
*
c
,
AVCodecContext
*
avctx
)
...
...
@@ -118,30 +40,11 @@ av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx
#if HAVE_YASM
int
cpu_flags
=
av_get_cpu_flags
();
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
c
->
float_interleave
=
float_interleave_mmx
;
}
if
(
EXTERNAL_AMD3DNOW
(
cpu_flags
))
{
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
float_to_int16
=
ff_float_to_int16_3dnow
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dnow
;
}
}
if
(
EXTERNAL_AMD3DNOWEXT
(
cpu_flags
))
{
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dnowext
;
}
}
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_sse
;
c
->
float_to_int16
=
ff_float_to_int16_sse
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse
;
c
->
float_interleave
=
float_interleave_sse
;
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_sse2
;
c
->
float_to_int16
=
ff_float_to_int16_sse2
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse2
;
}
#endif
/* HAVE_YASM */
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment