Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
aad3429d
Commit
aad3429d
authored
Oct 10, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fmtconvert: port float_to_int16_interleave() 2-channel x86 inline asm to yasm
parent
4e8e2624
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
61 additions
and
60 deletions
+61
-60
fmtconvert.asm
libavcodec/x86/fmtconvert.asm
+52
-0
fmtconvert_mmx.c
libavcodec/x86/fmtconvert_mmx.c
+9
-60
No files found.
libavcodec/x86/fmtconvert.asm
View file @
aad3429d
...
...
@@ -112,6 +112,58 @@ FLOAT_TO_INT16 3dnow, 0
%undef
cvtps2pi
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;-------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16_INTERLEAVE2
1
cglobal
float_to_int16_interleave2_
%1
,
3
,
4
,
2
,
dst
,
src0
,
src1
,
len
lea
lenq
,
[
4
*
r2q
]
mov
src1q
,
[
src0q
+
gprsize
]
mov
src0q
,
[src0q]
add
dstq
,
lenq
add
src0q
,
lenq
add
src1q
,
lenq
neg
lenq
.
loop
:
%ifidn
%1
,
sse2
cvtps2dq
m0
,
[
src0q
+
lenq
]
cvtps2dq
m1
,
[
src1q
+
lenq
]
packssdw
m0
,
m1
movhlps
m1
,
m0
punpcklwd
m0
,
m1
mova
[
dstq
+
lenq
]
,
m0
%else
cvtps2pi
m0
,
[
src0q
+
lenq
]
cvtps2pi
m1
,
[
src0q
+
lenq
+
8
]
cvtps2pi
m2
,
[
src1q
+
lenq
]
cvtps2pi
m3
,
[
src1q
+
lenq
+
8
]
packssdw
m0
,
m1
packssdw
m2
,
m3
mova
m1
,
m0
punpcklwd
m0
,
m2
punpckhwd
m1
,
m2
mova
[
dstq
+
lenq
]
,
m0
mova
[
dstq
+
lenq
+
8
]
,
m1
%endif
add
lenq
,
16
js
.
loop
%ifnidn
%1
,
sse2
emms
%endif
REP_RET
%endmacro
INIT_MMX
%define
cvtps2pi
pf2id
FLOAT_TO_INT16_INTERLEAVE2
3
dnow
%undef
cvtps2pi
%define
movdqa
movaps
FLOAT_TO_INT16_INTERLEAVE2
sse
%undef
movdqa
INIT_XMM
FLOAT_TO_INT16_INTERLEAVE2
sse2
%macro
PSWAPD_SSE
2
pshufw
%1
,
%2
,
0x4e
%endmacro
...
...
libavcodec/x86/fmtconvert_mmx.c
View file @
aad3429d
...
...
@@ -35,13 +35,17 @@ void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
void
ff_float_to_int16_sse
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse2
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_interleave2_3dnow
(
int16_t
*
dst
,
const
float
**
src
,
long
len
);
void
ff_float_to_int16_interleave2_sse
(
int16_t
*
dst
,
const
float
**
src
,
long
len
);
void
ff_float_to_int16_interleave2_sse2
(
int16_t
*
dst
,
const
float
**
src
,
long
len
);
void
ff_float_to_int16_interleave6_sse
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
void
ff_float_to_int16_interleave6_3dnow
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
void
ff_float_to_int16_interleave6_3dn2
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
#define FLOAT_TO_INT16_INTERLEAVE(cpu
, body
) \
#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/
\
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
DECLARE_ALIGNED(16, int16_t, tmp)[len];\
...
...
@@ -57,71 +61,16 @@ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon
if(channels==1)\
ff_float_to_int16_##cpu(dst, src[0], len);\
else if(channels==2){\
x86_reg reglen = len; \
const float *src0 = src[0];\
const float *src1 = src[1];\
__asm__ volatile(\
"shl $2, %0 \n"\
"add %0, %1 \n"\
"add %0, %2 \n"\
"add %0, %3 \n"\
"neg %0 \n"\
body\
:"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\
);\
ff_float_to_int16_interleave2_##cpu(dst, src, len);\
}else if(channels==6){\
ff_float_to_int16_interleave6_##cpu(dst, src, len);\
}else\
float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
}
FLOAT_TO_INT16_INTERLEAVE
(
3
dnow
,
"1:
\n
"
"pf2id (%2,%0), %%mm0
\n
"
"pf2id 8(%2,%0), %%mm1
\n
"
"pf2id (%3,%0), %%mm2
\n
"
"pf2id 8(%3,%0), %%mm3
\n
"
"packssdw %%mm1, %%mm0
\n
"
"packssdw %%mm3, %%mm2
\n
"
"movq %%mm0, %%mm1
\n
"
"punpcklwd %%mm2, %%mm0
\n
"
"punpckhwd %%mm2, %%mm1
\n
"
"movq %%mm0, (%1,%0)
\n
"
"movq %%mm1, 8(%1,%0)
\n
"
"add $16, %0
\n
"
"js 1b
\n
"
"femms
\n
"
)
FLOAT_TO_INT16_INTERLEAVE
(
sse
,
"1:
\n
"
"cvtps2pi (%2,%0), %%mm0
\n
"
"cvtps2pi 8(%2,%0), %%mm1
\n
"
"cvtps2pi (%3,%0), %%mm2
\n
"
"cvtps2pi 8(%3,%0), %%mm3
\n
"
"packssdw %%mm1, %%mm0
\n
"
"packssdw %%mm3, %%mm2
\n
"
"movq %%mm0, %%mm1
\n
"
"punpcklwd %%mm2, %%mm0
\n
"
"punpckhwd %%mm2, %%mm1
\n
"
"movq %%mm0, (%1,%0)
\n
"
"movq %%mm1, 8(%1,%0)
\n
"
"add $16, %0
\n
"
"js 1b
\n
"
"emms
\n
"
)
FLOAT_TO_INT16_INTERLEAVE
(
sse2
,
"1:
\n
"
"cvtps2dq (%2,%0), %%xmm0
\n
"
"cvtps2dq (%3,%0), %%xmm1
\n
"
"packssdw %%xmm1, %%xmm0
\n
"
"movhlps %%xmm0, %%xmm1
\n
"
"punpcklwd %%xmm1, %%xmm0
\n
"
"movdqa %%xmm0, (%1,%0)
\n
"
"add $16, %0
\n
"
"js 1b
\n
"
)
FLOAT_TO_INT16_INTERLEAVE
(
3
dnow
)
FLOAT_TO_INT16_INTERLEAVE
(
sse
)
FLOAT_TO_INT16_INTERLEAVE
(
sse2
)
static
void
float_to_int16_interleave_3dn2
(
int16_t
*
dst
,
const
float
**
src
,
long
len
,
int
channels
){
if
(
channels
==
6
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment