Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
708ab7dd
Commit
708ab7dd
authored
Oct 09, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fmtconvert: port float_to_int16() x86 inline asm to yasm
parent
45add995
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
55 additions
and
76 deletions
+55
-76
fmtconvert.asm
libavcodec/x86/fmtconvert.asm
+42
-0
fmtconvert_mmx.c
libavcodec/x86/fmtconvert_mmx.c
+13
-76
No files found.
libavcodec/x86/fmtconvert.asm
View file @
708ab7dd
...
...
@@ -24,6 +24,48 @@
SECTION_TEXT
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
%macro
FLOAT_TO_INT16
2
cglobal
float_to_int16_
%1
,
3
,
3
,
%2
,
dst
,
src
,
len
add
lenq
,
lenq
lea
srcq
,
[
srcq
+
2
*
lenq
]
add
dstq
,
lenq
neg
lenq
.
loop
:
%ifidn
%1
,
sse2
cvtps2dq
m0
,
[
srcq
+
2
*
lenq
]
cvtps2dq
m1
,
[
srcq
+
2
*
lenq
+
16
]
packssdw
m0
,
m1
mova
[
dstq
+
lenq
]
,
m0
%else
cvtps2pi
m0
,
[
srcq
+
2
*
lenq
]
cvtps2pi
m1
,
[
srcq
+
2
*
lenq
+
8
]
cvtps2pi
m2
,
[
srcq
+
2
*
lenq
+
16
]
cvtps2pi
m3
,
[
srcq
+
2
*
lenq
+
24
]
packssdw
m0
,
m1
packssdw
m2
,
m3
mova
[
dstq
+
lenq
]
,
m0
mova
[
dstq
+
lenq
+
8
]
,
m2
%endif
add
lenq
,
16
js
.
loop
%ifnidn
%1
,
sse2
emms
%endif
REP_RET
%endmacro
INIT_XMM
FLOAT_TO_INT16
sse2
,
2
INIT_MMX
FLOAT_TO_INT16
sse
,
0
%define
cvtps2pi
pf2id
FLOAT_TO_INT16
3
dnow
,
0
%undef
cvtps2pi
%macro
PSWAPD_SSE
2
pshufw
%1
,
%2
,
0x4e
%endmacro
...
...
libavcodec/x86/fmtconvert_mmx.c
View file @
708ab7dd
...
...
@@ -70,80 +70,16 @@ static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mu
);
}
static
void
float_to_int16_3dnow
(
int16_t
*
dst
,
const
float
*
src
,
long
len
){
x86_reg
reglen
=
len
;
// not bit-exact: pf2id uses different rounding than C and SSE
__asm__
volatile
(
"add %0 , %0
\n\t
"
"lea (%2,%0,2) , %2
\n\t
"
"add %0 , %1
\n\t
"
"neg %0
\n\t
"
"1:
\n\t
"
"pf2id (%2,%0,2) , %%mm0
\n\t
"
"pf2id 8(%2,%0,2) , %%mm1
\n\t
"
"pf2id 16(%2,%0,2) , %%mm2
\n\t
"
"pf2id 24(%2,%0,2) , %%mm3
\n\t
"
"packssdw %%mm1 , %%mm0
\n\t
"
"packssdw %%mm3 , %%mm2
\n\t
"
"movq %%mm0 , (%1,%0)
\n\t
"
"movq %%mm2 , 8(%1,%0)
\n\t
"
"add $16 , %0
\n\t
"
" js 1b
\n\t
"
"femms
\n\t
"
:
"+r"
(
reglen
),
"+r"
(
dst
),
"+r"
(
src
)
);
}
#if HAVE_YASM
static
void
float_to_int16_sse
(
int16_t
*
dst
,
const
float
*
src
,
long
len
){
x86_reg
reglen
=
len
;
__asm__
volatile
(
"add %0 , %0
\n\t
"
"lea (%2,%0,2) , %2
\n\t
"
"add %0 , %1
\n\t
"
"neg %0
\n\t
"
"1:
\n\t
"
"cvtps2pi (%2,%0,2) , %%mm0
\n\t
"
"cvtps2pi 8(%2,%0,2) , %%mm1
\n\t
"
"cvtps2pi 16(%2,%0,2) , %%mm2
\n\t
"
"cvtps2pi 24(%2,%0,2) , %%mm3
\n\t
"
"packssdw %%mm1 , %%mm0
\n\t
"
"packssdw %%mm3 , %%mm2
\n\t
"
"movq %%mm0 , (%1,%0)
\n\t
"
"movq %%mm2 , 8(%1,%0)
\n\t
"
"add $16 , %0
\n\t
"
" js 1b
\n\t
"
"emms
\n\t
"
:
"+r"
(
reglen
),
"+r"
(
dst
),
"+r"
(
src
)
);
}
static
void
float_to_int16_sse2
(
int16_t
*
dst
,
const
float
*
src
,
long
len
){
x86_reg
reglen
=
len
;
__asm__
volatile
(
"add %0 , %0
\n\t
"
"lea (%2,%0,2) , %2
\n\t
"
"add %0 , %1
\n\t
"
"neg %0
\n\t
"
"1:
\n\t
"
"cvtps2dq (%2,%0,2) , %%xmm0
\n\t
"
"cvtps2dq 16(%2,%0,2) , %%xmm1
\n\t
"
"packssdw %%xmm1 , %%xmm0
\n\t
"
"movdqa %%xmm0 , (%1,%0)
\n\t
"
"add $16 , %0
\n\t
"
" js 1b
\n\t
"
:
"+r"
(
reglen
),
"+r"
(
dst
),
"+r"
(
src
)
);
}
void
ff_float_to_int16_3dnow
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse2
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_interleave6_sse
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
void
ff_float_to_int16_interleave6_3dnow
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
void
ff_float_to_int16_interleave6_3dn2
(
int16_t
*
dst
,
const
float
**
src
,
int
len
);
#if !HAVE_YASM
#define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
#define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#endif
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
...
...
@@ -152,7 +88,7 @@ static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const
DECLARE_ALIGNED(16, int16_t, tmp)[len];\
int i,j,c;\
for(c=0; c<channels; c++){\
float_to_int16_##cpu(tmp, src[c], len);\
f
f_f
loat_to_int16_##cpu(tmp, src[c], len);\
for(i=0, j=c; i<len; i++, j+=channels)\
dst[j] = tmp[i];\
}\
...
...
@@ -160,7 +96,7 @@ static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const
\
static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
if(channels==1)\
float_to_int16_##cpu(dst, src[0], len);\
f
f_f
loat_to_int16_##cpu(dst, src[0], len);\
else if(channels==2){\
x86_reg reglen = len; \
const float *src0 = src[0];\
...
...
@@ -235,7 +171,6 @@ static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long
float_to_int16_interleave_3dnow
(
dst
,
src
,
len
,
channels
);
}
#if HAVE_YASM
void
ff_float_interleave2_mmx
(
float
*
dst
,
const
float
**
src
,
unsigned
int
len
);
void
ff_float_interleave2_sse
(
float
*
dst
,
const
float
**
src
,
unsigned
int
len
);
...
...
@@ -272,11 +207,10 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
if
(
mm_flags
&
AV_CPU_FLAG_MMX
)
{
#if HAVE_YASM
c
->
float_interleave
=
float_interleave_mmx
;
#endif
if
(
mm_flags
&
AV_CPU_FLAG_3DNOW
){
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)){
c
->
float_to_int16
=
float_to_int16_3dnow
;
c
->
float_to_int16
=
f
f_f
loat_to_int16_3dnow
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dnow
;
}
}
...
...
@@ -285,18 +219,21 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dn2
;
}
}
#endif
if
(
mm_flags
&
AV_CPU_FLAG_SSE
){
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_sse
;
c
->
float_to_int16
=
float_to_int16_sse
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse
;
#if HAVE_YASM
c
->
float_to_int16
=
ff_float_to_int16_sse
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse
;
c
->
float_interleave
=
float_interleave_sse
;
#endif
}
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
){
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_sse2
;
c
->
float_to_int16
=
float_to_int16_sse2
;
#if HAVE_YASM
c
->
float_to_int16
=
ff_float_to_int16_sse2
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse2
;
#endif
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment