Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
4e8e2624
Commit
4e8e2624
authored
Oct 10, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fmtconvert: port int32_to_float_fmul_scalar() x86 inline asm to yasm
parent
185142a5
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
65 additions
and
60 deletions
+65
-60
dsputil_yasm.asm
libavcodec/x86/dsputil_yasm.asm
+0
-8
fmtconvert.asm
libavcodec/x86/fmtconvert.asm
+46
-0
fmtconvert_mmx.c
libavcodec/x86/fmtconvert_mmx.c
+7
-52
x86util.asm
libavutil/x86/x86util.asm
+12
-0
No files found.
libavcodec/x86/dsputil_yasm.asm
View file @
4e8e2624
...
...
@@ -1055,14 +1055,6 @@ emu_edge mmx
; int32_t max, unsigned int len)
;-----------------------------------------------------------------------------
%macro
SPLATD_MMX
1
punpckldq
%1
,
%1
%endmacro
%macro
SPLATD_SSE2
1
pshufd
%1
,
%1
,
0
%endmacro
%macro
VECTOR_CLIP_INT32
4
cglobal
vector_clip_int32_
%1
,
5
,
5
,
%2
,
dst
,
src
,
min
,
max
,
len
%ifidn
%1
,
sse2
...
...
libavcodec/x86/fmtconvert.asm
View file @
4e8e2624
...
...
@@ -24,6 +24,52 @@
SECTION_TEXT
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
%macro
INT32_TO_FLOAT_FMUL_SCALAR
2
%ifdef
ARCH_X86_64
cglobal
int32_to_float_fmul_scalar_
%1
,
3
,
3
,
%2
,
dst
,
src
,
len
%else
cglobal
int32_to_float_fmul_scalar_
%1
,
4
,
4
,
%2
,
dst
,
src
,
mul
,
len
movss
m0
,
mulm
%endif
SPLATD
m0
shl
lenq
,
2
add
srcq
,
lenq
add
dstq
,
lenq
neg
lenq
.
loop
:
%ifidn
%1
,
sse2
cvtdq2ps
m1
,
[
srcq
+
lenq
]
cvtdq2ps
m2
,
[
srcq
+
lenq
+
16
]
%else
cvtpi2ps
m1
,
[
srcq
+
lenq
]
cvtpi2ps
m3
,
[
srcq
+
lenq
+
8
]
cvtpi2ps
m2
,
[
srcq
+
lenq
+
16
]
cvtpi2ps
m4
,
[
srcq
+
lenq
+
24
]
movlhps
m1
,
m3
movlhps
m2
,
m4
%endif
mulps
m1
,
m0
mulps
m2
,
m0
mova
[
dstq
+
lenq
]
,
m1
mova
[
dstq
+
lenq
+
16
]
,
m2
add
lenq
,
32
jl
.
loop
REP_RET
%endmacro
INIT_XMM
%define
SPLATD
SPLATD_SSE
%define
movdqa
movaps
INT32_TO_FLOAT_FMUL_SCALAR
sse
,
5
%undef
movdqa
%define
SPLATD
SPLATD_SSE2
INT32_TO_FLOAT_FMUL_SCALAR
sse2
,
3
%undef
SPLATD
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
...
...
libavcodec/x86/fmtconvert_mmx.c
View file @
4e8e2624
...
...
@@ -26,52 +26,11 @@
#include "libavutil/x86_cpu.h"
#include "libavcodec/fmtconvert.h"
static
void
int32_to_float_fmul_scalar_sse
(
float
*
dst
,
const
int
*
src
,
float
mul
,
int
len
)
{
x86_reg
i
=
-
4
*
len
;
__asm__
volatile
(
"movss %3, %%xmm4
\n
"
"shufps $0, %%xmm4, %%xmm4
\n
"
"1:
\n
"
"cvtpi2ps (%2,%0), %%xmm0
\n
"
"cvtpi2ps 8(%2,%0), %%xmm1
\n
"
"cvtpi2ps 16(%2,%0), %%xmm2
\n
"
"cvtpi2ps 24(%2,%0), %%xmm3
\n
"
"movlhps %%xmm1, %%xmm0
\n
"
"movlhps %%xmm3, %%xmm2
\n
"
"mulps %%xmm4, %%xmm0
\n
"
"mulps %%xmm4, %%xmm2
\n
"
"movaps %%xmm0, (%1,%0)
\n
"
"movaps %%xmm2, 16(%1,%0)
\n
"
"add $32, %0
\n
"
"jl 1b
\n
"
:
"+r"
(
i
)
:
"r"
(
dst
+
len
),
"r"
(
src
+
len
),
"m"
(
mul
)
);
}
static
void
int32_to_float_fmul_scalar_sse2
(
float
*
dst
,
const
int
*
src
,
float
mul
,
int
len
)
{
x86_reg
i
=
-
4
*
len
;
__asm__
volatile
(
"movss %3, %%xmm4
\n
"
"shufps $0, %%xmm4, %%xmm4
\n
"
"1:
\n
"
"cvtdq2ps (%2,%0), %%xmm0
\n
"
"cvtdq2ps 16(%2,%0), %%xmm1
\n
"
"mulps %%xmm4, %%xmm0
\n
"
"mulps %%xmm4, %%xmm1
\n
"
"movaps %%xmm0, (%1,%0)
\n
"
"movaps %%xmm1, 16(%1,%0)
\n
"
"add $32, %0
\n
"
"jl 1b
\n
"
:
"+r"
(
i
)
:
"r"
(
dst
+
len
),
"r"
(
src
+
len
),
"m"
(
mul
)
);
}
#if HAVE_YASM
void
ff_int32_to_float_fmul_scalar_sse
(
float
*
dst
,
const
int
*
src
,
float
mul
,
int
len
);
void
ff_int32_to_float_fmul_scalar_sse2
(
float
*
dst
,
const
int
*
src
,
float
mul
,
int
len
);
void
ff_float_to_int16_3dnow
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_sse2
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
...
...
@@ -204,8 +163,8 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
{
int
mm_flags
=
av_get_cpu_flags
();
if
(
mm_flags
&
AV_CPU_FLAG_MMX
)
{
#if HAVE_YASM
if
(
mm_flags
&
AV_CPU_FLAG_MMX
)
{
c
->
float_interleave
=
float_interleave_mmx
;
if
(
HAVE_AMD3DNOW
&&
mm_flags
&
AV_CPU_FLAG_3DNOW
)
{
...
...
@@ -219,21 +178,17 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dn2
;
}
}
#endif
if
(
HAVE_SSE
&&
mm_flags
&
AV_CPU_FLAG_SSE
)
{
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_sse
;
#if HAVE_YASM
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_sse
;
c
->
float_to_int16
=
ff_float_to_int16_sse
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse
;
c
->
float_interleave
=
float_interleave_sse
;
#endif
}
if
(
HAVE_SSE
&&
mm_flags
&
AV_CPU_FLAG_SSE2
)
{
c
->
int32_to_float_fmul_scalar
=
int32_to_float_fmul_scalar_sse2
;
#if HAVE_YASM
c
->
int32_to_float_fmul_scalar
=
ff_int32_to_float_fmul_scalar_sse2
;
c
->
float_to_int16
=
ff_float_to_int16_sse2
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse2
;
#endif
}
}
#endif
}
libavutil/x86/x86util.asm
View file @
4e8e2624
...
...
@@ -536,6 +536,18 @@
%endif
%endmacro
%macro
SPLATD_MMX
1
punpckldq
%1
,
%1
%endmacro
%macro
SPLATD_SSE
1
shufps
%1
,
%1
,
0
%endmacro
%macro
SPLATD_SSE2
1
pshufd
%1
,
%1
,
0
%endmacro
%macro
CLIPW
3
;(dst, min, max)
pmaxsw
%1
,
%2
pminsw
%1
,
%3
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment