Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
b8f02f5b
Commit
b8f02f5b
authored
Nov 06, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
dsputil: use cpuflags in x86 versions of vector_clip_int32()
parent
f2bd8a07
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
21 deletions
+27
-21
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+4
-4
dsputil_yasm.asm
libavcodec/x86/dsputil_yasm.asm
+23
-17
No files found.
libavcodec/x86/dsputil_mmx.c
View file @
b8f02f5b
...
@@ -2419,9 +2419,9 @@ void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, int32_t min
...
@@ -2419,9 +2419,9 @@ void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, int32_t min
int32_t
max
,
unsigned
int
len
);
int32_t
max
,
unsigned
int
len
);
void
ff_vector_clip_int32_sse2
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
void
ff_vector_clip_int32_sse2
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
int32_t
max
,
unsigned
int
len
);
int32_t
max
,
unsigned
int
len
);
void
ff_vector_clip_int32_
sse2_int
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
void
ff_vector_clip_int32_
int_sse2
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
int32_t
max
,
unsigned
int
len
);
int32_t
max
,
unsigned
int
len
);
void
ff_vector_clip_int32_sse4
1
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
void
ff_vector_clip_int32_sse4
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
int32_t
max
,
unsigned
int
len
);
int32_t
max
,
unsigned
int
len
);
void
dsputil_init_mmx
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
void
dsputil_init_mmx
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
...
@@ -2877,7 +2877,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
...
@@ -2877,7 +2877,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
scalarproduct_int16
=
ff_scalarproduct_int16_sse2
;
c
->
scalarproduct_int16
=
ff_scalarproduct_int16_sse2
;
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_sse2
;
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_sse2
;
if
(
mm_flags
&
AV_CPU_FLAG_ATOM
)
{
if
(
mm_flags
&
AV_CPU_FLAG_ATOM
)
{
c
->
vector_clip_int32
=
ff_vector_clip_int32_
sse2_int
;
c
->
vector_clip_int32
=
ff_vector_clip_int32_
int_sse2
;
}
else
{
}
else
{
c
->
vector_clip_int32
=
ff_vector_clip_int32_sse2
;
c
->
vector_clip_int32
=
ff_vector_clip_int32_sse2
;
}
}
...
@@ -2909,7 +2909,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
...
@@ -2909,7 +2909,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if
(
mm_flags
&
AV_CPU_FLAG_SSE4
&&
HAVE_SSE
)
{
if
(
mm_flags
&
AV_CPU_FLAG_SSE4
&&
HAVE_SSE
)
{
#if HAVE_YASM
#if HAVE_YASM
c
->
vector_clip_int32
=
ff_vector_clip_int32_sse4
1
;
c
->
vector_clip_int32
=
ff_vector_clip_int32_sse4
;
#endif
#endif
}
}
...
...
libavcodec/x86/dsputil_yasm.asm
View file @
b8f02f5b
...
@@ -1055,9 +1055,14 @@ emu_edge mmx
...
@@ -1055,9 +1055,14 @@ emu_edge mmx
; int32_t max, unsigned int len)
; int32_t max, unsigned int len)
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
%macro
VECTOR_CLIP_INT32
4
; %1 = number of xmm registers used
cglobal
vector_clip_int32_
%1
,
5
,
5
,
%2
,
dst
,
src
,
min
,
max
,
len
; %2 = number of inline load/process/store loops per asm loop
%ifidn
%1
,
sse2
; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
; %5 = suffix
%macro
VECTOR_CLIP_INT32
4
-
5
cglobal
vector_clip_int32
%5
,
5
,
5
,
%2
,
dst
,
src
,
min
,
max
,
len
%if
%4
cvtsi2ss
m4
,
minm
cvtsi2ss
m4
,
minm
cvtsi2ss
m5
,
maxm
cvtsi2ss
m5
,
maxm
%else
%else
...
@@ -1068,12 +1073,12 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
...
@@ -1068,12 +1073,12 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
SPLATD
m5
SPLATD
m5
.
loop
:
.
loop
:
%assign
%%
i
1
%assign
%%
i
1
%rep
%
3
%rep
%
2
mova
m0
,
[
srcq
+
mmsize
*
0
*
%%
i
]
mova
m0
,
[
srcq
+
mmsize
*
0
*
%%
i
]
mova
m1
,
[
srcq
+
mmsize
*
1
*
%%
i
]
mova
m1
,
[
srcq
+
mmsize
*
1
*
%%
i
]
mova
m2
,
[
srcq
+
mmsize
*
2
*
%%
i
]
mova
m2
,
[
srcq
+
mmsize
*
2
*
%%
i
]
mova
m3
,
[
srcq
+
mmsize
*
3
*
%%
i
]
mova
m3
,
[
srcq
+
mmsize
*
3
*
%%
i
]
%if
%
4
%if
%
3
mova
m7
,
[
srcq
+
mmsize
*
4
*
%%
i
]
mova
m7
,
[
srcq
+
mmsize
*
4
*
%%
i
]
mova
m8
,
[
srcq
+
mmsize
*
5
*
%%
i
]
mova
m8
,
[
srcq
+
mmsize
*
5
*
%%
i
]
mova
m9
,
[
srcq
+
mmsize
*
6
*
%%
i
]
mova
m9
,
[
srcq
+
mmsize
*
6
*
%%
i
]
...
@@ -1083,7 +1088,7 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
...
@@ -1083,7 +1088,7 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
CLIPD
m1
,
m4
,
m5
,
m6
CLIPD
m1
,
m4
,
m5
,
m6
CLIPD
m2
,
m4
,
m5
,
m6
CLIPD
m2
,
m4
,
m5
,
m6
CLIPD
m3
,
m4
,
m5
,
m6
CLIPD
m3
,
m4
,
m5
,
m6
%if
%
4
%if
%
3
CLIPD
m7
,
m4
,
m5
,
m6
CLIPD
m7
,
m4
,
m5
,
m6
CLIPD
m8
,
m4
,
m5
,
m6
CLIPD
m8
,
m4
,
m5
,
m6
CLIPD
m9
,
m4
,
m5
,
m6
CLIPD
m9
,
m4
,
m5
,
m6
...
@@ -1093,7 +1098,7 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
...
@@ -1093,7 +1098,7 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
mova
[
dstq
+
mmsize
*
1
*
%%
i
]
,
m1
mova
[
dstq
+
mmsize
*
1
*
%%
i
]
,
m1
mova
[
dstq
+
mmsize
*
2
*
%%
i
]
,
m2
mova
[
dstq
+
mmsize
*
2
*
%%
i
]
,
m2
mova
[
dstq
+
mmsize
*
3
*
%%
i
]
,
m3
mova
[
dstq
+
mmsize
*
3
*
%%
i
]
,
m3
%if
%
4
%if
%
3
mova
[
dstq
+
mmsize
*
4
*
%%
i
]
,
m7
mova
[
dstq
+
mmsize
*
4
*
%%
i
]
,
m7
mova
[
dstq
+
mmsize
*
5
*
%%
i
]
,
m8
mova
[
dstq
+
mmsize
*
5
*
%%
i
]
,
m8
mova
[
dstq
+
mmsize
*
6
*
%%
i
]
,
m9
mova
[
dstq
+
mmsize
*
6
*
%%
i
]
,
m9
...
@@ -1101,25 +1106,26 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
...
@@ -1101,25 +1106,26 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
%endif
%endif
%assign
%%
i
%%
i
+
1
%assign
%%
i
%%
i
+
1
%endrep
%endrep
add
srcq
,
mmsize
*
4
*
(
%
3
+
%4
)
add
srcq
,
mmsize
*
4
*
(
%
2
+
%3
)
add
dstq
,
mmsize
*
4
*
(
%
3
+
%4
)
add
dstq
,
mmsize
*
4
*
(
%
2
+
%3
)
sub
lend
,
mmsize
*
(
%
3
+
%4
)
sub
lend
,
mmsize
*
(
%
2
+
%3
)
jg
.
loop
jg
.
loop
REP_RET
REP_RET
%endmacro
%endmacro
INIT_MMX
INIT_MMX
mmx
%define
SPLATD
SPLATD_MMX
%define
SPLATD
SPLATD_MMX
%define
CLIPD
CLIPD_MMX
%define
CLIPD
CLIPD_MMX
VECTOR_CLIP_INT32
mmx
,
0
,
1
,
0
VECTOR_CLIP_INT32
0
,
1
,
0
,
0
INIT_XMM
INIT_XMM
sse2
%define
SPLATD
SPLATD_SSE2
%define
SPLATD
SPLATD_SSE2
VECTOR_CLIP_INT32
sse2_int
,
6
,
1
,
0
VECTOR_CLIP_INT32
6
,
1
,
0
,
0
,
_int
%define
CLIPD
CLIPD_SSE2
%define
CLIPD
CLIPD_SSE2
VECTOR_CLIP_INT32
sse2
,
6
,
2
,
0
VECTOR_CLIP_INT32
6
,
2
,
0
,
1
INIT_XMM
sse4
%define
CLIPD
CLIPD_SSE41
%define
CLIPD
CLIPD_SSE41
%ifdef
m8
%ifdef
m8
VECTOR_CLIP_INT32
sse41
,
11
,
1
,
1
VECTOR_CLIP_INT32
11
,
1
,
1
,
0
%else
%else
VECTOR_CLIP_INT32
sse41
,
6
,
1
,
0
VECTOR_CLIP_INT32
6
,
1
,
0
,
0
%endif
%endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment