Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
869efbf9
Commit
869efbf9
authored
Dec 07, 2017
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter/x86/vf_threshold : add threshold16 SIMD (SSE4 and AVX2)
parent
713f9c5b
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
34 additions
and
21 deletions
+34
-21
vf_threshold.asm
libavfilter/x86/vf_threshold.asm
+14
-7
vf_threshold_init.c
libavfilter/x86/vf_threshold_init.c
+20
-14
No files found.
libavfilter/x86/vf_threshold.asm
View file @
869efbf9
...
@@ -25,16 +25,18 @@
...
@@ -25,16 +25,18 @@
SECTION_RODATA
SECTION_RODATA
pb_128
:
times
16
db
128
pb_128
:
times
16
db
128
pb_128_0
:
times
8
db
0
,
128
SECTION
.
text
SECTION
.
text
%macro
THRESHOLD_8
0
;%1 depth (8 or 16) ; %2 b or w ; %3 constant
%macro
THRESHOLD
3
%if
ARCH_X86_64
%if
ARCH_X86_64
cglobal
threshold
8
,
10
,
13
,
5
,
in
,
threshold
,
min
,
max
,
out
,
ilinesize
,
tlinesize
,
flinesize
,
slinesize
,
olinesize
,
w
,
h
,
x
cglobal
threshold
%1
,
10
,
13
,
5
,
in
,
threshold
,
min
,
max
,
out
,
ilinesize
,
tlinesize
,
flinesize
,
slinesize
,
olinesize
,
w
,
h
,
x
mov
wd
,
dword
wm
mov
wd
,
dword
wm
mov
hd
,
dword
hm
mov
hd
,
dword
hm
%else
%else
cglobal
threshold
8
,
5
,
7
,
5
,
in
,
threshold
,
min
,
max
,
out
,
w
,
x
cglobal
threshold
%1
,
5
,
7
,
5
,
in
,
threshold
,
min
,
max
,
out
,
w
,
x
mov
wd
,
r10m
mov
wd
,
r10m
%define
ilinesizeq
r5mp
%define
ilinesizeq
r5mp
%define
tlinesizeq
r6mp
%define
tlinesizeq
r6mp
...
@@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
...
@@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
%define
olinesizeq
r9mp
%define
olinesizeq
r9mp
%define
hd
r11mp
%define
hd
r11mp
%endif
%endif
VBROADCASTI128
m4
,
[
pb_128
]
VBROADCASTI128
m4
,
[
%3
]
%if
%1
==
16
add
wq
,
wq
; w *= 2 (16 bits instead of 8)
%endif
add
inq
,
wq
add
inq
,
wq
add
thresholdq
,
wq
add
thresholdq
,
wq
add
minq
,
wq
add
minq
,
wq
...
@@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
...
@@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
movu
m3
,
[
maxq
+
xq
]
movu
m3
,
[
maxq
+
xq
]
pxor
m0
,
m4
pxor
m0
,
m4
pxor
m1
,
m4
pxor
m1
,
m4
pcmpgt
b
m0
,
m1
pcmpgt
%2
m0
,
m1
PBLENDVB
m3
,
m2
,
m0
PBLENDVB
m3
,
m2
,
m0
movu
[
outq
+
xq
]
,
m3
movu
[
outq
+
xq
]
,
m3
add
xq
,
mmsize
add
xq
,
mmsize
...
@@ -77,9 +82,11 @@ RET
...
@@ -77,9 +82,11 @@ RET
%endmacro
%endmacro
INIT_XMM
sse4
INIT_XMM
sse4
THRESHOLD_8
THRESHOLD
8
,
b
,
pb_128
THRESHOLD
16
,
w
,
pb_128_0
%if
HAVE_AVX2_EXTERNAL
%if
HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
INIT_YMM
avx2
THRESHOLD_8
THRESHOLD
8
,
b
,
pb_128
THRESHOLD
16
,
w
,
pb_128_0
%endif
%endif
libavfilter/x86/vf_threshold_init.c
View file @
869efbf9
...
@@ -23,21 +23,20 @@
...
@@ -23,21 +23,20 @@
#include "libavutil/x86/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/threshold.h"
#include "libavfilter/threshold.h"
void
ff_threshold8_sse4
(
const
uint8_t
*
in
,
const
uint8_t
*
threshold
,
#define THRESHOLD_FUNC(depth, opt) \
const
uint8_t
*
min
,
const
uint8_t
*
max
,
void ff_threshold##depth##_##opt(const uint8_t *in, const uint8_t *threshold,\
uint8_t
*
out
,
const uint8_t *min, const uint8_t *max, \
ptrdiff_t
ilinesize
,
ptrdiff_t
tlinesize
,
uint8_t *out, \
ptrdiff_t
flinesize
,
ptrdiff_t
slinesize
,
ptrdiff_t ilinesize, ptrdiff_t tlinesize, \
ptrdiff_t
olinesize
,
ptrdiff_t flinesize, ptrdiff_t slinesize, \
int
w
,
int
h
);
ptrdiff_t olinesize, \
void
ff_threshold8_avx2
(
const
uint8_t
*
in
,
const
uint8_t
*
threshold
,
const
uint8_t
*
min
,
const
uint8_t
*
max
,
uint8_t
*
out
,
ptrdiff_t
ilinesize
,
ptrdiff_t
tlinesize
,
ptrdiff_t
flinesize
,
ptrdiff_t
slinesize
,
ptrdiff_t
olinesize
,
int w, int h);
int w, int h);
THRESHOLD_FUNC
(
8
,
sse4
)
THRESHOLD_FUNC
(
8
,
avx2
)
THRESHOLD_FUNC
(
16
,
sse4
)
THRESHOLD_FUNC
(
16
,
avx2
)
av_cold
void
ff_threshold_init_x86
(
ThresholdContext
*
s
)
av_cold
void
ff_threshold_init_x86
(
ThresholdContext
*
s
)
{
{
int
cpu_flags
=
av_get_cpu_flags
();
int
cpu_flags
=
av_get_cpu_flags
();
...
@@ -49,5 +48,12 @@ av_cold void ff_threshold_init_x86(ThresholdContext *s)
...
@@ -49,5 +48,12 @@ av_cold void ff_threshold_init_x86(ThresholdContext *s)
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
s
->
threshold
=
ff_threshold8_avx2
;
s
->
threshold
=
ff_threshold8_avx2
;
}
}
}
else
if
(
s
->
depth
==
16
)
{
if
(
EXTERNAL_SSE4
(
cpu_flags
))
{
s
->
threshold
=
ff_threshold16_sse4
;
}
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
s
->
threshold
=
ff_threshold16_avx2
;
}
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment