Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
f3df42e8
Commit
f3df42e8
authored
Mar 17, 2018
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter/x86/vf_blend : add SIMD for 16 bit version of
grainextract grainmerge average extremity negation
parent
8eb0bb11
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
128 additions
and
60 deletions
+128
-60
vf_blend.asm
libavfilter/x86/vf_blend.asm
+108
-60
vf_blend_init.c
libavfilter/x86/vf_blend_init.c
+20
-0
No files found.
libavfilter/x86/vf_blend.asm
View file @
f3df42e8
...
...
@@ -27,6 +27,8 @@
SECTION_RODATA
ps_255
:
times
4
dd
255
.
0
pd_32768
:
times
4
dd
32768
pd_65535
:
times
4
dd
65535
pw_1
:
times
8
dw
1
pw_128
:
times
8
dw
128
pw_255
:
times
8
dw
255
...
...
@@ -79,26 +81,33 @@ BLEND_INIT %1, 2, %3
BLEND_END
%endmacro
%macro
GRAINEXTRACT
0
BLEND_INIT
grainextract
,
6
; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
%macro
GRAINEXTRACT
3
-
4
BLEND_INIT
%1
,
6
,
%4
pxor
m4
,
m4
%if
%0
==
4
; 16 bit
VBROADCASTI128
m5
,
[
pd_32768
]
%else
VBROADCASTI128
m5
,
[
pw_128
]
%endif
.
nextrow
:
mov
xq
,
widthq
.
loop
:
movu
m1
,
[
topq
+
xq
]
movu
m3
,
[
bottomq
+
xq
]
punpcklbw
m0
,
m1
,
m4
punpckhbw
m1
,
m4
punpcklbw
m2
,
m3
,
m4
punpckhbw
m3
,
m4
paddw
m0
,
m5
paddw
m1
,
m5
psubw
m0
,
m2
psubw
m1
,
m3
punpckl%2
%3
m0
,
m1
,
m4
punpckh%2
%3
m1
,
m4
punpckl%2
%3
m2
,
m3
,
m4
punpckh%2
%3
m3
,
m4
padd%3
m0
,
m5
padd%3
m1
,
m5
psub%3
m0
,
m2
psub%3
m1
,
m3
packus%3
%2
m0
,
m1
packuswb
m0
,
m1
mova
[
dstq
+
xq
]
,
m0
add
xq
,
mmsize
jl
.
loop
...
...
@@ -172,8 +181,9 @@ BLEND_INIT screen, 7
BLEND_END
%endmacro
%macro
AVERAGE
0
BLEND_INIT
average
,
3
;%1 name, %2 (b or w), %3 (set if 16 bit)
%macro
AVERAGE
2
-
3
BLEND_INIT
%1
,
3
,
%3
pcmpeqb
m2
,
m2
.
nextrow
:
...
...
@@ -184,7 +194,7 @@ BLEND_INIT average, 3
movu
m1
,
[
bottomq
+
xq
]
pxor
m0
,
m2
pxor
m1
,
m2
pavg
b
m0
,
m1
pavg
%2
m0
,
m1
pxor
m0
,
m2
mova
[
dstq
+
xq
]
,
m0
add
xq
,
mmsize
...
...
@@ -192,29 +202,34 @@ BLEND_INIT average, 3
BLEND_END
%endmacro
%macro
GRAINMERGE
0
BLEND_INIT
grainmerge
,
6
; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
%macro
GRAINMERGE
3
-
4
BLEND_INIT
%1
,
6
,
%4
pxor
m4
,
m4
%if
%0
==
4
; 16 bit
VBROADCASTI128
m5
,
[
pd_32768
]
%else
VBROADCASTI128
m5
,
[
pw_128
]
%endif
.
nextrow
:
mov
xq
,
widthq
.
loop
:
movu
m1
,
[
topq
+
xq
]
movu
m3
,
[
bottomq
+
xq
]
punpcklbw
m0
,
m1
,
m4
punpckhbw
m1
,
m4
punpcklbw
m2
,
m3
,
m4
punpckhbw
m3
,
m4
paddw
m0
,
m2
paddw
m1
,
m3
psubw
m0
,
m5
psubw
m1
,
m5
punpckl%2
%3
m0
,
m1
,
m4
punpckh%2
%3
m1
,
m4
punpckl%2
%3
m2
,
m3
,
m4
punpckh%2
%3
m3
,
m4
padd%3
m0
,
m2
padd%3
m1
,
m3
psub%3
m0
,
m5
psub%3
m1
,
m5
packus%3
%2
m0
,
m1
packuswb
m0
,
m1
mova
[
dstq
+
xq
]
,
m0
add
xq
,
mmsize
jl
.
loop
...
...
@@ -324,52 +339,73 @@ BLEND_INIT %1, 5, %4
BLEND_END
%endmacro
%macro
BLEND_ABS
0
BLEND_INIT
extremity
,
8
; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
%macro
EXTREMITY
3
-
4
BLEND_INIT
%1
,
8
,
%4
pxor
m2
,
m2
%if
%0
==
4
; 16 bit
VBROADCASTI128
m4
,
[
pd_65535
]
%else
VBROADCASTI128
m4
,
[
pw_255
]
%endif
.
nextrow
:
mov
xq
,
widthq
.
loop
:
movu
m0
,
[
topq
+
xq
]
movu
m1
,
[
bottomq
+
xq
]
punpckhbw
m5
,
m0
,
m2
punpcklbw
m0
,
m2
punpckhbw
m6
,
m1
,
m2
punpcklbw
m1
,
m2
psubw
m3
,
m4
,
m0
psubw
m7
,
m4
,
m5
psubw
m3
,
m1
psubw
m7
,
m6
punpckh%2
%3
m5
,
m0
,
m2
punpckl%2
%3
m0
,
m2
punpckh%2
%3
m6
,
m1
,
m2
punpckl%2
%3
m1
,
m2
psub%3
m3
,
m4
,
m0
psub%3
m7
,
m4
,
m5
psub%3
m3
,
m1
psub%3
m7
,
m6
%if
%0
==
4
; 16 bit
pabsd
m3
,
m3
pabsd
m7
,
m7
%else
ABS2
m3
,
m7
,
m1
,
m6
packuswb
m3
,
m7
%endif
packus%3
%2
m3
,
m7
mova
[
dstq
+
xq
]
,
m3
add
xq
,
mmsize
jl
.
loop
BLEND_END
%endmacro
BLEND_INIT
negation
,
8
%macro
NEGATION
3
-
4
BLEND_INIT
%1
,
8
,
%4
pxor
m2
,
m2
%if
%0
==
4
; 16 bit
VBROADCASTI128
m4
,
[
pd_65535
]
%else
VBROADCASTI128
m4
,
[
pw_255
]
%endif
.
nextrow
:
mov
xq
,
widthq
.
loop
:
movu
m0
,
[
topq
+
xq
]
movu
m1
,
[
bottomq
+
xq
]
punpckhbw
m5
,
m0
,
m2
punpcklbw
m0
,
m2
punpckhbw
m6
,
m1
,
m2
punpcklbw
m1
,
m2
psubw
m3
,
m4
,
m0
psubw
m7
,
m4
,
m5
psubw
m3
,
m1
psubw
m7
,
m6
punpckh%2
%3
m5
,
m0
,
m2
punpckl%2
%3
m0
,
m2
punpckh%2
%3
m6
,
m1
,
m2
punpckl%2
%3
m1
,
m2
psub%3
m3
,
m4
,
m0
psub%3
m7
,
m4
,
m5
psub%3
m3
,
m1
psub%3
m7
,
m6
%if
%0
==
4
; 16 bit
pabsd
m3
,
m3
pabsd
m7
,
m7
%else
ABS2
m3
,
m7
,
m1
,
m6
psubw
m0
,
m4
,
m3
psubw
m1
,
m4
,
m7
packuswb
m0
,
m1
%endif
psub%3
m0
,
m4
,
m3
psub%3
m1
,
m4
,
m7
packus%3
%2
m0
,
m1
mova
[
dstq
+
xq
]
,
m0
add
xq
,
mmsize
jl
.
loop
...
...
@@ -384,17 +420,17 @@ BLEND_SIMPLE addition, addusb
BLEND_SIMPLE
subtract
,
subusb
BLEND_SIMPLE
darken
,
minub
BLEND_SIMPLE
lighten
,
maxub
GRAINEXTRACT
GRAINEXTRACT
grainextract
,
b
,
w
BLEND_MULTIPLY
BLEND_SCREEN
AVERAGE
GRAINMERGE
AVERAGE
average
,
b
GRAINMERGE
grainmerge
,
b
,
w
HARDMIX
PHOENIX
phoenix
,
b
DIFFERENCE
difference
,
b
,
w
DIVIDE
BLEND_ABS
EXTREMITY
extremity
,
b
,
w
NEGATION
negation
,
b
,
w
%if
ARCH_X86_64
BLEND_SIMPLE
addition_16
,
addusw
,
1
...
...
@@ -402,18 +438,24 @@ BLEND_SIMPLE and_16, and, 1
BLEND_SIMPLE
or_16
,
or
,
1
BLEND_SIMPLE
subtract_16
,
subusw
,
1
BLEND_SIMPLE
xor_16
,
xor
,
1
AVERAGE
average_16
,
w
,
1
%endif
INIT_XMM
ssse3
DIFFERENCE
difference
,
b
,
w
BLEND_ABS
EXTREMITY
extremity
,
b
,
w
NEGATION
negation
,
b
,
w
INIT_XMM
sse4
%if
ARCH_X86_64
BLEND_SIMPLE
darken_16
,
minuw
,
1
BLEND_SIMPLE
lighten_16
,
maxuw
,
1
GRAINEXTRACT
grainextract_16
,
w
,
d
,
1
GRAINMERGE
grainmerge_16
,
w
,
d
,
1
PHOENIX
phoenix_16
,
w
,
1
DIFFERENCE
difference_16
,
w
,
d
,
1
EXTREMITY
extremity_16
,
w
,
d
,
1
NEGATION
negation_16
,
w
,
d
,
1
%endif
%if
HAVE_AVX2_EXTERNAL
...
...
@@ -425,16 +467,17 @@ BLEND_SIMPLE addition, addusb
BLEND_SIMPLE
subtract
,
subusb
BLEND_SIMPLE
darken
,
minub
BLEND_SIMPLE
lighten
,
maxub
GRAINEXTRACT
GRAINEXTRACT
grainextract
,
b
,
w
BLEND_MULTIPLY
BLEND_SCREEN
AVERAGE
GRAINMERGE
AVERAGE
average
,
b
GRAINMERGE
grainmerge
,
b
,
w
HARDMIX
PHOENIX
phoenix
,
b
DIFFERENCE
difference
,
b
,
w
BLEND_ABS
EXTREMITY
extremity
,
b
,
w
NEGATION
negation
,
b
,
w
%if
ARCH_X86_64
BLEND_SIMPLE
addition_16
,
addusw
,
1
...
...
@@ -444,7 +487,12 @@ BLEND_SIMPLE lighten_16, maxuw, 1
BLEND_SIMPLE
or_16
,
or
,
1
BLEND_SIMPLE
subtract_16
,
subusw
,
1
BLEND_SIMPLE
xor_16
,
xor
,
1
GRAINEXTRACT
grainextract_16
,
w
,
d
,
1
AVERAGE
average_16
,
w
,
1
GRAINMERGE
grainmerge_16
,
w
,
d
,
1
PHOENIX
phoenix_16
,
w
,
1
DIFFERENCE
difference_16
,
w
,
d
,
1
EXTREMITY
extremity_16
,
w
,
d
,
1
NEGATION
negation_16
,
w
,
d
,
1
%endif
%endif
libavfilter/x86/vf_blend_init.c
View file @
f3df42e8
...
...
@@ -72,12 +72,22 @@ BLEND_FUNC(negation, avx2)
#if ARCH_X86_64
BLEND_FUNC
(
addition_16
,
sse2
)
BLEND_FUNC
(
addition_16
,
avx2
)
BLEND_FUNC
(
grainmerge_16
,
sse4
)
BLEND_FUNC
(
grainmerge_16
,
avx2
)
BLEND_FUNC
(
average_16
,
sse2
)
BLEND_FUNC
(
average_16
,
avx2
)
BLEND_FUNC
(
and_16
,
sse2
)
BLEND_FUNC
(
and_16
,
avx2
)
BLEND_FUNC
(
darken_16
,
sse4
)
BLEND_FUNC
(
darken_16
,
avx2
)
BLEND_FUNC
(
grainextract_16
,
sse4
)
BLEND_FUNC
(
grainextract_16
,
avx2
)
BLEND_FUNC
(
difference_16
,
sse4
)
BLEND_FUNC
(
difference_16
,
avx2
)
BLEND_FUNC
(
extremity_16
,
sse4
)
BLEND_FUNC
(
extremity_16
,
avx2
)
BLEND_FUNC
(
negation_16
,
sse4
)
BLEND_FUNC
(
negation_16
,
avx2
)
BLEND_FUNC
(
lighten_16
,
sse4
)
BLEND_FUNC
(
lighten_16
,
avx2
)
BLEND_FUNC
(
or_16
,
sse2
)
...
...
@@ -152,6 +162,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
switch
(
param
->
mode
)
{
case
BLEND_ADDITION
:
param
->
blend
=
ff_blend_addition_16_sse2
;
break
;
case
BLEND_AND
:
param
->
blend
=
ff_blend_and_16_sse2
;
break
;
case
BLEND_AVERAGE
:
param
->
blend
=
ff_blend_average_16_sse2
;
break
;
case
BLEND_OR
:
param
->
blend
=
ff_blend_or_16_sse2
;
break
;
case
BLEND_SUBTRACT
:
param
->
blend
=
ff_blend_subtract_16_sse2
;
break
;
case
BLEND_XOR
:
param
->
blend
=
ff_blend_xor_16_sse2
;
break
;
...
...
@@ -159,8 +170,12 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
}
if
(
EXTERNAL_SSE4
(
cpu_flags
)
&&
param
->
opacity
==
1
)
{
switch
(
param
->
mode
)
{
case
BLEND_GRAINMERGE
:
param
->
blend
=
ff_blend_grainmerge_16_sse4
;
break
;
case
BLEND_DARKEN
:
param
->
blend
=
ff_blend_darken_16_sse4
;
break
;
case
BLEND_GRAINEXTRACT
:
param
->
blend
=
ff_blend_grainextract_16_sse4
;
break
;
case
BLEND_DIFFERENCE
:
param
->
blend
=
ff_blend_difference_16_sse4
;
break
;
case
BLEND_EXTREMITY
:
param
->
blend
=
ff_blend_extremity_16_sse4
;
break
;
case
BLEND_NEGATION
:
param
->
blend
=
ff_blend_negation_16_sse4
;
break
;
case
BLEND_LIGHTEN
:
param
->
blend
=
ff_blend_lighten_16_sse4
;
break
;
case
BLEND_PHOENIX
:
param
->
blend
=
ff_blend_phoenix_16_sse4
;
break
;
}
...
...
@@ -168,9 +183,14 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
)
&&
param
->
opacity
==
1
)
{
switch
(
param
->
mode
)
{
case
BLEND_ADDITION
:
param
->
blend
=
ff_blend_addition_16_avx2
;
break
;
case
BLEND_GRAINMERGE
:
param
->
blend
=
ff_blend_grainmerge_16_avx2
;
break
;
case
BLEND_AND
:
param
->
blend
=
ff_blend_and_16_avx2
;
break
;
case
BLEND_AVERAGE
:
param
->
blend
=
ff_blend_average_16_avx2
;
break
;
case
BLEND_DARKEN
:
param
->
blend
=
ff_blend_darken_16_avx2
;
break
;
case
BLEND_GRAINEXTRACT
:
param
->
blend
=
ff_blend_grainextract_16_avx2
;
break
;
case
BLEND_DIFFERENCE
:
param
->
blend
=
ff_blend_difference_16_avx2
;
break
;
case
BLEND_EXTREMITY
:
param
->
blend
=
ff_blend_extremity_16_avx2
;
break
;
case
BLEND_NEGATION
:
param
->
blend
=
ff_blend_negation_16_avx2
;
break
;
case
BLEND_LIGHTEN
:
param
->
blend
=
ff_blend_lighten_16_avx2
;
break
;
case
BLEND_OR
:
param
->
blend
=
ff_blend_or_16_avx2
;
break
;
case
BLEND_PHOENIX
:
param
->
blend
=
ff_blend_phoenix_16_avx2
;
break
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment