Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
f2e3d706
Commit
f2e3d706
authored
Jan 30, 2014
by
Clément Bœsch
Committed by
Anton Khirnov
Oct 04, 2016
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9lpf/x86: add ff_vp9_loop_filter_h_{48,84}_16_{sse2,ssse3,avx}().
Signed-off-by:
Anton Khirnov
<
anton@khirnov.net
>
parent
92d47550
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
53 additions
and
48 deletions
+53
-48
vp9dsp_init.c
libavcodec/x86/vp9dsp_init.c
+22
-20
vp9lpf.asm
libavcodec/x86/vp9lpf.asm
+31
-28
No files found.
libavcodec/x86/vp9dsp_init.c
View file @
f2e3d706
...
...
@@ -226,6 +226,12 @@ void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stri
lpf_funcs
(
16
,
16
,
sse2
);
lpf_funcs
(
16
,
16
,
ssse3
);
lpf_funcs
(
16
,
16
,
avx
);
lpf_funcs
(
84
,
16
,
sse2
);
lpf_funcs
(
84
,
16
,
ssse3
);
lpf_funcs
(
84
,
16
,
avx
);
lpf_funcs
(
48
,
16
,
sse2
);
lpf_funcs
(
48
,
16
,
ssse3
);
lpf_funcs
(
48
,
16
,
avx
);
lpf_funcs
(
88
,
16
,
sse2
);
lpf_funcs
(
88
,
16
,
ssse3
);
lpf_funcs
(
88
,
16
,
avx
);
...
...
@@ -269,6 +275,19 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_subpel3_8to64(idx, type, opt); \
init_subpel2(4, idx, 4, type, opt)
#define init_lpf(opt) do { \
if (ARCH_X86_64) { \
dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
} \
} while (0)
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
init_fpel
(
4
,
0
,
4
,
put
,
mmx
);
init_fpel
(
3
,
0
,
8
,
put
,
mmx
);
...
...
@@ -293,36 +312,19 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_fpel
(
2
,
1
,
16
,
avg
,
sse2
);
init_fpel
(
1
,
1
,
32
,
avg
,
sse2
);
init_fpel
(
0
,
1
,
64
,
avg
,
sse2
);
if
(
ARCH_X86_64
)
{
dsp
->
loop_filter_mix2
[
1
][
1
][
0
]
=
ff_vp9_loop_filter_h_88_16_sse2
;
dsp
->
loop_filter_mix2
[
1
][
1
][
1
]
=
ff_vp9_loop_filter_v_88_16_sse2
;
dsp
->
loop_filter_16
[
0
]
=
ff_vp9_loop_filter_h_16_16_sse2
;
dsp
->
loop_filter_16
[
1
]
=
ff_vp9_loop_filter_v_16_16_sse2
;
}
init_lpf
(
sse2
);
}
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
init_subpel3
(
0
,
put
,
ssse3
);
init_subpel3
(
1
,
avg
,
ssse3
);
if
(
ARCH_X86_64
)
{
dsp
->
loop_filter_mix2
[
1
][
1
][
0
]
=
ff_vp9_loop_filter_h_88_16_ssse3
;
dsp
->
loop_filter_mix2
[
1
][
1
][
1
]
=
ff_vp9_loop_filter_v_88_16_ssse3
;
dsp
->
loop_filter_16
[
0
]
=
ff_vp9_loop_filter_h_16_16_ssse3
;
dsp
->
loop_filter_16
[
1
]
=
ff_vp9_loop_filter_v_16_16_ssse3
;
}
init_lpf
(
ssse3
);
}
if
(
EXTERNAL_AVX
(
cpu_flags
))
{
init_fpel
(
1
,
0
,
32
,
put
,
avx
);
init_fpel
(
0
,
0
,
64
,
put
,
avx
);
if
(
ARCH_X86_64
)
{
dsp
->
loop_filter_mix2
[
1
][
1
][
0
]
=
ff_vp9_loop_filter_h_88_16_avx
;
dsp
->
loop_filter_mix2
[
1
][
1
][
1
]
=
ff_vp9_loop_filter_v_88_16_avx
;
dsp
->
loop_filter_16
[
0
]
=
ff_vp9_loop_filter_h_16_16_avx
;
dsp
->
loop_filter_16
[
1
]
=
ff_vp9_loop_filter_v_16_16_avx
;
}
init_lpf
(
avx
);
}
if
(
EXTERNAL_AVX2
(
cpu_flags
))
{
...
...
libavcodec/x86/vp9lpf.asm
View file @
f2e3d706
...
...
@@ -45,6 +45,11 @@ pw_8: times 8 dw 8
mask_mix
:
times
8
db
0
times
8
db
1
mask_mix84
:
times
8
db
0xff
times
8
db
0x00
mask_mix48
:
times
8
db
0x00
times
8
db
0xff
SECTION
.
text
; %1 = abs(%2-%3)
...
...
@@ -312,7 +317,7 @@ SECTION .text
neg
mstride3q
%ifidn
%1
,
h
%if
%2
==
88
%if
%2
>
16
%define
movx
movh
lea
dstq
,
[
dstq
+
8
*
strideq
-
4
]
%else
...
...
@@ -360,7 +365,7 @@ SECTION .text
%define
Q6
rsp
+
224
%define
Q7
rsp
+
240
%if
%2
!
=
88
%if
%2
==
16
TRANSPOSE16x16B
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
[rsp]
mova
[P7],
m0
mova
[P6],
m1
...
...
@@ -377,7 +382,7 @@ SECTION .text
mova
[Q1],
m9
mova
[Q2],
m10
mova
[Q3],
m11
%if
%2
!
=
88
%if
%2
==
16
mova
[Q4],
m12
mova
[Q5],
m13
mova
[Q6],
m14
...
...
@@ -392,7 +397,7 @@ SECTION .text
%endif
SPLATB_REG
m2
,
I
,
m0
; I I I I ...
SPLATB_REG
m3
,
E
,
m0
; E E E E ...
%el
if
%2
==
88
%el
se
%if
cpuflag
(
ssse3
)
mova
m0
,
[
mask_mix
]
%endif
...
...
@@ -452,7 +457,7 @@ SECTION .text
ABSSUB_CMP
m1
,
m9
,
m11
,
m6
,
m4
,
m5
,
m8
; abs(p2 - p0) <= 1
pand
m2
,
m1
ABSSUB
m4
,
m10
,
m11
,
m5
; abs(p1 - p0)
%if
%2
!
=
88
%if
%2
==
16
%if
cpuflag
(
ssse3
)
pxor
m0
,
m0
%endif
...
...
@@ -476,8 +481,11 @@ SECTION .text
pand
m2
,
m1
ABSSUB_CMP
m1
,
m15
,
m12
,
m6
,
m4
,
m5
,
m8
; abs(q3 - q0) <= 1
pand
m2
,
m1
; flat8in final value
%if
%2
==
84
||
%2
==
48
pand
m2
,
[
mask_mix
%2
]
%endif
%if
%2
!
=
88
%if
%2
==
16
; (m0: hev, m2: flat8in, m3: fm, m6: pb_81, m9..15: p2 p1 p0 q0 q1 q2 q3)
; calc flat8out mask
mova
m8
,
[P7]
...
...
@@ -570,7 +578,7 @@ SECTION .text
; ([m1: flat8out], m2: flat8in, m3: fm, m10..13: p1 p0 q0 q1)
; filter6()
pxor
m0
,
m0
%if
%2
==
88
%if
%2
>
16
pand
m3
,
m2
%else
pand
m2
,
m3
; mask(fm) & mask(in)
...
...
@@ -608,7 +616,7 @@ SECTION .text
; q5 +5 -p2 -q4 +q5 +q7 . q5 . .
; q6 +6 -p1 -q5 +q6 +q7 . q6 . .
%if
%2
!
=
88
%if
%2
==
16
pand
m1
,
m2
; mask(out) & (mask(fm) & mask(in))
mova
m2
,
[P7]
mova
m3
,
[P6]
...
...
@@ -631,7 +639,7 @@ SECTION .text
%endif
%ifidn
%1
,
h
%if
%2
!
=
88
%if
%2
==
16
mova
m0
,
[P7]
mova
m1
,
[P6]
mova
m2
,
[P5]
...
...
@@ -720,28 +728,23 @@ SECTION .text
RET
%endmacro
%macro
LPF_16_
16_VH
1
INIT_XMM
%
1
cglobal
vp9_loop_filter_v_
16
_16
,
5
,
10
,
16
,
dst
,
stride
,
E
,
I
,
H
,
mstride
,
dst1
,
dst2
,
stride3
,
mstride3
LOOPFILTER
v
,
16
cglobal
vp9_loop_filter_h_
16
_16
,
5
,
10
,
16
,
256
,
dst
,
stride
,
E
,
I
,
H
,
mstride
,
dst1
,
dst2
,
stride3
,
mstride3
LOOPFILTER
h
,
16
%macro
LPF_16_
VH
2
INIT_XMM
%
2
cglobal
vp9_loop_filter_v_
%1
_16
,
5
,
10
,
16
,
dst
,
stride
,
E
,
I
,
H
,
mstride
,
dst1
,
dst2
,
stride3
,
mstride3
LOOPFILTER
v
,
%1
cglobal
vp9_loop_filter_h_
%1
_16
,
5
,
10
,
16
,
256
,
dst
,
stride
,
E
,
I
,
H
,
mstride
,
dst1
,
dst2
,
stride3
,
mstride3
LOOPFILTER
h
,
%1
%endmacro
%macro
LPF_88_16_VH
1
INIT_XMM
%1
cglobal
vp9_loop_filter_v_88_16
,
5
,
10
,
16
,
dst
,
stride
,
E
,
I
,
H
,
mstride
,
dst1
,
dst2
,
stride3
,
mstride3
LOOPFILTER
v
,
88
cglobal
vp9_loop_filter_h_88_16
,
5
,
10
,
16
,
256
,
dst
,
stride
,
E
,
I
,
H
,
mstride
,
dst1
,
dst2
,
stride3
,
mstride3
LOOPFILTER
h
,
88
%macro
LPF_16_VH_ALL_OPTS
1
LPF_16_VH
%1
,
sse2
LPF_16_VH
%1
,
ssse3
LPF_16_VH
%1
,
avx
%endmacro
LPF_16_16_VH
sse2
LPF_16_16_VH
ssse3
LPF_16_16_VH
avx
LPF_88_16_VH
sse2
LPF_88_16_VH
ssse3
LPF_88_16_VH
avx
LPF_16_VH_ALL_OPTS
16
LPF_16_VH_ALL_OPTS
48
LPF_16_VH_ALL_OPTS
84
LPF_16_VH_ALL_OPTS
88
%endif
; x86-64
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment