Linshizhi / ffmpeg.wasm-core · Commits

Commit 715f139c
authored Dec 26, 2014 by Ronald S. Bultje
committed by Anton Khirnov on Oct 04, 2016
vp9lpf/x86: make filter_16_h work on 32-bit.
Signed-off-by: Anton Khirnov <anton@khirnov.net>

parent 8915320d
Showing 2 changed files with 154 additions and 41 deletions:

  libavcodec/x86/vp9dsp_init.c   +1    -3
  libavcodec/x86/vp9lpf.asm      +153  -38
libavcodec/x86/vp9dsp_init.c

@@ -279,9 +279,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
     init_subpel2(4, idx, 4, type, opt)
 
 #define init_lpf(opt) do { \
-    if (ARCH_X86_64) { \
-        dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
-    } \
+    dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
     dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
     dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
     dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
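The hunk above is the dispatch side of the change: dsp->loop_filter_16[0], the horizontal 16-pixel loop filter, is no longer assigned only under ARCH_X86_64, so 32-bit builds now use the SIMD version as well. The rest of the commit, in vp9lpf.asm below, is what makes ff_vp9_loop_filter_h_16_16_* workable with only the eight XMM registers of x86-32: a new TRANSPOSE8x8B macro transposes 8x8 byte blocks while routing one input row and some scratch through memory, and DEFINE_TRANSPOSED_P7_TO_Q7 takes an optional byte offset so the two 8-row passes land in the low and high halves of the 16-byte scratch rows on the stack (offset 0, then 8). Below is a rough, self-contained C model of that scratch layout; slot_offset() and the table are illustrative only, not FFmpeg code.

/* Rough model of the transposed scratch area used by the x86-32 path in
 * vp9lpf.asm: sixteen 16-byte rows on the stack, named by
 * DEFINE_TRANSPOSED_P7_TO_Q7, whose optional argument (0 or 8) shifts every
 * slot so the second TRANSPOSE8x8B pass fills the high half of each row.
 * Hypothetical helper for illustration; not part of FFmpeg. */
#include <stdio.h>

static const char *const slot_names[16] = {
    "P3", "P2", "P1", "P0", "Q0", "Q1", "Q2", "Q3",
    "P7", "P6", "P5", "P4", "Q4", "Q5", "Q6", "Q7",
};

/* Byte offset of a named slot from rsp; half is 0 for the first eight
 * transposed rows and 8 for the second eight. */
static int slot_offset(int slot, int half)
{
    return slot * 16 + half;
}

int main(void)
{
    for (int i = 0; i < 16; i++)
        printf("%-2s = rsp + %3d (second half at rsp + %3d)\n",
               slot_names[i], slot_offset(i, 0), slot_offset(i, 8));
    return 0;
}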
libavcodec/x86/vp9lpf.asm
@@ -291,6 +291,30 @@ SECTION .text
     SWAP             %12, %14
 %endmacro
 
+%macro TRANSPOSE8x8B 13
+    SBUTTERFLY bw,  %1, %2, %7
+    movdq%10        m%7, %9
+    movdqa          %11, m%2
+    SBUTTERFLY bw,  %3, %4, %2
+    SBUTTERFLY bw,  %5, %6, %2
+    SBUTTERFLY bw,  %7, %8, %2
+    SBUTTERFLY wd,  %1, %3, %2
+    movdqa          m%2, %11
+    movdqa          %11, m%3
+    SBUTTERFLY wd,  %2, %4, %3
+    SBUTTERFLY wd,  %5, %7, %3
+    SBUTTERFLY wd,  %6, %8, %3
+    SBUTTERFLY dq,  %1, %5, %3
+    SBUTTERFLY dq,  %2, %6, %3
+    movdqa          m%3, %11
+    movh            %12, m%2
+    movhps          %13, m%2
+    SBUTTERFLY dq,  %3, %7, %2
+    SBUTTERFLY dq,  %4, %8, %2
+    SWAP             %2, %5
+    SWAP             %4, %7
+%endmacro
+
 %macro DEFINE_REAL_P7_TO_Q7 0-1 0
 %define P7 dstq + 4*mstrideq + %1
 %define P6 dstq +  mstride3q + %1
@@ -310,6 +334,25 @@ SECTION .text
 %define Q7 dst2q +  stride3q + %1
 %endmacro
 
+%macro DEFINE_TRANSPOSED_P7_TO_Q7 0-1 0
+%define P3 rsp +   0 + %1
+%define P2 rsp +  16 + %1
+%define P1 rsp +  32 + %1
+%define P0 rsp +  48 + %1
+%define Q0 rsp +  64 + %1
+%define Q1 rsp +  80 + %1
+%define Q2 rsp +  96 + %1
+%define Q3 rsp + 112 + %1
+%define P7 rsp + 128 + %1
+%define P6 rsp + 144 + %1
+%define P5 rsp + 160 + %1
+%define P4 rsp + 176 + %1
+%define Q4 rsp + 192 + %1
+%define Q5 rsp + 208 + %1
+%define Q6 rsp + 224 + %1
+%define Q7 rsp + 240 + %1
+%endmacro
+
 ; ..............AB -> AAAAAAAABBBBBBBB
 %macro SPLATB_MIX 1-2 [mask_mix]
 %if cpuflag(ssse3)
@@ -364,7 +407,9 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movx                m3, [P4]
     movx                m4, [P3]
     movx                m5, [P2]
+%if ARCH_X86_64 || %2 != 16
     movx                m6, [P1]
+%endif
     movx                m7, [P0]
 %if ARCH_X86_64
     movx                m8, [Q0]
@@ -375,21 +420,14 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movx               m13, [Q5]
     movx               m14, [Q6]
     movx               m15, [Q7]
+    DEFINE_TRANSPOSED_P7_TO_Q7
 %if %2 == 16
     TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp]
-%define P7 rsp + 128
-%define P6 rsp + 144
-%define P5 rsp + 160
-%define P4 rsp + 176
-%define Q4 rsp + 192
-%define Q5 rsp + 208
-%define Q6 rsp + 224
-%define Q7 rsp + 240
     mova            [P7], m0
     mova            [P6], m1
     mova            [P5], m2
     mova            [P4], m3
-%else
+%else ; %2 == 44/48/84/88
     ; 8x16 transpose
     punpcklbw           m0, m1
     punpcklbw           m2, m3
@@ -407,8 +445,65 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     SWAP                10, 9
     SWAP                12, 10
     SWAP                14, 11
-%endif
+%endif ; %2
+    mova            [P3], m4
+    mova            [P2], m5
+    mova            [P1], m6
+    mova            [P0], m7
+    mova            [Q0], m8
+    mova            [Q1], m9
+    mova            [Q2], m10
+    mova            [Q3], m11
+%if %2 == 16
+    mova            [Q4], m12
+    mova            [Q5], m13
+    mova            [Q6], m14
+    mova            [Q7], m15
+%endif ; %2
 %else ; x86-32
+%if %2 == 16
+    TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [P1], u, [rsp + %3 + %4], [rsp + 64], [rsp + 80]
+    DEFINE_TRANSPOSED_P7_TO_Q7
+    movh            [P7], m0
+    movh            [P5], m1
+    movh            [P3], m2
+    movh            [P1], m3
+    movh            [Q2], m5
+    movh            [Q4], m6
+    movh            [Q6], m7
+    movhps          [P6], m0
+    movhps          [P4], m1
+    movhps          [P2], m2
+    movhps          [P0], m3
+    movhps          [Q3], m5
+    movhps          [Q5], m6
+    movhps          [Q7], m7
+    DEFINE_REAL_P7_TO_Q7
+    movx                m0, [Q0]
+    movx                m1, [Q1]
+    movx                m2, [Q2]
+    movx                m3, [Q3]
+    movx                m4, [Q4]
+    movx                m5, [Q5]
+    movx                m7, [Q7]
+    TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [Q6], u, [rsp + %3 + %4], [rsp + 72], [rsp + 88]
+    DEFINE_TRANSPOSED_P7_TO_Q7 8
+    movh            [P7], m0
+    movh            [P5], m1
+    movh            [P3], m2
+    movh            [P1], m3
+    movh            [Q2], m5
+    movh            [Q4], m6
+    movh            [Q6], m7
+    movhps          [P6], m0
+    movhps          [P4], m1
+    movhps          [P2], m2
+    movhps          [P0], m3
+    movhps          [Q3], m5
+    movhps          [Q5], m6
+    movhps          [Q7], m7
+    DEFINE_TRANSPOSED_P7_TO_Q7
+%else ; %2 == 44/48/84/88
     punpcklbw           m0, m1
     punpcklbw           m2, m3
     punpcklbw           m4, m5
@@ -426,31 +521,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movx                m3, [Q6]
     movx                m7, [Q7]
     punpcklbw           m3, m7
-%endif
-%define P3 rsp +   0
-%define P2 rsp +  16
-%define P1 rsp +  32
-%define P0 rsp +  48
-%define Q0 rsp +  64
-%define Q1 rsp +  80
-%define Q2 rsp +  96
-%define Q3 rsp + 112
-%if ARCH_X86_64
-    mova            [P3], m4
-    mova            [P2], m5
-    mova            [P1], m6
-    mova            [P0], m7
-    mova            [Q0], m8
-    mova            [Q1], m9
-    mova            [Q2], m10
-    mova            [Q3], m11
-%if %2 == 16
-    mova            [Q4], m12
-    mova            [Q5], m13
-    mova            [Q6], m14
-    mova            [Q7], m15
-%endif
-%else ; x86-32
+    DEFINE_TRANSPOSED_P7_TO_Q7
     TRANSPOSE8x8W 0, 2, 4, 6, 1, 5, 7, 3, [rsp], [Q0], 1
     mova            [P3], m0
     mova            [P2], m2
@@ -459,7 +530,8 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     mova            [Q1], m5
     mova            [Q2], m7
     mova            [Q3], m3
-%endif
+%endif ; %2
+%endif ; x86-32/64
 %endif ; %1 == h
 
     ; calc fm mask
@@ -864,8 +936,11 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     mova                m3, [P4]
     mova                m4, [P3]
     mova                m5, [P2]
+%if ARCH_X86_64
+    mova                m6, [P1]
+%endif
     mova                m7, [P0]
+%if ARCH_X86_64
     mova                m8, [Q0]
     mova                m9, [Q1]
     mova               m10, [Q2]
@@ -892,6 +967,48 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movu            [Q5], m13
     movu            [Q6], m14
     movu            [Q7], m15
+%else
+    DEFINE_REAL_P7_TO_Q7
+    TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [rsp + 32], a, [rsp + %3 + %4], [Q0], [Q1]
+    movh            [P7], m0
+    movh            [P5], m1
+    movh            [P3], m2
+    movh            [P1], m3
+    movh            [Q2], m5
+    movh            [Q4], m6
+    movh            [Q6], m7
+    movhps          [P6], m0
+    movhps          [P4], m1
+    movhps          [P2], m2
+    movhps          [P0], m3
+    movhps          [Q3], m5
+    movhps          [Q5], m6
+    movhps          [Q7], m7
+    DEFINE_TRANSPOSED_P7_TO_Q7
+    mova                m0, [Q0]
+    mova                m1, [Q1]
+    mova                m2, [Q2]
+    mova                m3, [Q3]
+    mova                m4, [Q4]
+    mova                m5, [Q5]
+    mova                m7, [Q7]
+    DEFINE_REAL_P7_TO_Q7 8
+    TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [rsp + 224], a, [rsp + %3 + %4], [Q0], [Q1]
+    movh            [P7], m0
+    movh            [P5], m1
+    movh            [P3], m2
+    movh            [P1], m3
+    movh            [Q2], m5
+    movh            [Q4], m6
+    movh            [Q6], m7
+    movhps          [P6], m0
+    movhps          [P4], m1
+    movhps          [P2], m2
+    movhps          [P0], m3
+    movhps          [Q3], m5
+    movhps          [Q5], m6
+    movhps          [Q7], m7
+%endif
 %elif %2 == 44
     SWAP                 0, 1 ; m0 = p1
     SWAP                 1, 7 ; m1 = p0
@@ -1007,9 +1124,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
 %macro LPF_16_VH 5
     INIT_XMM %5
     LOOPFILTER v, %1, %2,  0, %4
-%if ARCH_X86_64 || %1 != 16
     LOOPFILTER h, %1, %2, %3, %4
-%endif
 %endmacro
 
 %macro LPF_16_VH_ALL_OPTS 4