Linshizhi / ffmpeg.wasm-core / Commits
Commit 8a1cff1c, authored Dec 26, 2014 by Ronald S. Bultje
vp9/x86: make filter_44_h work on 32-bit.
Parent: 047088b8
Showing 2 changed files, with 78 additions and 66 deletions:

libavcodec/x86/vp9dsp_init.c  (+1, -3)
libavcodec/x86/vp9lpf.asm     (+77, -63)
libavcodec/x86/vp9dsp_init.c
@@ -346,9 +346,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
             dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
         } \
         dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
-        if (ARCH_X86_64) { \
-            dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
-        } \
+        dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
         dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
         if (ARCH_X86_64) { \
             dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
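The hunk above drops the ARCH_X86_64 guard around the horizontal 44 loop-filter pointer, so 32-bit builds now also get the SIMD version. A minimal sketch of that pattern, using hypothetical type and function names rather than FFmpeg's real VP9DSPContext layout:

#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-ins for the dsp context and the filter prototype;
 * names and the exact argument list are assumptions for illustration. */
typedef void (*lpf_fn)(uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
typedef struct SketchDSP { lpf_fn loop_filter_mix2[2][2][2]; } SketchDSP;

static void init_loop_filters(SketchDSP *dsp, lpf_fn h_44_simd, lpf_fn v_44_simd,
                              int arch_x86_64)
{
    (void)arch_x86_64;
    /* Before this commit the next assignment sat inside "if (ARCH_X86_64) { ... }";
     * now the horizontal 44 pointer is set on 32-bit builds as well. */
    dsp->loop_filter_mix2[0][0][0] = h_44_simd;
    dsp->loop_filter_mix2[0][0][1] = v_44_simd;
}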
libavcodec/x86/vp9lpf.asm
@@ -289,38 +289,6 @@ SECTION .text
     SWAP            %12, %14
 %endmacro
 
-; transpose 16 half lines (high part) to 8 full centered lines
-%macro TRANSPOSE16x8B 16
-    punpcklbw        m%1, m%2
-    punpcklbw        m%3, m%4
-    punpcklbw        m%5, m%6
-    punpcklbw        m%7, m%8
-    punpcklbw        m%9, m%10
-    punpcklbw       m%11, m%12
-    punpcklbw       m%13, m%14
-    punpcklbw       m%15, m%16
-    SBUTTERFLY       wd, %1, %3, %2
-    SBUTTERFLY       wd, %5, %7, %2
-    SBUTTERFLY       wd, %9, %11, %2
-    SBUTTERFLY       wd, %13, %15, %2
-    SBUTTERFLY       dq, %1, %5, %2
-    SBUTTERFLY       dq, %3, %7, %2
-    SBUTTERFLY       dq, %9, %13, %2
-    SBUTTERFLY       dq, %11, %15, %2
-    SBUTTERFLY      qdq, %1, %9, %2
-    SBUTTERFLY      qdq, %3, %11, %2
-    SBUTTERFLY      qdq, %5, %13, %2
-    SBUTTERFLY      qdq, %7, %15, %2
-    SWAP             %5, %1
-    SWAP             %6, %9
-    SWAP             %7, %1
-    SWAP             %8, %13
-    SWAP             %9, %3
-    SWAP            %10, %11
-    SWAP            %11, %1
-    SWAP            %12, %15
-%endmacro
-
 %macro DEFINE_REAL_P7_TO_Q7 0-1 0
 %define P7 dstq + 4*mstrideq + %1
 %define P6 dstq +   mstride3q + %1
@@ -396,6 +364,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movx             m5, [P2]
     movx             m6, [P1]
     movx             m7, [P0]
+%if ARCH_X86_64
     movx             m8, [Q0]
     movx             m9, [Q1]
     movx            m10, [Q2]
@@ -404,32 +373,67 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movx            m13, [Q5]
     movx            m14, [Q6]
     movx            m15, [Q7]
-%define P7 rsp +   0
-%define P6 rsp +  16
-%define P5 rsp +  32
-%define P4 rsp +  48
-%define P3 rsp +  64
-%define P2 rsp +  80
-%define P1 rsp +  96
-%define P0 rsp + 112
-%define Q0 rsp + 128
-%define Q1 rsp + 144
-%define Q2 rsp + 160
-%define Q3 rsp + 176
-%if %2 == 16
-    TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp]
+%define P7 rsp + 128
+%define P6 rsp + 144
+%define P5 rsp + 160
+%define P4 rsp + 176
 %define Q4 rsp + 192
 %define Q5 rsp + 208
 %define Q6 rsp + 224
 %define Q7 rsp + 240
+%if %2 == 16
+    TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp]
     mova           [P7],  m0
     mova           [P6],  m1
     mova           [P5],  m2
     mova           [P4],  m3
 %else
-    TRANSPOSE16x8B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
-%endif
+    ; 8x16 transpose
+    punpcklbw        m0,  m1
+    punpcklbw        m2,  m3
+    punpcklbw        m4,  m5
+    punpcklbw        m6,  m7
+    punpcklbw        m8,  m9
+    punpcklbw       m10, m11
+    punpcklbw       m12, m13
+    punpcklbw       m14, m15
+    TRANSPOSE8x8W     0,  2,  4,  6,  8, 10, 12, 14, 15
+    SWAP              0,  4
+    SWAP              2,  5
+    SWAP              0,  6
+    SWAP              0,  7
+    SWAP             10,  9
+    SWAP             12, 10
+    SWAP             14, 11
+%endif
+%else ; x86-32
+    punpcklbw        m0,  m1
+    punpcklbw        m2,  m3
+    punpcklbw        m4,  m5
+    punpcklbw        m6,  m7
+    movx             m1, [Q0]
+    movx             m3, [Q1]
+    movx             m5, [Q2]
+    movx             m7, [Q3]
+    punpcklbw        m1,  m3
+    punpcklbw        m5,  m7
+    movx             m3, [Q4]
+    movx             m7, [Q5]
+    punpcklbw        m3,  m7
+    mova          [rsp],  m3
+    movx             m3, [Q6]
+    movx             m7, [Q7]
+    punpcklbw        m3,  m7
+%endif
+%define P3 rsp +   0
+%define P2 rsp +  16
+%define P1 rsp +  32
+%define P0 rsp +  48
+%define Q0 rsp +  64
+%define Q1 rsp +  80
+%define Q2 rsp +  96
+%define Q3 rsp + 112
+%if ARCH_X86_64
     mova           [P3],  m4
     mova           [P2],  m5
     mova           [P1],  m6
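For the narrower filters, the new path replaces the 16-register TRANSPOSE16x8B call with an inline "8x16 transpose" built from punpcklbw and TRANSPOSE8x8W, with the x86-32 branch spilling one intermediate through [rsp]. As a reference for the data movement this computes (an illustration only, not the SIMD code above), a plain scalar model in C:

#include <stdint.h>

/* 16 input lines of 8 pixels each (the p3..q3 taps straddling the vertical
 * edge) become 8 output rows of 16 pixels, one row per filter tap position. */
static void transpose_16x8(const uint8_t in[16][8], uint8_t out[8][16])
{
    for (int line = 0; line < 16; line++)
        for (int tap = 0; tap < 8; tap++)
            out[tap][line] = in[line][tap];
}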
@@ -444,7 +448,17 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     mova           [Q6], m14
     mova           [Q7], m15
 %endif
+%else ; x86-32
+    TRANSPOSE8x8W     0, 2, 4, 6, 1, 5, 7, 3, [rsp], [Q0], 1
+    mova           [P3],  m0
+    mova           [P2],  m2
+    mova           [P1],  m4
+    mova           [P0],  m6
+    mova           [Q1],  m5
+    mova           [Q2],  m7
+    mova           [Q3],  m3
+%endif
 %endif ; %1 == h
 
     ; calc fm mask
 %if %2 == 16
@@ -960,22 +974,22 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     RET
 %endmacro
 
-%macro LPF_16_VH 4
-INIT_XMM %4
-LOOPFILTER v, %1, %2,   0, %3
-%if ARCH_X86_64
-LOOPFILTER h, %1, %2, 256, %3
+%macro LPF_16_VH 5
+INIT_XMM %5
+LOOPFILTER v, %1, %2,  0, %4
+%if ARCH_X86_64 || %1 == 44
+LOOPFILTER h, %1, %2, %3, %4
 %endif
 %endmacro
 
-%macro LPF_16_VH_ALL_OPTS 2-3 0
-LPF_16_VH %1, %2, %3, sse2
-LPF_16_VH %1, %2, %3, ssse3
-LPF_16_VH %1, %2, %3, avx
+%macro LPF_16_VH_ALL_OPTS 4
+LPF_16_VH %1, %2, %3, %4, sse2
+LPF_16_VH %1, %2, %3, %4, ssse3
+LPF_16_VH %1, %2, %3, %4, avx
 %endmacro
 
-LPF_16_VH_ALL_OPTS 16, 512, 32
-LPF_16_VH_ALL_OPTS 44,   0,  0
-LPF_16_VH_ALL_OPTS 48, 256, 16
-LPF_16_VH_ALL_OPTS 84, 256, 16
-LPF_16_VH_ALL_OPTS 88, 256, 16
+LPF_16_VH_ALL_OPTS 16, 512, 256, 32
+LPF_16_VH_ALL_OPTS 44,   0, 128,  0
+LPF_16_VH_ALL_OPTS 48, 256, 128, 16
+LPF_16_VH_ALL_OPTS 84, 256, 128, 16
+LPF_16_VH_ALL_OPTS 88, 256, 128, 16
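The extra LPF_16_VH argument appears to be the per-filter stack scratch the horizontal variant needs for the transposed rows: 128 bytes for the 44/48/84/88 filters and 256 for the 16 filter, consistent with 8 and 16 rows of one 16-byte xmm store each. A throwaway check of that arithmetic (hypothetical helper, not part of the source):

#include <assert.h>

/* one 16-byte xmm store per transposed row */
static int transpose_scratch_bytes(int rows) { return rows * 16; }

int main(void)
{
    assert(transpose_scratch_bytes(8)  == 128);  /* 44/48/84/88 filters */
    assert(transpose_scratch_bytes(16) == 256);  /* 16 filter */
    return 0;
}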