Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
725a2164
Commit
725a2164
authored
Dec 26, 2014
by
Ronald S. Bultje
Committed by
Anton Khirnov
Oct 04, 2016
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9lpf/x86: make filter_44_h work on 32-bit.
Signed-off-by: Anton Khirnov <anton@khirnov.net>
parent
5bfa96c4
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
78 additions
and
66 deletions
+78
-66
vp9dsp_init.c
libavcodec/x86/vp9dsp_init.c
+1
-3
vp9lpf.asm
libavcodec/x86/vp9lpf.asm
+77
-63
No files found.
libavcodec/x86/vp9dsp_init.c
View file @
725a2164
...
@@ -283,9 +283,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
...
@@ -283,9 +283,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
} \
} \
dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
if (ARCH_X86_64) { \
dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
} \
dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
if (ARCH_X86_64) { \
if (ARCH_X86_64) { \
dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
...
...
libavcodec/x86/vp9lpf.asm
View file @
725a2164
...
@@ -291,38 +291,6 @@ SECTION .text
...
@@ -291,38 +291,6 @@ SECTION .text
SWAP
%12
,
%14
SWAP
%12
,
%14
%endmacro
%endmacro
; transpose 16 half lines (high part) to 8 full centered lines
%macro
TRANSPOSE16x8B
16
punpcklbw
m%1
,
m%2
punpcklbw
m%3
,
m%4
punpcklbw
m%5
,
m%6
punpcklbw
m%7
,
m%8
punpcklbw
m%9
,
m%10
punpcklbw
m%11
,
m%12
punpcklbw
m%13
,
m%14
punpcklbw
m%15
,
m%16
SBUTTERFLY
wd
,
%1
,
%3
,
%2
SBUTTERFLY
wd
,
%5
,
%7
,
%2
SBUTTERFLY
wd
,
%9
,
%11
,
%2
SBUTTERFLY
wd
,
%13
,
%15
,
%2
SBUTTERFLY
dq
,
%1
,
%5
,
%2
SBUTTERFLY
dq
,
%3
,
%7
,
%2
SBUTTERFLY
dq
,
%9
,
%13
,
%2
SBUTTERFLY
dq
,
%11
,
%15
,
%2
SBUTTERFLY
qdq
,
%1
,
%9
,
%2
SBUTTERFLY
qdq
,
%3
,
%11
,
%2
SBUTTERFLY
qdq
,
%5
,
%13
,
%2
SBUTTERFLY
qdq
,
%7
,
%15
,
%2
SWAP
%5
,
%1
SWAP
%6
,
%9
SWAP
%7
,
%1
SWAP
%8
,
%13
SWAP
%9
,
%3
SWAP
%10
,
%11
SWAP
%11
,
%1
SWAP
%12
,
%15
%endmacro
%macro
DEFINE_REAL_P7_TO_Q7
0
-
1
0
%macro
DEFINE_REAL_P7_TO_Q7
0
-
1
0
%define
P7
dstq
+
4
*
mstrideq
+
%1
%define
P7
dstq
+
4
*
mstrideq
+
%1
%define
P6
dstq
+
mstride3q
+
%1
%define
P6
dstq
+
mstride3q
+
%1
...
@@ -398,6 +366,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
...
@@ -398,6 +366,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
movx
m5
,
[P2]
movx
m5
,
[P2]
movx
m6
,
[P1]
movx
m6
,
[P1]
movx
m7
,
[P0]
movx
m7
,
[P0]
%if
ARCH_X86_64
movx
m8
,
[Q0]
movx
m8
,
[Q0]
movx
m9
,
[Q1]
movx
m9
,
[Q1]
movx
m10
,
[Q2]
movx
m10
,
[Q2]
...
@@ -406,32 +375,67 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
...
@@ -406,32 +375,67 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
movx
m13
,
[Q5]
movx
m13
,
[Q5]
movx
m14
,
[Q6]
movx
m14
,
[Q6]
movx
m15
,
[Q7]
movx
m15
,
[Q7]
%define
P7
rsp
+
0
%if
%2
==
16
%define
P6
rsp
+
16
TRANSPOSE16x16B
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
[rsp]
%define
P5
rsp
+
32
%define
P7
rsp
+
128
%define
P4
rsp
+
48
%define
P6
rsp
+
144
%define
P3
rsp
+
64
%define
P5
rsp
+
160
%define
P2
rsp
+
80
%define
P4
rsp
+
176
%define
P1
rsp
+
96
%define
P0
rsp
+
112
%define
Q0
rsp
+
128
%define
Q1
rsp
+
144
%define
Q2
rsp
+
160
%define
Q3
rsp
+
176
%define
Q4
rsp
+
192
%define
Q4
rsp
+
192
%define
Q5
rsp
+
208
%define
Q5
rsp
+
208
%define
Q6
rsp
+
224
%define
Q6
rsp
+
224
%define
Q7
rsp
+
240
%define
Q7
rsp
+
240
%if
%2
==
16
TRANSPOSE16x16B
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
[rsp]
mova
[P7],
m0
mova
[P7],
m0
mova
[P6],
m1
mova
[P6],
m1
mova
[P5],
m2
mova
[P5],
m2
mova
[P4],
m3
mova
[P4],
m3
%else
%else
TRANSPOSE16x8B
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
; 8x16 transpose
%endif
punpcklbw
m0
,
m1
punpcklbw
m2
,
m3
punpcklbw
m4
,
m5
punpcklbw
m6
,
m7
punpcklbw
m8
,
m9
punpcklbw
m10
,
m11
punpcklbw
m12
,
m13
punpcklbw
m14
,
m15
TRANSPOSE8x8W
0
,
2
,
4
,
6
,
8
,
10
,
12
,
14
,
15
SWAP
0
,
4
SWAP
2
,
5
SWAP
0
,
6
SWAP
0
,
7
SWAP
10
,
9
SWAP
12
,
10
SWAP
14
,
11
%endif
%else
; x86-32
punpcklbw
m0
,
m1
punpcklbw
m2
,
m3
punpcklbw
m4
,
m5
punpcklbw
m6
,
m7
movx
m1
,
[Q0]
movx
m3
,
[Q1]
movx
m5
,
[Q2]
movx
m7
,
[Q3]
punpcklbw
m1
,
m3
punpcklbw
m5
,
m7
movx
m3
,
[Q4]
movx
m7
,
[Q5]
punpcklbw
m3
,
m7
mova
[rsp],
m3
movx
m3
,
[Q6]
movx
m7
,
[Q7]
punpcklbw
m3
,
m7
%endif
%define
P3
rsp
+
0
%define
P2
rsp
+
16
%define
P1
rsp
+
32
%define
P0
rsp
+
48
%define
Q0
rsp
+
64
%define
Q1
rsp
+
80
%define
Q2
rsp
+
96
%define
Q3
rsp
+
112
%if
ARCH_X86_64
mova
[P3],
m4
mova
[P3],
m4
mova
[P2],
m5
mova
[P2],
m5
mova
[P1],
m6
mova
[P1],
m6
...
@@ -446,7 +450,17 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
...
@@ -446,7 +450,17 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
mova
[Q6],
m14
mova
[Q6],
m14
mova
[Q7],
m15
mova
[Q7],
m15
%endif
%endif
%else
; x86-32
TRANSPOSE8x8W
0
,
2
,
4
,
6
,
1
,
5
,
7
,
3
,
[rsp],
[Q0],
1
mova
[P3],
m0
mova
[P2],
m2
mova
[P1],
m4
mova
[P0],
m6
mova
[Q1],
m5
mova
[Q2],
m7
mova
[Q3],
m3
%endif
%endif
%endif
; %1 == h
; calc fm mask
; calc fm mask
%if
%2
==
16
%if
%2
==
16
...
@@ -962,22 +976,22 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
...
@@ -962,22 +976,22 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
RET
RET
%endmacro
%endmacro
%macro
LPF_16_VH
4
%macro
LPF_16_VH
5
INIT_XMM
%4
INIT_XMM
%5
LOOPFILTER
v
,
%1
,
%2
,
0
,
%3
LOOPFILTER
v
,
%1
,
%2
,
0
,
%4
%if
ARCH_X86_64
%if
ARCH_X86_64
||
%1
==
44
LOOPFILTER
h
,
%1
,
%2
,
256
,
%3
LOOPFILTER
h
,
%1
,
%2
,
%3
,
%4
%endif
%endif
%endmacro
%endmacro
%macro
LPF_16_VH_ALL_OPTS
2
-
3
0
%macro
LPF_16_VH_ALL_OPTS
4
LPF_16_VH
%1
,
%2
,
%3
,
sse2
LPF_16_VH
%1
,
%2
,
%3
,
%4
,
sse2
LPF_16_VH
%1
,
%2
,
%3
,
ssse3
LPF_16_VH
%1
,
%2
,
%3
,
%4
,
ssse3
LPF_16_VH
%1
,
%2
,
%3
,
avx
LPF_16_VH
%1
,
%2
,
%3
,
%4
,
avx
%endmacro
%endmacro
LPF_16_VH_ALL_OPTS
16
,
512
,
32
LPF_16_VH_ALL_OPTS
16
,
512
,
256
,
32
LPF_16_VH_ALL_OPTS
44
,
0
,
0
LPF_16_VH_ALL_OPTS
44
,
0
,
128
,
0
LPF_16_VH_ALL_OPTS
48
,
256
,
16
LPF_16_VH_ALL_OPTS
48
,
256
,
128
,
16
LPF_16_VH_ALL_OPTS
84
,
256
,
16
LPF_16_VH_ALL_OPTS
84
,
256
,
128
,
16
LPF_16_VH_ALL_OPTS
88
,
256
,
16
LPF_16_VH_ALL_OPTS
88
,
256
,
128
,
16
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment