Linshizhi / ffmpeg.wasm-core · Commits

Commit 4b66274a
authored Sep 11, 2015 by Ronald S. Bultje
vp9: save one (PSIGNW) instruction in iadst16_1d sse2/ssse3.
parent b8423269

Showing 1 changed file with 17 additions and 16 deletions

libavcodec/x86/vp9itxfm.asm  +17 -16
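What the change does, in short: in the 32-bit (non-ARCH_X86_64) iadst16 path, the old code first negated one operand with PSIGNW m7, [pw_m1] and then ran the 11585/-11585 butterfly; the new code swaps the two register arguments of VP9_UNPACK_MULSUB_2W_4X and keeps both coefficients positive, folding the sign into the constants instead (hence the new pw_m11585_m11585 row below), so the standalone PSIGNW disappears. The scalar C sketch below only illustrates why the two formulations give the same pair of products; it assumes the macro computes the usual multiply/sub butterfly out0 = a*c0 + b*c1, out1 = a*c1 - b*c0 (before rounding and shifting), and the helper name butterfly and the sample values are illustrative, not taken from the FFmpeg source.

    #include <assert.h>
    #include <stdint.h>

    /* Assumed scalar model of the VP9_UNPACK_MULSUB_2W_4X butterfly
     * (ignoring the rounding/shift the real macro also performs). */
    static void butterfly(int32_t a, int32_t b, int32_t c0, int32_t c1,
                          int32_t *o0, int32_t *o1)
    {
        *o0 = a * c0 + b * c1;
        *o1 = a * c1 - b * c0;
    }

    int main(void)
    {
        int32_t m6 = 1234, m7 = -567;   /* arbitrary sample values */
        int32_t old0, old1, new0, new1;

        /* old 32-bit path: PSIGNW first negates m7, then (11585, -11585) */
        butterfly(m6, -m7, 11585, -11585, &old0, &old1);

        /* new 32-bit path: operands swapped, both coefficients +11585 */
        butterfly(m7, m6, 11585, 11585, &new0, &new1);

        /* identical results, one negation fewer */
        assert(old0 == new0 && old1 == new1);
        return 0;
    }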
@@ -29,6 +29,7 @@ pw_11585x2: times 8 dw 23170
 pw_m11585x2: times 8 dw -23170
 pw_m11585_11585: times 4 dw -11585, 11585
 pw_11585_11585: times 8 dw 11585
+pw_m11585_m11585: times 8 dw -11585
 
 %macro VP9_IDCT_COEFFS 2-3 0
 pw_%1x2: times 8 dw %1*2
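Background on the coefficients in this table (not part of the patch, just for orientation): 11585 is the usual VP9 fixed-point encoding of cos(pi/4) = 1/sqrt(2), and the doubled value 23170 exists because pmulhrsw computes a rounded (x*y) >> 15:

    11585 ≈ 2^14 * cos(pi/4) = 16384 / sqrt(2) ≈ 11585.24
    23170 = 2 * 11585,  so  pmulhrsw x, [pw_11585x2] ≈ round(x * 23170 / 2^15) = round(x / sqrt(2))

The added pw_m11585_m11585 row is simply the negated form of the same coefficient; presumably it is what the rewritten VP9_UNPACK_MULSUB_2W_4X call in the next hunk loads now that the explicit PSIGNW negation is gone.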
@@ -1716,13 +1717,13 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     SUMSUB_BA            w,  7,  6,  4
     pmulhrsw            m7, [pw_m11585x2]       ; m8=out7[w]
     pmulhrsw            m6, [pw_11585x2]        ; m1=out8[w]
+    SWAP                 6,  7
     SUMSUB_BA            w,  3,  2,  4
     pmulhrsw            m3, [pw_11585x2]        ; m3=out4[w]
     pmulhrsw            m2, [pw_11585x2]        ; m2=out11[w]
 %else
     SCRATCH              5,  8, tmpq+10*%%str
-    PSIGNW              m7, [pw_m1]
-    VP9_UNPACK_MULSUB_2W_4X  6,  7, 11585, m11585, [pd_8192],  5,  4
+    VP9_UNPACK_MULSUB_2W_4X  7,  6, 11585, 11585, [pd_8192],  5,  4
     VP9_UNPACK_MULSUB_2W_4X  2,  3, 11585, 11585, [pd_8192],  5,  4
     UNSCRATCH            5,  8, tmpq+10*%%str
 %endif
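Two details are easy to misread here. SWAP is the x86inc.asm macro that renumbers register aliases at assembly time and emits no instruction, so the added SWAP 6, 7 does not make the x86-64 branch longer; the instruction actually saved is the PSIGNW in the 32-bit branch. And because the rewritten VP9_UNPACK_MULSUB_2W_4X call takes its two register operands in the opposite order, its results come out with the roles of m6 and m7 exchanged (judging by the register renames in the rest of the diff), which is why every remaining hunk below is just that m6/m7 swap propagated through the stores, transposes and VP9_IDCT8_WRITEx2 calls.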
@@ -1733,7 +1734,7 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
 %if %2 == 1
 %if ARCH_X86_64
     mova               m13, [tmpq+ 6*%%str]
-    TRANSPOSE8x8W        1, 11, 14,  0,  3, 15, 13,  7, 10
+    TRANSPOSE8x8W        1, 11, 14,  0,  3, 15, 13,  6, 10
     mova [tmpq+ 0*16], m1
     mova [tmpq+ 2*16], m11
     mova [tmpq+ 4*16], m14
@@ -1745,10 +1746,10 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     mova [tmpq+ 8*16], m3
     mova [tmpq+10*16], m15
     mova [tmpq+12*16], m13
-    mova [tmpq+14*16], m7
+    mova [tmpq+14*16], m6
 
-    TRANSPOSE8x8W        6,  1, 11,  2,  9, 14,  0,  5, 10
-    mova [tmpq+ 1*16], m6
+    TRANSPOSE8x8W        7,  1, 11,  2,  9, 14,  0,  5, 10
+    mova [tmpq+ 1*16], m7
     mova [tmpq+ 3*16], m1
     mova [tmpq+ 5*16], m11
     mova [tmpq+ 7*16], m2
@@ -1759,20 +1760,20 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
 %else
     mova [tmpq+12*%%str], m2
     mova [tmpq+ 1*%%str], m5
-    mova [tmpq+15*%%str], m6
+    mova [tmpq+15*%%str], m7
     mova                m2, [tmpq+ 9*%%str]
     mova                m5, [tmpq+ 5*%%str]
-    mova                m6, [tmpq+ 8*%%str]
-    TRANSPOSE8x8W        1,  2,  5,  0,  3,  6,  4,  7, [tmpq+ 6*%%str], [tmpq+ 8*%%str], 1
+    mova                m7, [tmpq+ 8*%%str]
+    TRANSPOSE8x8W        1,  2,  5,  0,  3,  7,  4,  6, [tmpq+ 6*%%str], [tmpq+ 8*%%str], 1
     mova [tmpq+ 0*16], m1
     mova [tmpq+ 2*16], m2
     mova [tmpq+ 4*16], m5
     mova [tmpq+ 6*16], m0
-    mova [tmpq+10*16], m6
+    mova [tmpq+10*16], m7
     mova                m3, [tmpq+12*%%str]
     mova [tmpq+12*16], m4
     mova                m4, [tmpq+14*%%str]
-    mova [tmpq+14*16], m7
+    mova [tmpq+14*16], m6
 
     mova                m0, [tmpq+15*%%str]
     mova                m1, [tmpq+ 3*%%str]
@@ -1805,7 +1806,7 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     lea               dstq, [dstq+strideq*2]
     VP9_IDCT8_WRITEx2    3, 15, 10,  8,  4, ROUND_REG, 6
     lea               dstq, [dstq+strideq*2]
-    VP9_IDCT8_WRITEx2   12,  7, 10,  8,  4, ROUND_REG, 6
+    VP9_IDCT8_WRITEx2   12,  6, 10,  8,  4, ROUND_REG, 6
     lea               dstq, [dstq+strideq*2]
 
     mova                m1, [tmpq+ 3*%%str]
@@ -1813,7 +1814,7 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
 
     mova               m14, [tmpq+11*%%str]
     mova                m0, [tmpq+13*%%str]
-    VP9_IDCT8_WRITEx2    6,  1, 10,  8,  4, ROUND_REG, 6
+    VP9_IDCT8_WRITEx2    7,  1, 10,  8,  4, ROUND_REG, 6
     lea               dstq, [dstq+strideq*2]
     VP9_IDCT8_WRITEx2   11,  2, 10,  8,  4, ROUND_REG, 6
     lea               dstq, [dstq+strideq*2]
@@ -1823,9 +1824,9 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
 %else
     mova [tmpq+ 0*%%str], m2
     mova [tmpq+ 1*%%str], m5
-    mova [tmpq+ 2*%%str], m6
+    mova [tmpq+ 2*%%str], m7
     mova                m2, [tmpq+ 9*%%str]
-    VP9_IDCT8_WRITEx2    1,  2,  5,  6,  4, ROUND_REG, 6
+    VP9_IDCT8_WRITEx2    1,  2,  5,  7,  4, ROUND_REG, 6
     lea               dstq, [dstq+strideq*2]
     mova                m5, [tmpq+ 5*%%str]
     VP9_IDCT8_WRITEx2    5,  0,  1,  2,  4, ROUND_REG, 6
@@ -1834,7 +1835,7 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     VP9_IDCT8_WRITEx2    3,  5,  1,  2,  4, ROUND_REG, 6
     lea               dstq, [dstq+strideq*2]
     mova                m5, [tmpq+ 6*%%str]
-    VP9_IDCT8_WRITEx2    5,  7,  1,  2,  4, ROUND_REG, 6
+    VP9_IDCT8_WRITEx2    5,  6,  1,  2,  4, ROUND_REG, 6
     lea               dstq, [dstq+strideq*2]
     mova                m0, [tmpq+ 2*%%str]