Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
8476ca3b
Commit
8476ca3b
authored
Mar 03, 2012
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp8: convert idct x86 assembly to use named arguments.
parent
21ffc78f
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
103 additions
and
97 deletions
+103
-97
vp8dsp.asm
libavcodec/x86/vp8dsp.asm
+103
-97
No files found.
libavcodec/x86/vp8dsp.asm
View file @
8476ca3b
...
...
@@ -906,10 +906,10 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; ADD_DC %1, %2, %3, %4
;   %1 = mm/xmm reg holding the splatted positive DC bytes
;   %2 = mm/xmm reg holding the splatted negated DC bytes
;   %3 = byte offset within each row
;   %4 = load/store op to use (movh / mova)
;
; Adds a signed DC value to two row pairs of unsigned pixels at
; dst1q / dst2q (named args set up by the caller via DEFINE_ARGS):
; paddusb with the positive copy then psubusb with the negated copy
; gives a saturated signed add on unsigned bytes. Clobbers m2-m5.
;-----------------------------------------------------------------------------
%macro ADD_DC 4
    %4        m2, [dst1q+%3]
    %4        m3, [dst1q+strideq+%3]
    %4        m4, [dst2q+%3]
    %4        m5, [dst2q+strideq+%3]
    paddusb   m2, %1
    paddusb   m3, %1
    paddusb   m4, %1
    paddusb   m5, %1                    ; NOTE(review): elided in the scraped diff; restored by symmetry with m2-m4
    psubusb   m2, %2                    ; NOTE(review): elided in the scraped diff; restored by symmetry with m3-m5
    psubusb   m3, %2
    psubusb   m4, %2
    psubusb   m5, %2
    %4        [dst1q+%3],         m2
    %4        [dst1q+strideq+%3], m3
    %4        [dst2q+%3],         m4
    %4        [dst2q+strideq+%3], m5
%endmacro
;-----------------------------------------------------------------------------
; vp8_idct_dc_add(dst, block, stride) -- MMX version
; Adds the DC-only inverse transform of the 4x4 coefficient block at
; blockq to the 4x4 pixel area at dstq, and zeroes the DC coefficient
; in memory afterwards. Named args per cglobal: dst, block, stride.
;-----------------------------------------------------------------------------
INIT_MMX mmx
cglobal vp8_idct_dc_add, 3, 3, 0, dst, block, stride
    ; load data
    movd       m0, [blockq]

    ; calculate DC: dc = (dc + 4) >> 3, then build a positive byte splat
    ; in m0 and a negated byte splat in m1 for the saturated add
    paddw      m0, [pw_4]
    pxor       m1, m1
    psraw      m0, 3
    movd       [blockq], m1             ; clear the DC coefficient
    psubw      m1, m0
    packuswb   m0, m0
    packuswb   m1, m1
    punpcklbw  m0, m0                   ; NOTE(review): these three lines were
    punpcklbw  m1, m1                   ; elided by the diff hunk boundary and are
    punpcklwd  m0, m0                   ; restored from the evident splat pattern
    punpcklwd  m1, m1

    ; add DC
    DEFINE_ARGS dst1, dst2, stride
    lea        dst2q, [dst1q+strideq*2]
    ADD_DC     m0, m1, 0, movh
    RET
;-----------------------------------------------------------------------------
; vp8_idct_dc_add(dst, block, stride) -- SSE4 version
; Same contract as the MMX version: add (dc+4)>>3 to the 4x4 pixel block
; at dstq, clearing the DC coefficient at blockq. Works on all four rows
; at once in one xmm register; uses pextrd (SSE4.1) for the row stores.
;-----------------------------------------------------------------------------
INIT_XMM sse4
cglobal vp8_idct_dc_add, 3, 3, 6, dst, block, stride
    ; load data
    movd       m0, [blockq]
    pxor       m1, m1

    ; calculate DC
    paddw      m0, [pw_4]
    movd       [blockq], m1             ; clear the DC coefficient
    DEFINE_ARGS dst1, dst2, stride
    lea        dst2q, [dst1q+strideq*2]
    movd       m2, [dst1q]
    movd       m3, [dst1q+strideq]
    movd       m4, [dst2q]
    movd       m5, [dst2q+strideq]
    psraw      m0, 3
    pshuflw    m0, m0, 0
    punpcklqdq m0, m0                   ; splat dc across all 8 words
    punpckldq  m2, m3                   ; NOTE(review): these four lines were elided
    punpckldq  m4, m5                   ; by the diff hunk boundary; restored so the
    punpcklbw  m2, m1                   ; pixel rows are widened to words before the
    punpcklbw  m4, m1                   ; word-wise adds below -- confirm upstream
    paddw      m2, m0
    paddw      m4, m0
    packuswb   m2, m4                   ; saturate back to bytes, rows 0-3 in m2
    movd       [dst1q], m2
    pextrd     [dst1q+strideq], m2, 1
    pextrd     [dst2q], m2, 2
    pextrd     [dst2q+strideq], m2, 3
    RET
;-----------------------------------------------------------------------------
...
...
@@ -983,21 +985,21 @@ cglobal vp8_idct_dc_add, 3, 3, 6
%if ARCH_X86_32
;-----------------------------------------------------------------------------
; vp8_idct_dc_add4y(dst, block, stride) -- MMX version (x86-32 only)
; Applies the DC-only add to four horizontally adjacent 4x4 luma blocks.
; Coefficient blocks are 16 int16 (32 bytes) apart in blockq.
;-----------------------------------------------------------------------------
INIT_MMX mmx
cglobal vp8_idct_dc_add4y, 3, 3, 0, dst, block, stride
    ; load data: gather the four DC coefficients into m0 = A B C D
    movd      m0, [blockq+32*0]         ; A
    movd      m1, [blockq+32*2]         ; C
    punpcklwd m0, [blockq+32*1]         ; A B
    punpcklwd m1, [blockq+32*3]         ; C D
    punpckldq m0, m1                    ; A B C D
    pxor      m6, m6

    ; calculate DC, clearing each block's DC coefficient
    paddw     m0, [pw_4]
    movd      [blockq+32*0], m6
    movd      [blockq+32*1], m6
    movd      [blockq+32*2], m6
    movd      [blockq+32*3], m6
    psraw     m0, 3
    psubw     m6, m0
    packuswb  m0, m0
    ; NOTE(review): the splat sequence below was elided by the diff hunk
    ; boundary in the scraped source and is reconstructed from the
    ; vp8_idct_dc_add4uv twin -- confirm against upstream vp8dsp.asm
    packuswb  m6, m6
    punpcklbw m0, m0                    ; AABBCCDD
    punpcklbw m6, m6                    ; AABBCCDD
    movq      m1, m0
    movq      m7, m6
    punpcklbw m0, m0                    ; AAAABBBB
    punpckhbw m1, m1                    ; CCCCDDDD
    punpcklbw m6, m6                    ; AAAABBBB
    punpckhbw m7, m7                    ; CCCCDDDD

    ; add DC
    DEFINE_ARGS dst1, dst2, stride
    lea       dst2q, [dst1q+strideq*2]
    ADD_DC    m0, m6, 0, mova
    ADD_DC    m1, m7, 8, mova
    RET
%endif
;-----------------------------------------------------------------------------
; vp8_idct_dc_add4y(dst, block, stride) -- SSE2 version
; Same contract as the MMX version: DC-only add across four adjacent
; 4x4 luma blocks, coefficient blocks 32 bytes apart at blockq.
;-----------------------------------------------------------------------------
INIT_XMM sse2
cglobal vp8_idct_dc_add4y, 3, 3, 6, dst, block, stride
    ; load data: gather the four DC coefficients into m0 = A B C D
    movd      m0, [blockq+32*0]         ; A
    movd      m1, [blockq+32*2]         ; C
    punpcklwd m0, [blockq+32*1]         ; A B
    punpcklwd m1, [blockq+32*3]         ; C D
    punpckldq m0, m1                    ; A B C D
    pxor      m1, m1

    ; calculate DC, clearing each block's DC coefficient
    paddw     m0, [pw_4]
    movd      [blockq+32*0], m1
    movd      [blockq+32*1], m1
    movd      [blockq+32*2], m1
    movd      [blockq+32*3], m1
    psraw     m0, 3
    psubw     m1, m0
    packuswb  m0, m0
    ; NOTE(review): the splat lines below were partially elided by the diff
    ; hunk boundary in the scraped source; reconstructed -- confirm against
    ; upstream vp8dsp.asm
    packuswb  m1, m1
    punpcklbw m0, m0
    punpcklbw m1, m1
    punpcklbw m0, m0
    punpcklbw m1, m1

    ; add DC
    DEFINE_ARGS dst1, dst2, stride
    lea       dst2q, [dst1q+strideq*2]
    ADD_DC    m0, m1, 0, mova
    RET
...
...
@@ -1053,21 +1057,21 @@ cglobal vp8_idct_dc_add4y, 3, 3, 6
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; vp8_idct_dc_add4uv(dst, block, stride) -- MMX version
; Applies the DC-only add to a 2x2 arrangement of 4x4 chroma blocks:
; blocks A,B on the first four rows, C,D four rows further down.
; Coefficient blocks are 32 bytes apart at blockq.
;-----------------------------------------------------------------------------
INIT_MMX mmx
cglobal vp8_idct_dc_add4uv, 3, 3, 0, dst, block, stride
    ; load data: gather the four DC coefficients into m0 = A B C D
    movd      m0, [blockq+32*0]         ; A
    movd      m1, [blockq+32*2]         ; C
    punpcklwd m0, [blockq+32*1]         ; A B
    punpcklwd m1, [blockq+32*3]         ; C D
    punpckldq m0, m1                    ; A B C D
    pxor      m6, m6

    ; calculate DC, clearing each block's DC coefficient
    paddw     m0, [pw_4]
    movd      [blockq+32*0], m6
    movd      [blockq+32*1], m6
    movd      [blockq+32*2], m6
    movd      [blockq+32*3], m6
    psraw     m0, 3
    psubw     m6, m0
    packuswb  m0, m0
    ; NOTE(review): the splat sequence below was elided by the diff hunk
    ; boundary in the scraped source; reconstructed from the add4y twin --
    ; confirm against upstream vp8dsp.asm
    packuswb  m6, m6
    punpcklbw m0, m0                    ; AABBCCDD
    punpcklbw m6, m6                    ; AABBCCDD
    movq      m1, m0
    movq      m7, m6
    punpcklbw m0, m0                    ; AAAABBBB
    punpckhbw m1, m1                    ; CCCCDDDD
    punpcklbw m6, m6                    ; AAAABBBB
    punpckhbw m7, m7                    ; CCCCDDDD

    ; add DC: A/B on the first 4 rows, then advance 4 rows for C/D
    DEFINE_ARGS dst1, dst2, stride
    lea       dst2q, [dst1q+strideq*2]
    ADD_DC    m0, m6, 0, mova
    lea       dst1q, [dst1q+strideq*4]
    lea       dst2q, [dst2q+strideq*4]
    ADD_DC    m1, m7, 0, mova
    RET
...
...
@@ -1125,24 +1130,24 @@ cglobal vp8_idct_dc_add4uv, 3, 3
%endmacro
%macro
VP8_IDCT_ADD
0
cglobal
vp8_idct_add
,
3
,
3
cglobal
vp8_idct_add
,
3
,
3
,
0
,
dst
,
block
,
stride
; load block data
movq
m0
,
[
r1
+
0
]
movq
m1
,
[
r1
+
8
]
movq
m2
,
[
r1
+
16
]
movq
m3
,
[
r1
+
24
]
movq
m0
,
[
blockq
+
0
]
movq
m1
,
[
blockq
+
8
]
movq
m2
,
[
blockq
+
16
]
movq
m3
,
[
blockq
+
24
]
movq
m6
,
[
pw_20091
]
movq
m7
,
[
pw_17734
]
%if
cpuflag
(
sse
)
xorps
xmm0
,
xmm0
movaps
[
r1
+
0
]
,
xmm0
movaps
[
r1
+
16
]
,
xmm0
movaps
[
blockq
+
0
]
,
xmm0
movaps
[
blockq
+
16
]
,
xmm0
%else
pxor
m4
,
m4
movq
[
r1
+
0
]
,
m4
movq
[
r1
+
8
]
,
m4
movq
[
r1
+
16
]
,
m4
movq
[
r1
+
24
]
,
m4
movq
[
blockq
+
0
]
,
m4
movq
[
blockq
+
8
]
,
m4
movq
[
blockq
+
16
]
,
m4
movq
[
blockq
+
24
]
,
m4
%endif
; actual IDCT
...
...
@@ -1154,9 +1159,10 @@ cglobal vp8_idct_add, 3, 3
; store
pxor
m4
,
m4
lea
r1
,
[
r0
+
2
*
r2
]
STORE_DIFFx2
m0
,
m1
,
m6
,
m7
,
m4
,
3
,
r0
,
r2
STORE_DIFFx2
m2
,
m3
,
m6
,
m7
,
m4
,
3
,
r1
,
r2
DEFINE_ARGS
dst1
,
dst2
,
stride
lea
dst2q
,
[
dst1q
+
2
*
strideq
]
STORE_DIFFx2
m0
,
m1
,
m6
,
m7
,
m4
,
3
,
dst1q
,
strideq
STORE_DIFFx2
m2
,
m3
,
m6
,
m7
,
m4
,
3
,
dst2q
,
strideq
RET
%endmacro
...
...
@@ -1173,24 +1179,24 @@ VP8_IDCT_ADD
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; SCATTER_WHT %1, %2, %3
;   %1, %2 = mm register numbers holding two WHT output rows (4 x int16 each)
;   %3     = column offset (block index within the row of luma blocks)
;
; Scatters eight 16-bit DC values into the DC slot of eight per-block
; coefficient arrays at blockq. Each 4x4 block holds 16 int16 coefficients,
; so consecutive blocks are 2*16 bytes apart. Uses the named dc1/dc2
; scratch GPRs set up by the caller; shifts m%1/m%2 as it consumes them.
;-----------------------------------------------------------------------------
%macro SCATTER_WHT 3
    movd  dc1d, m%1
    movd  dc2d, m%2
    mov   [blockq+2*16*( 0+%3)], dc1w
    mov   [blockq+2*16*( 1+%3)], dc2w
    shr   dc1d, 16
    shr   dc2d, 16
    psrlq m%1, 32                       ; expose the next pair of words
    psrlq m%2, 32
    mov   [blockq+2*16*( 4+%3)], dc1w
    mov   [blockq+2*16*( 5+%3)], dc2w
    movd  dc1d, m%1
    movd  dc2d, m%2
    mov   [blockq+2*16*( 8+%3)], dc1w
    mov   [blockq+2*16*( 9+%3)], dc2w
    shr   dc1d, 16
    shr   dc2d, 16
    mov   [blockq+2*16*(12+%3)], dc1w
    mov   [blockq+2*16*(13+%3)], dc2w
%endmacro
%macro
HADAMARD4_1D
4
...
...
@@ -1200,21 +1206,21 @@ VP8_IDCT_ADD
%endmacro
%macro
VP8_DC_WHT
0
cglobal
vp8_luma_dc_wht
,
2
,
3
movq
m0
,
[
r1
]
movq
m1
,
[
r1
+
8
]
movq
m2
,
[
r1
+
16
]
movq
m3
,
[
r1
+
24
]
cglobal
vp8_luma_dc_wht
,
2
,
3
,
0
,
block
,
dc1
,
dc2
movq
m0
,
[
dc1q
]
movq
m1
,
[
dc1q
+
8
]
movq
m2
,
[
dc1q
+
16
]
movq
m3
,
[
dc1q
+
24
]
%if
cpuflag
(
sse
)
xorps
xmm0
,
xmm0
movaps
[
r1
+
0
]
,
xmm0
movaps
[
r1
+
16
]
,
xmm0
movaps
[
dc1q
+
0
]
,
xmm0
movaps
[
dc1q
+
16
]
,
xmm0
%else
pxor
m4
,
m4
movq
[
r1
+
0
]
,
m4
movq
[
r1
+
8
]
,
m4
movq
[
r1
+
16
]
,
m4
movq
[
r1
+
24
]
,
m4
movq
[
dc1q
+
0
]
,
m4
movq
[
dc1q
+
8
]
,
m4
movq
[
dc1q
+
16
]
,
m4
movq
[
dc1q
+
24
]
,
m4
%endif
HADAMARD4_1D
0
,
1
,
2
,
3
TRANSPOSE4x4W
0
,
1
,
2
,
3
,
4
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment