Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
5361e10a
Commit
5361e10a
authored
Jul 27, 2012
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
proresdsp: port x86 assembly to cpuflags.
parent
e9da9a31
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
21 deletions
+18
-21
proresdsp.asm
libavcodec/x86/proresdsp.asm
+18
-21
No files found.
libavcodec/x86/proresdsp.asm
View file @
5361e10a
...
@@ -82,8 +82,7 @@ section .text align=16
...
@@ -82,8 +82,7 @@ section .text align=16
; %1 = row or col (for rounding variable)
; %1 = row or col (for rounding variable)
; %2 = number of bits to shift at the end
; %2 = number of bits to shift at the end
; %3 = optimization
%macro
IDCT_1D
2
%macro
IDCT_1D
3
; a0 = (W4 * row[0]) + (1 << (15 - 1));
; a0 = (W4 * row[0]) + (1 << (15 - 1));
; a1 = a0;
; a1 = a0;
; a2 = a0;
; a2 = a0;
...
@@ -330,8 +329,8 @@ section .text align=16
...
@@ -330,8 +329,8 @@ section .text align=16
; void prores_idct_put_10_<opt>(uint8_t *pixels, int stride,
; void prores_idct_put_10_<opt>(uint8_t *pixels, int stride,
; DCTELEM *block, const int16_t *qmat);
; DCTELEM *block, const int16_t *qmat);
%macro
idct_put_fn
2
%macro
idct_put_fn
1
cglobal
prores_idct_put_10
_
%1
,
4
,
4
,
%2
cglobal
prores_idct_put_10
,
4
,
4
,
%1
movsxd
r1
,
r1d
movsxd
r1
,
r1d
pxor
m15
,
m15
; zero
pxor
m15
,
m15
; zero
...
@@ -347,7 +346,7 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
...
@@ -347,7 +346,7 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
pmullw
m13
,
[
r3
+
64
]
pmullw
m13
,
[
r3
+
64
]
pmullw
m12
,
[
r3
+
96
]
pmullw
m12
,
[
r3
+
96
]
IDCT_1D
row
,
17
,
%1
IDCT_1D
row
,
17
; transpose for second part of IDCT
; transpose for second part of IDCT
TRANSPOSE8x8W
8
,
0
,
1
,
2
,
4
,
11
,
9
,
10
,
3
TRANSPOSE8x8W
8
,
0
,
1
,
2
,
4
,
11
,
9
,
10
,
3
...
@@ -362,7 +361,7 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
...
@@ -362,7 +361,7 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
; for (i = 0; i < 8; i++)
; for (i = 0; i < 8; i++)
; idctSparseColAdd(dest + i, line_size, block + i);
; idctSparseColAdd(dest + i, line_size, block + i);
IDCT_1D
col
,
20
,
%1
IDCT_1D
col
,
20
; clip/store
; clip/store
mova
m6
,
[
pw_512
]
mova
m6
,
[
pw_512
]
...
@@ -406,27 +405,25 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
...
@@ -406,27 +405,25 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
RET
RET
%endmacro
%endmacro
%macro
signextend_sse2
3
; dstlow, dsthigh, tmp
%macro
SIGNEXTEND
2
-
3
; dstlow, dsthigh, tmp
%if
cpuflag
(
sse4
)
movhlps
%2
,
%1
pmovsxwd
%1
,
%1
pmovsxwd
%2
,
%2
%else
; sse2
pxor
%3
,
%3
pxor
%3
,
%3
pcmpgtw
%3
,
%1
pcmpgtw
%3
,
%1
mova
%2
,
%1
mova
%2
,
%1
punpcklwd
%1
,
%3
punpcklwd
%1
,
%3
punpckhwd
%2
,
%3
punpckhwd
%2
,
%3
%endif
%endmacro
%endmacro
%macro
signextend_sse4
2
-
3
; dstlow, dsthigh
INIT_XMM
sse2
movhlps
%2
,
%1
idct_put_fn
16
pmovsxwd
%1
,
%1
INIT_XMM
sse4
pmovsxwd
%2
,
%2
idct_put_fn
16
%endmacro
INIT_XMM
avx
idct_put_fn
16
INIT_XMM
%define
SIGNEXTEND
signextend_sse2
idct_put_fn
sse2
,
16
INIT_XMM
%define
SIGNEXTEND
signextend_sse4
idct_put_fn
sse4
,
16
INIT_AVX
idct_put_fn
avx
,
16
%endif
%endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment