Linshizhi / ffmpeg.wasm-core · Commits

Commit c83f44db, authored Jul 28, 2012 by Ronald S. Bultje
h264_idct_10bit: port x86 assembly to cpuflags.
parent f8d8fe25
Showing 1 changed file with 127 additions and 127 deletions:
libavcodec/x86/h264_idct_10bit.asm (+127 / -127)
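
Context for the change: the cpuflags convention from x86inc.asm replaces the hand-rolled _sse2/_avx/_mmx2 name suffixes used before. INIT_XMM sse2 (rather than a bare INIT_XMM) records the active instruction set, cglobal then appends the cpu suffix to the symbol on its own, and %+ SUFFIX pastes it onto internal labels. A minimal sketch of the new style, assuming the IDCT4_ADD_10 helper from this file and the x86util.asm include path FFmpeg uses; illustrative only, not part of the commit:

%include "libavutil/x86/x86util.asm" ; pulls in x86inc.asm: INIT_XMM, cglobal, SUFFIX

%macro IDCT_ADD_10 0
cglobal h264_idct_add_10, 3,3       ; cpu suffix is appended to the symbol automatically
    IDCT4_ADD_10 r0, r1, r2
    RET
%endmacro

INIT_XMM sse2                       ; m0.. map to xmm regs; SUFFIX expands to _sse2
IDCT_ADD_10                         ; emits h264_idct_add_10_sse2
INIT_XMM avx                        ; same body re-assembled with AVX encodings
IDCT_ADD_10                         ; emits h264_idct_add_10_avx
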
@@ -72,25 +72,25 @@ SECTION .text
     STORE_DIFFx2 m2, m3, m4, m5, %1, %3
 %endmacro
 
-%macro IDCT_ADD_10 1
-cglobal h264_idct_add_10_%1, 3,3
+%macro IDCT_ADD_10 0
+cglobal h264_idct_add_10, 3,3
     IDCT4_ADD_10 r0, r1, r2
     RET
 %endmacro
 
-INIT_XMM
-IDCT_ADD_10 sse2
+INIT_XMM sse2
+IDCT_ADD_10
 %if HAVE_AVX
-INIT_AVX
-IDCT_ADD_10 avx
+INIT_XMM avx
+IDCT_ADD_10
 %endif
 
 ;-----------------------------------------------------------------------------
 ; h264_idct_add16(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
 ;;;;;;; NO FATE SAMPLES TRIGGER THIS
-%macro ADD4x4IDCT 1
-add4x4_idct_%1:
+%macro ADD4x4IDCT 0
+add4x4_idct %+ SUFFIX:
     add   r5, r0
     mova  m0, [r2+ 0]
     mova  m1, [r2+16]
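
A note on the add4x4_idct label above: helpers that are not declared with cglobal get their suffix through NASM's %+ token-pasting operator, which concatenates the name with the SUFFIX that the last INIT_* line defined. A hedged expansion sketch:

; Under INIT_XMM sse2, x86inc.asm defines SUFFIX so that
;   add4x4_idct %+ SUFFIX:       assembles as   add4x4_idct_sse2:
;   call add4x4_idct %+ SUFFIX   assembles as   call add4x4_idct_sse2
; Under INIT_XMM avx the identical source emits add4x4_idct_avx instead,
; which is why the macro body no longer needs the cpu name as an argument.
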
@@ -107,52 +107,52 @@ add4x4_idct_%1:
     ret
 %endmacro
 
-INIT_XMM
+INIT_XMM sse2
 ALIGN 16
-ADD4x4IDCT sse2
+ADD4x4IDCT
 %if HAVE_AVX
-INIT_AVX
+INIT_XMM avx
 ALIGN 16
-ADD4x4IDCT avx
+ADD4x4IDCT
 %endif
 
-%macro ADD16_OP 3
-    cmp  byte [r4+%3], 0
-    jz .skipblock%2
-    mov  r5d, [r1+%2*4]
-    call add4x4_idct_%1
-.skipblock%2:
-%if %2<15
+%macro ADD16_OP 2
+    cmp  byte [r4+%2], 0
+    jz .skipblock%1
+    mov  r5d, [r1+%1*4]
+    call add4x4_idct %+ SUFFIX
+.skipblock%1:
+%if %1<15
     add  r2, 64
 %endif
 %endmacro
 
-%macro IDCT_ADD16_10 1
-cglobal h264_idct_add16_10_%1, 5,6
-    ADD16_OP %1, 0, 4+1*8
-    ADD16_OP %1, 1, 5+1*8
-    ADD16_OP %1, 2, 4+2*8
-    ADD16_OP %1, 3, 5+2*8
-    ADD16_OP %1, 4, 6+1*8
-    ADD16_OP %1, 5, 7+1*8
-    ADD16_OP %1, 6, 6+2*8
-    ADD16_OP %1, 7, 7+2*8
-    ADD16_OP %1, 8, 4+3*8
-    ADD16_OP %1, 9, 5+3*8
-    ADD16_OP %1, 10, 4+4*8
-    ADD16_OP %1, 11, 5+4*8
-    ADD16_OP %1, 12, 6+3*8
-    ADD16_OP %1, 13, 7+3*8
-    ADD16_OP %1, 14, 6+4*8
-    ADD16_OP %1, 15, 7+4*8
+%macro IDCT_ADD16_10 0
+cglobal h264_idct_add16_10, 5,6
+    ADD16_OP 0, 4+1*8
+    ADD16_OP 1, 5+1*8
+    ADD16_OP 2, 4+2*8
+    ADD16_OP 3, 5+2*8
+    ADD16_OP 4, 6+1*8
+    ADD16_OP 5, 7+1*8
+    ADD16_OP 6, 6+2*8
+    ADD16_OP 7, 7+2*8
+    ADD16_OP 8, 4+3*8
+    ADD16_OP 9, 5+3*8
+    ADD16_OP 10, 4+4*8
+    ADD16_OP 11, 5+4*8
+    ADD16_OP 12, 6+3*8
+    ADD16_OP 13, 7+3*8
+    ADD16_OP 14, 6+4*8
+    ADD16_OP 15, 7+4*8
     REP_RET
 %endmacro
 
-INIT_XMM
-IDCT_ADD16_10 sse2
+INIT_XMM sse2
+IDCT_ADD16_10
 %if HAVE_AVX
-INIT_AVX
-IDCT_ADD16_10 avx
+INIT_XMM avx
+IDCT_ADD16_10
 %endif
 
 ;-----------------------------------------------------------------------------
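
Dropping the cpu-name argument also shifts every remaining macro parameter down by one: ADD16_OP now takes just the 4x4 block index (%1) and the nnzc offset (%2) instead of three arguments. As a worked example (an approximate expansion, assuming INIT_XMM sse2), ADD16_OP 0, 4+1*8 becomes roughly:

    cmp  byte [r4+12], 0     ; nnzc offset 4+1*8 = 12: any nonzero coeffs in block 0?
    jz   .skipblock0
    mov  r5d, [r1+0*4]       ; block_offset[0], pixel offset of this 4x4 block
    call add4x4_idct_sse2    ; suffix pasted in by %+ SUFFIX
.skipblock0:
    add  r2, 64              ; 0 < 15: step to block 1 (16 coeffs x 4 bytes at 10-bit)
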
@@ -185,8 +185,8 @@ IDCT_ADD16_10 avx
     mova  [%1+%3], m4
 %endmacro
 
-INIT_MMX
-cglobal h264_idct_dc_add_10_mmx2,3,3
+INIT_MMX mmx2
+cglobal h264_idct_dc_add_10,3,3
     movd  m0, [r1]
     paddd m0, [pd_32]
     psrad m0, 6
@@ -199,8 +199,8 @@ cglobal h264_idct_dc_add_10_mmx2,3,3
 ;-----------------------------------------------------------------------------
 ; void h264_idct8_dc_add(pixel *dst, dctcoef *block, int stride)
 ;-----------------------------------------------------------------------------
-%macro IDCT8_DC_ADD 1
-cglobal h264_idct8_dc_add_10_%1,3,3,7
+%macro IDCT8_DC_ADD 0
+cglobal h264_idct8_dc_add_10,3,3,7
     mov   r1d, [r1]
     add   r1, 32
     sar   r1, 6
@@ -214,45 +214,45 @@ cglobal h264_idct8_dc_add_10_%1,3,3,7
     RET
 %endmacro
 
-INIT_XMM
-IDCT8_DC_ADD sse2
+INIT_XMM sse2
+IDCT8_DC_ADD
 %if HAVE_AVX
-INIT_AVX
-IDCT8_DC_ADD avx
+INIT_XMM avx
+IDCT8_DC_ADD
 %endif
 
 ;-----------------------------------------------------------------------------
 ; h264_idct_add16intra(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
-%macro AC 2
-.ac%2
-    mov  r5d, [r1+(%2+0)*4]
-    call add4x4_idct_%1
-    mov  r5d, [r1+(%2+1)*4]
+%macro AC 1
+.ac%1
+    mov  r5d, [r1+(%1+0)*4]
+    call add4x4_idct %+ SUFFIX
+    mov  r5d, [r1+(%1+1)*4]
     add  r2, 64
-    call add4x4_idct_%1
+    call add4x4_idct %+ SUFFIX
     add  r2, 64
-    jmp .skipadd%2
+    jmp .skipadd%1
 %endmacro
 
 %assign last_block 16
-%macro ADD16_OP_INTRA 3
-    cmp      word [r4+%3], 0
-    jnz .ac%2
+%macro ADD16_OP_INTRA 2
+    cmp      word [r4+%2], 0
+    jnz .ac%1
     mov      r5d, [r2+ 0]
     or       r5d, [r2+64]
-    jz .skipblock%2
-    mov      r5d, [r1+(%2+0)*4]
-    call idct_dc_add_%1
-.skipblock%2:
-%if %2<last_block-2
+    jz .skipblock%1
+    mov      r5d, [r1+(%1+0)*4]
+    call idct_dc_add %+ SUFFIX
+.skipblock%1:
+%if %1<last_block-2
     add      r2, 128
 %endif
-.skipadd%2:
+.skipadd%1:
 %endmacro
 
-%macro IDCT_ADD16INTRA_10 1
-idct_dc_add_%1:
+%macro IDCT_ADD16INTRA_10 0
+idct_dc_add %+ SUFFIX:
     add       r5, r0
     movq      m0, [r2+ 0]
     movhps    m0, [r2+64]
@@ -265,46 +265,46 @@ idct_dc_add_%1:
     IDCT_DC_ADD_OP_10 r5, r3, r6
     ret
 
-cglobal h264_idct_add16intra_10_%1,5,7,8
-    ADD16_OP_INTRA %1, 0, 4+1*8
-    ADD16_OP_INTRA %1, 2, 4+2*8
-    ADD16_OP_INTRA %1, 4, 6+1*8
-    ADD16_OP_INTRA %1, 6, 6+2*8
-    ADD16_OP_INTRA %1, 8, 4+3*8
-    ADD16_OP_INTRA %1, 10, 4+4*8
-    ADD16_OP_INTRA %1, 12, 6+3*8
-    ADD16_OP_INTRA %1, 14, 6+4*8
+cglobal h264_idct_add16intra_10,5,7,8
+    ADD16_OP_INTRA 0, 4+1*8
+    ADD16_OP_INTRA 2, 4+2*8
+    ADD16_OP_INTRA 4, 6+1*8
+    ADD16_OP_INTRA 6, 6+2*8
+    ADD16_OP_INTRA 8, 4+3*8
+    ADD16_OP_INTRA 10, 4+4*8
+    ADD16_OP_INTRA 12, 6+3*8
+    ADD16_OP_INTRA 14, 6+4*8
     REP_RET
-    AC %1, 8
-    AC %1, 10
-    AC %1, 12
-    AC %1, 14
-    AC %1, 0
-    AC %1, 2
-    AC %1, 4
-    AC %1, 6
+    AC 8
+    AC 10
+    AC 12
+    AC 14
+    AC 0
+    AC 2
+    AC 4
+    AC 6
 %endmacro
 
-INIT_XMM
-IDCT_ADD16INTRA_10 sse2
+INIT_XMM sse2
+IDCT_ADD16INTRA_10
 %if HAVE_AVX
-INIT_AVX
-IDCT_ADD16INTRA_10 avx
+INIT_XMM avx
+IDCT_ADD16INTRA_10
 %endif
 
 %assign last_block 36
 ;-----------------------------------------------------------------------------
 ; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
-%macro IDCT_ADD8 1
-cglobal h264_idct_add8_10_%1,5,8,7
+%macro IDCT_ADD8 0
+cglobal h264_idct_add8_10,5,8,7
 %if ARCH_X86_64
     mov      r7, r0
 %endif
     add      r2, 1024
     mov      r0, [r0]
-    ADD16_OP_INTRA %1, 16, 4+ 6*8
-    ADD16_OP_INTRA %1, 18, 4+ 7*8
+    ADD16_OP_INTRA 16, 4+ 6*8
+    ADD16_OP_INTRA 18, 4+ 7*8
     add      r2, 1024-128*2
 %if ARCH_X86_64
     mov      r0, [r7+gprsize]
@@ -312,21 +312,21 @@ cglobal h264_idct_add8_10_%1,5,8,7
     mov      r0, r0m
     mov      r0, [r0+gprsize]
 %endif
-    ADD16_OP_INTRA %1, 32, 4+11*8
-    ADD16_OP_INTRA %1, 34, 4+12*8
+    ADD16_OP_INTRA 32, 4+11*8
+    ADD16_OP_INTRA 34, 4+12*8
     REP_RET
-    AC %1, 16
-    AC %1, 18
-    AC %1, 32
-    AC %1, 34
+    AC 16
+    AC 18
+    AC 32
+    AC 34
 %endmacro ; IDCT_ADD8
 
-INIT_XMM
-IDCT_ADD8 sse2
+INIT_XMM sse2
+IDCT_ADD8
 %if HAVE_AVX
-INIT_AVX
-IDCT_ADD8 avx
+INIT_XMM avx
+IDCT_ADD8
 %endif
 
 ;-----------------------------------------------------------------------------
@@ -432,19 +432,19 @@ IDCT_ADD8 avx
     STORE_DIFFx2 m0, m1, m6, m7, %1, %3
 %endmacro
 
-%macro IDCT8_ADD 1
-cglobal h264_idct8_add_10_%1, 3,4,16
+%macro IDCT8_ADD 0
+cglobal h264_idct8_add_10, 3,4,16
 %if UNIX64 == 0
     %assign pad 16-gprsize-(stack_offset&15)
     sub  rsp, pad
-    call h264_idct8_add1_10_%1
+    call h264_idct8_add1_10 %+ SUFFIX
     add  rsp, pad
     RET
 %endif
 
 ALIGN 16
 ; TODO: does not need to use stack
-h264_idct8_add1_10_%1:
+h264_idct8_add1_10 %+ SUFFIX:
 %assign pad 256+16-gprsize
     sub  rsp, pad
     add  dword [r1], 32
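
The pad arithmetic around the internal call keeps the stack 16-byte aligned for the aligned vector spills inside the helper: stack_offset is x86inc.asm's count of bytes pushed since function entry, and reserving 16-gprsize-(stack_offset&15) bytes makes rsp 16-byte aligned at the call itself. A worked case, assuming x86-64 (gprsize = 8) and nothing pushed since entry (stack_offset & 15 = 0):

;   pad = 16 - 8 - 0 = 8
;   sub  rsp, pad                 ; entry rsp was 8 mod 16; now 16-byte aligned
;   call h264_idct8_add1_10_sse2  ; pushes an 8-byte return address
;   ; inside the helper: sub rsp, 256+16-8 = 264; 8+264 = 272 = 17*16,
;   ; so the 256-byte scratch area starts on a 16-byte boundary
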
@@ -499,31 +499,31 @@ h264_idct8_add1_10_%1:
     ret
 %endmacro
 
-INIT_XMM
-IDCT8_ADD sse2
+INIT_XMM sse2
+IDCT8_ADD
 %if HAVE_AVX
-INIT_AVX
-IDCT8_ADD avx
+INIT_XMM avx
+IDCT8_ADD
 %endif
 
 ;-----------------------------------------------------------------------------
 ; h264_idct8_add4(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
 ;;;;;;; NO FATE SAMPLES TRIGGER THIS
-%macro IDCT8_ADD4_OP 3
-    cmp       byte [r4+%3], 0
-    jz .skipblock%2
-    mov      r0d, [r6+%2*4]
+%macro IDCT8_ADD4_OP 2
+    cmp       byte [r4+%2], 0
+    jz .skipblock%1
+    mov      r0d, [r6+%1*4]
     add       r0, r5
-    call h264_idct8_add1_10_%1
-.skipblock%2:
-%if %2<12
+    call h264_idct8_add1_10 %+ SUFFIX
+.skipblock%1:
+%if %1<12
     add       r1, 256
 %endif
 %endmacro
 
-%macro IDCT8_ADD4 1
-cglobal h264_idct8_add4_10_%1, 0,7,16
+%macro IDCT8_ADD4 0
+cglobal h264_idct8_add4_10, 0,7,16
     %assign pad 16-gprsize-(stack_offset&15)
     SUB      rsp, pad
     mov       r5, r0mp
@@ -531,17 +531,17 @@ cglobal h264_idct8_add4_10_%1, 0,7,16
     mov       r1, r2mp
     mov      r2d, r3m
     movifnidn r4, r4mp
-    IDCT8_ADD4_OP %1,  0, 4+1*8
-    IDCT8_ADD4_OP %1,  4, 6+1*8
-    IDCT8_ADD4_OP %1,  8, 4+3*8
-    IDCT8_ADD4_OP %1, 12, 6+3*8
+    IDCT8_ADD4_OP  0, 4+1*8
+    IDCT8_ADD4_OP  4, 6+1*8
+    IDCT8_ADD4_OP  8, 4+3*8
+    IDCT8_ADD4_OP 12, 6+3*8
     ADD      rsp, pad
     RET
 %endmacro ; IDCT8_ADD4
 
-INIT_XMM
-IDCT8_ADD4 sse2
+INIT_XMM sse2
+IDCT8_ADD4
 %if HAVE_AVX
-INIT_AVX
-IDCT8_ADD4 avx
+INIT_XMM avx
+IDCT8_ADD4
 %endif