Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
6405ca7d
Commit
6405ca7d
authored
Oct 06, 2013
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: h264_idct: Update comments to match 8/10-bit depth optimization split
parent
3e2fa991
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
42 additions
and
32 deletions
+42
-32
h264_idct.asm
libavcodec/x86/h264_idct.asm
+42
-32
No files found.
libavcodec/x86/h264_idct.asm
View file @
6405ca7d
...
@@ -80,7 +80,7 @@ SECTION .text
...
@@ -80,7 +80,7 @@ SECTION .text
%endmacro
%endmacro
INIT_MMX
mmx
INIT_MMX
mmx
; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct_add_
8_
mmx(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct_add_8
,
3
,
3
,
0
cglobal
h264_idct_add_8
,
3
,
3
,
0
IDCT4_ADD
r0
,
r1
,
r2
IDCT4_ADD
r0
,
r1
,
r2
RET
RET
...
@@ -202,7 +202,7 @@ cglobal h264_idct_add_8, 3, 3, 0
...
@@ -202,7 +202,7 @@ cglobal h264_idct_add_8, 3, 3, 0
%endmacro
%endmacro
INIT_MMX
mmx
INIT_MMX
mmx
; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct8_add_
8_
mmx(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_add_8
,
3
,
4
,
0
cglobal
h264_idct8_add_8
,
3
,
4
,
0
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
SUB
rsp
,
pad
...
@@ -270,7 +270,7 @@ cglobal h264_idct8_add_8, 3, 4, 0
...
@@ -270,7 +270,7 @@ cglobal h264_idct8_add_8, 3, 4, 0
%endmacro
%endmacro
INIT_XMM
sse2
INIT_XMM
sse2
; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct8_add_
8_
sse2(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_add_8
,
3
,
4
,
10
cglobal
h264_idct8_add_8
,
3
,
4
,
10
IDCT8_ADD_SSE
r0
,
r1
,
r2
,
r3
IDCT8_ADD_SSE
r0
,
r1
,
r2
,
r3
RET
RET
...
@@ -307,7 +307,7 @@ cglobal h264_idct8_add_8, 3, 4, 10
...
@@ -307,7 +307,7 @@ cglobal h264_idct8_add_8, 3, 4, 10
%endmacro
%endmacro
INIT_MMX
mmxext
INIT_MMX
mmxext
; ff_h264_idct_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct_dc_add_
8_
mmxext(uint8_t *dst, int16_t *block, int stride)
%if
ARCH_X86_64
%if
ARCH_X86_64
cglobal
h264_idct_dc_add_8
,
3
,
4
,
0
cglobal
h264_idct_dc_add_8
,
3
,
4
,
0
movsx
r3
,
word
[r1]
movsx
r3
,
word
[r1]
...
@@ -316,7 +316,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
...
@@ -316,7 +316,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
DC_ADD_MMXEXT_OP
movh
,
r0
,
r2
,
r3
DC_ADD_MMXEXT_OP
movh
,
r0
,
r2
,
r3
RET
RET
; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct8_dc_add_
8_
mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_dc_add_8
,
3
,
4
,
0
cglobal
h264_idct8_dc_add_8
,
3
,
4
,
0
movsx
r3
,
word
[r1]
movsx
r3
,
word
[r1]
mov
dword
[r1],
0
mov
dword
[r1],
0
...
@@ -326,6 +326,7 @@ cglobal h264_idct8_dc_add_8, 3, 4, 0
...
@@ -326,6 +326,7 @@ cglobal h264_idct8_dc_add_8, 3, 4, 0
DC_ADD_MMXEXT_OP
mova
,
r0
,
r2
,
r3
DC_ADD_MMXEXT_OP
mova
,
r0
,
r2
,
r3
RET
RET
%else
%else
; ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct_dc_add_8
,
2
,
3
,
0
cglobal
h264_idct_dc_add_8
,
2
,
3
,
0
movsx
r2
,
word
[r1]
movsx
r2
,
word
[r1]
mov
dword
[r1],
0
mov
dword
[r1],
0
...
@@ -334,7 +335,7 @@ cglobal h264_idct_dc_add_8, 2, 3, 0
...
@@ -334,7 +335,7 @@ cglobal h264_idct_dc_add_8, 2, 3, 0
DC_ADD_MMXEXT_OP
movh
,
r0
,
r1
,
r2
DC_ADD_MMXEXT_OP
movh
,
r0
,
r1
,
r2
RET
RET
; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
; ff_h264_idct8_dc_add_
8_
mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_dc_add_8
,
2
,
3
,
0
cglobal
h264_idct8_dc_add_8
,
2
,
3
,
0
movsx
r2
,
word
[r1]
movsx
r2
,
word
[r1]
mov
dword
[r1],
0
mov
dword
[r1],
0
...
@@ -347,8 +348,9 @@ cglobal h264_idct8_dc_add_8, 2, 3, 0
...
@@ -347,8 +348,9 @@ cglobal h264_idct8_dc_add_8, 2, 3, 0
%endif
%endif
INIT_MMX
mmx
INIT_MMX
mmx
; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
; ff_h264_idct_add16_8_mmx(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
cglobal
h264_idct_add16_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
xor
r5
,
r5
xor
r5
,
r5
%ifdef
PIC
%ifdef
PIC
...
@@ -369,8 +371,9 @@ cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
...
@@ -369,8 +371,9 @@ cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
jl
.
nextblock
jl
.
nextblock
REP_RET
REP_RET
; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
; ff_h264_idct8_add4_8_mmx(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct8_add4_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
cglobal
h264_idct8_add4_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
SUB
rsp
,
pad
...
@@ -402,8 +405,9 @@ cglobal h264_idct8_add4_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
...
@@ -402,8 +405,9 @@ cglobal h264_idct8_add4_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
RET
RET
INIT_MMX
mmxext
INIT_MMX
mmxext
; ff_h264_idct_add16_mmxext(uint8_t *dst, const int *block_offset,
; ff_h264_idct_add16_8_mmxext(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
cglobal
h264_idct_add16_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
xor
r5
,
r5
xor
r5
,
r5
%ifdef
PIC
%ifdef
PIC
...
@@ -448,8 +452,9 @@ cglobal h264_idct_add16_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
...
@@ -448,8 +452,9 @@ cglobal h264_idct_add16_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
REP_RET
REP_RET
INIT_MMX
mmx
INIT_MMX
mmx
; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
; ff_h264_idct_add16intra_8_mmx(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16intra_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
cglobal
h264_idct_add16intra_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
xor
r5
,
r5
xor
r5
,
r5
%ifdef
PIC
%ifdef
PIC
...
@@ -472,9 +477,9 @@ cglobal h264_idct_add16intra_8, 5, 7 + npicregs, 0, dst, block_offset, block, st
...
@@ -472,9 +477,9 @@ cglobal h264_idct_add16intra_8, 5, 7 + npicregs, 0, dst, block_offset, block, st
REP_RET
REP_RET
INIT_MMX
mmxext
INIT_MMX
mmxext
; ff_h264_idct_add16intra_mmxext(uint8_t *dst, const int *block_offset,
; ff_h264_idct_add16intra_
8_
mmxext(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
;
int16_t *block, int stride,
;
const uint8_t nnzc[6*
8])
;
const uint8_t nnzc[6 *
8])
cglobal
h264_idct_add16intra_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
cglobal
h264_idct_add16intra_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
xor
r5
,
r5
xor
r5
,
r5
%ifdef
PIC
%ifdef
PIC
...
@@ -516,9 +521,9 @@ cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, s
...
@@ -516,9 +521,9 @@ cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, s
jl
.
nextblock
jl
.
nextblock
REP_RET
REP_RET
; ff_h264_idct8_add4_mmxext(uint8_t *dst, const int *block_offset,
; ff_h264_idct8_add4_
8_
mmxext(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
;
int16_t *block, int stride,
;
const uint8_t nnzc[6*
8])
;
const uint8_t nnzc[6 *
8])
cglobal
h264_idct8_add4_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
cglobal
h264_idct8_add4_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
SUB
rsp
,
pad
...
@@ -578,8 +583,9 @@ cglobal h264_idct8_add4_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
...
@@ -578,8 +583,9 @@ cglobal h264_idct8_add4_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
RET
RET
INIT_XMM
sse2
INIT_XMM
sse2
; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
; ff_h264_idct8_add4_8_sse2(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct8_add4_8
,
5
,
8
+
npicregs
,
10
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
cglobal
h264_idct8_add4_8
,
5
,
8
+
npicregs
,
10
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
xor
r5
,
r5
xor
r5
,
r5
%ifdef
PIC
%ifdef
PIC
...
@@ -654,8 +660,8 @@ h264_idct_add8_mmx_plane:
...
@@ -654,8 +660,8 @@ h264_idct_add8_mmx_plane:
jnz
.
nextblock
jnz
.
nextblock
rep
ret
rep
ret
; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
; ff_h264_idct_add8_
8_
mmx(uint8_t **dest, const int *block_offset,
;
int16_t *block, int stride, const uint8_t nnzc[6*
8])
;
int16_t *block, int stride, const uint8_t nnzc[6 *
8])
cglobal
h264_idct_add8_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
cglobal
h264_idct_add8_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
mov
r5
,
16
mov
r5
,
16
add
r2
,
512
add
r2
,
512
...
@@ -719,8 +725,9 @@ h264_idct_add8_mmxext_plane:
...
@@ -719,8 +725,9 @@ h264_idct_add8_mmxext_plane:
rep
ret
rep
ret
INIT_MMX
mmxext
INIT_MMX
mmxext
; ff_h264_idct_add8_mmxext(uint8_t **dest, const int *block_offset,
; ff_h264_idct_add8_8_mmxext(uint8_t **dest, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add8_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
cglobal
h264_idct_add8_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
mov
r5
,
16
mov
r5
,
16
add
r2
,
512
add
r2
,
512
...
@@ -802,8 +809,9 @@ h264_add8x4_idct_sse2:
...
@@ -802,8 +809,9 @@ h264_add8x4_idct_sse2:
%endif
%endif
%endmacro
%endmacro
; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
; ff_h264_idct_add16_8_sse2(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
5
+
ARCH_X86_64
,
8
cglobal
h264_idct_add16_8
,
5
,
5
+
ARCH_X86_64
,
8
%if
ARCH_X86_64
%if
ARCH_X86_64
mov
r5
,
r0
mov
r5
,
r0
...
@@ -849,8 +857,9 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
...
@@ -849,8 +857,9 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
%endif
%endif
%endmacro
%endmacro
; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
; ff_h264_idct_add16intra_8_sse2(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16intra_8
,
5
,
7
+
ARCH_X86_64
,
8
cglobal
h264_idct_add16intra_8
,
5
,
7
+
ARCH_X86_64
,
8
%if
ARCH_X86_64
%if
ARCH_X86_64
mov
r7
,
r0
mov
r7
,
r0
...
@@ -900,8 +909,9 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
...
@@ -900,8 +909,9 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
%endif
%endif
%endmacro
%endmacro
; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
; ff_h264_idct_add8_8_sse2(uint8_t **dest, const int *block_offset,
; int16_t *block, int stride, const uint8_t nnzc[6*8])
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add8_8
,
5
,
7
+
ARCH_X86_64
,
8
cglobal
h264_idct_add8_8
,
5
,
7
+
ARCH_X86_64
,
8
add
r2
,
512
add
r2
,
512
%if
ARCH_X86_64
%if
ARCH_X86_64
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment