Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
f1a9eee4
Commit f1a9eee4 authored Jun 11, 2016 by Martin Storsjö
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: Add missing movsxd for the int stride parameter
Signed-off-by: Martin Storsjö <martin@martin.st>
parent
a2ddfadc
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 29 additions and 0 deletions
+29
-0
h264_idct.asm
libavcodec/x86/h264_idct.asm
+21
-0
h264_idct_10bit.asm
libavcodec/x86/h264_idct_10bit.asm
+8
-0
No files found.
libavcodec/x86/h264_idct.asm
View file @
f1a9eee4
...
...
@@ -82,6 +82,7 @@ SECTION .text
INIT_MMX mmx
;-----------------------------------------------------------------------------
; void ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
;
; cglobal declares 3 arguments, 3 general-purpose registers and 0 XMM
; registers. The argument registers are given names so the body reads in
; terms of the C prototype instead of r0/r1/r2.
;-----------------------------------------------------------------------------
cglobal h264_idct_add_8, 3, 3, 0, dst, block, stride
    ; stride is a 32-bit int in the prototype; widen it to the full register
    ; before it is passed on as the third operand of IDCT4_ADD.
    movsxdifnidn strideq, strided
    IDCT4_ADD    dstq, blockq, strideq
    RET
...
...
@@ -204,6 +205,7 @@ cglobal h264_idct_add_8, 3, 3, 0
INIT_MMX
mmx
; void ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_add_8
,
3
,
4
,
0
movsxdifnidn
r2
,
r2d
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
...
...
@@ -272,6 +274,7 @@ cglobal h264_idct8_add_8, 3, 4, 0
INIT_XMM sse2
;-----------------------------------------------------------------------------
; void ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, int stride)
;
; cglobal declares 3 arguments, 4 general-purpose registers and 10 XMM
; registers. The fourth GPR is not an argument; it is only passed to
; IDCT8_ADD_SSE as its fourth operand (presumably scratch space for the
; macro -- confirm against the IDCT8_ADD_SSE definition).
;-----------------------------------------------------------------------------
cglobal h264_idct8_add_8, 3, 4, 10, dst, block, stride, tmp
    ; stride is a 32-bit int in the prototype; widen it to the full register
    ; before the macro uses it.
    movsxdifnidn  strideq, strided
    IDCT8_ADD_SSE dstq, blockq, strideq, tmpq
    RET
...
...
@@ -310,6 +313,7 @@ INIT_MMX mmxext
; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
%if
ARCH_X86_64
cglobal
h264_idct_dc_add_8
,
3
,
4
,
0
movsxd
r2
,
r2d
movsx
r3
,
word
[r1]
mov
dword
[r1],
0
DC_ADD_MMXEXT_INIT
r3
,
r2
...
...
@@ -318,6 +322,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_dc_add_8
,
3
,
4
,
0
movsxd
r2
,
r2d
movsx
r3
,
word
[r1]
mov
dword
[r1],
0
DC_ADD_MMXEXT_INIT
r3
,
r2
...
...
@@ -352,6 +357,7 @@ INIT_MMX mmx
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
movsxdifnidn
r3
,
r3d
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -375,6 +381,7 @@ cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct8_add4_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
movsxdifnidn
r3
,
r3d
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
...
...
@@ -409,6 +416,7 @@ INIT_MMX mmxext
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -456,6 +464,7 @@ INIT_MMX mmx
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16intra_8
,
5
,
7
+
npicregs
,
0
,
dst
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
picreg
movsxdifnidn
r3
,
r3d
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -481,6 +490,7 @@ INIT_MMX mmxext
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16intra_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -525,6 +535,7 @@ cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, s
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct8_add4_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
...
...
@@ -587,6 +598,7 @@ INIT_XMM sse2
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct8_add4_8
,
5
,
8
+
npicregs
,
10
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -638,6 +650,7 @@ INIT_XMM cpuname
INIT_MMX
mmx
h264_idct_add8_mmx_plane
:
movsxdifnidn
r3
,
r3d
.
nextblock
:
movzx
r6
,
byte
[
scan8
+
r5
]
movzx
r6
,
byte
[
r4
+
r6
]
...
...
@@ -664,6 +677,7 @@ h264_idct_add8_mmx_plane:
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add8_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
mov
r5
,
16
add
r2
,
512
%ifdef
PIC
...
...
@@ -684,6 +698,7 @@ cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride,
RET
h264_idct_add8_mmxext_plane
:
movsxdifnidn
r3
,
r3d
.
nextblock
:
movzx
r6
,
byte
[
scan8
+
r5
]
movzx
r6
,
byte
[
r4
+
r6
]
...
...
@@ -730,6 +745,7 @@ INIT_MMX mmxext
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add8_8
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
movsxdifnidn
r3
,
r3d
mov
r5
,
16
add
r2
,
512
%if
ARCH_X86_64
...
...
@@ -751,6 +767,7 @@ cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride,
; r0 = uint8_t *dst, r2 = int16_t *block, r3 = int stride, r6=clobbered
h264_idct_dc_add8_mmxext
:
movsxdifnidn
r3
,
r3d
movd
m0
,
[
r2
]
; 0 0 X D
mov
word
[
r2
+
0
]
,
0
punpcklwd
m0
,
[
r2
+
32
]
; x X d D
...
...
@@ -771,6 +788,7 @@ ALIGN 16
INIT_XMM
sse2
; r0 = uint8_t *dst (clobbered), r2 = int16_t *block, r3 = int stride
h264_add8x4_idct_sse2
:
movsxdifnidn
r3
,
r3d
movq
m0
,
[
r2
+
0
]
movq
m1
,
[
r2
+
8
]
movq
m2
,
[
r2
+
16
]
...
...
@@ -814,6 +832,7 @@ h264_add8x4_idct_sse2:
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16_8
,
5
,
5
+
ARCH_X86_64
,
8
movsxdifnidn
r3
,
r3d
%if
ARCH_X86_64
mov
r5
,
r0
%endif
...
...
@@ -862,6 +881,7 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add16intra_8
,
5
,
7
+
ARCH_X86_64
,
8
movsxdifnidn
r3
,
r3d
%if
ARCH_X86_64
mov
r7
,
r0
%endif
...
...
@@ -914,6 +934,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
; int16_t *block, int stride,
; const uint8_t nnzc[6 * 8])
cglobal
h264_idct_add8_8
,
5
,
7
+
ARCH_X86_64
,
8
movsxdifnidn
r3
,
r3d
add
r2
,
512
%if
ARCH_X86_64
mov
r7
,
r0
...
...
libavcodec/x86/h264_idct_10bit.asm
View file @
f1a9eee4
...
...
@@ -77,6 +77,7 @@ SECTION .text
%macro IDCT_ADD_10 0
; Emits h264_idct_add_10: 3 arguments, 3 general-purpose registers.
; The names dst/block/stride are assumed to mirror the 8-bit
; h264_idct_add_8 prototype -- confirm against the C declaration.
; The XMM count is written out as 0 only because argument names must follow
; it; this is assumed to match the default used when the count is omitted
; (verify against x86inc's PROLOGUE).
cglobal h264_idct_add_10, 3, 3, 0, dst, block, stride
    ; Widen the 32-bit stride argument before IDCT4_ADD_10 consumes it.
    movsxdifnidn strideq, strided
    IDCT4_ADD_10 dstq, blockq, strideq
    RET
%endmacro
...
...
@@ -134,6 +135,7 @@ ADD4x4IDCT
%macro
IDCT_ADD16_10
0
cglobal
h264_idct_add16_10
,
5
,
6
movsxdifnidn
r3
,
r3d
ADD16_OP
0
,
4
+
1
*
8
ADD16_OP
1
,
5
+
1
*
8
ADD16_OP
2
,
4
+
2
*
8
...
...
@@ -190,6 +192,7 @@ IDCT_ADD16_10
INIT_MMX
mmxext
cglobal
h264_idct_dc_add_10
,
3
,
3
movsxdifnidn
r2
,
r2d
movd
m0
,
[r1]
mov
dword
[r1],
0
paddd
m0
,
[
pd_32
]
...
...
@@ -205,6 +208,7 @@ cglobal h264_idct_dc_add_10,3,3
;-----------------------------------------------------------------------------
%macro
IDCT8_DC_ADD
0
cglobal
h264_idct8_dc_add_10
,
3
,
4
,
7
movsxdifnidn
r2
,
r2d
movd
m0
,
[r1]
mov
dword
[r1],
0
paddd
m0
,
[
pd_32
]
...
...
@@ -272,6 +276,7 @@ idct_dc_add %+ SUFFIX:
ret
cglobal
h264_idct_add16intra_10
,
5
,
7
,
8
movsxdifnidn
r3
,
r3d
ADD16_OP_INTRA
0
,
4
+
1
*
8
ADD16_OP_INTRA
2
,
4
+
2
*
8
ADD16_OP_INTRA
4
,
6
+
1
*
8
...
...
@@ -304,6 +309,7 @@ IDCT_ADD16INTRA_10
;-----------------------------------------------------------------------------
%macro
IDCT_ADD8
0
cglobal
h264_idct_add8_10
,
5
,
8
,
7
movsxdifnidn
r3
,
r3d
%if
ARCH_X86_64
mov
r7
,
r0
%endif
...
...
@@ -438,6 +444,7 @@ IDCT_ADD8
%macro
IDCT8_ADD
0
cglobal
h264_idct8_add_10
,
3
,
4
,
16
movsxdifnidn
r2
,
r2d
%if
UNIX64
==
0
%
assign
pad
16
-
gprsize
-
(
stack_offset
&
15
)
sub
rsp
,
pad
...
...
@@ -560,6 +567,7 @@ IDCT8_ADD
%macro
IDCT8_ADD4
0
cglobal
h264_idct8_add4_10
,
0
,
7
,
16
movsxdifnidn
r3
,
r3d
%
assign
pad
16
-
gprsize
-
(
stack_offset
&
15
)
SUB
rsp
,
pad
mov
r5
,
r0mp
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment