Linshizhi / ffmpeg.wasm-core / Commits

Commit 96c9cc10
Authored Jul 27, 2012 by Ronald S. Bultje
x86inc: sync to latest version from x264.

Parent: b3c5ae56
Showing 1 changed file with 124 additions and 92 deletions:

libavutil/x86/x86inc.asm (+124, -92)
@@ -36,8 +36,8 @@
 %define program_name ff

-%define UNIX64 0
 %define WIN64  0
+%define UNIX64 0
 %if ARCH_X86_64
     %ifidn __OUTPUT_FORMAT__, win32
         %define WIN64  1
@@ -54,11 +54,6 @@
     %define mangle(x) x
 %endif

-; FIXME: All of the 64bit asm functions that take a stride as an argument
-; via register, assume that the high dword of that register is filled with 0.
-; This is true in practice (since we never do any 64bit arithmetic on strides,
-; and x264's strides are all positive), but is not guaranteed by the ABI.
-
 ; Name of the .rodata section.
 ; Kludge: Something on OS X fails to align .rodata even given an align attribute,
 ; so use a different read-only section.
@@ -129,34 +124,38 @@ CPU amdnop
 ; registers:
 ; rN and rNq are the native-size register holding function argument N
 ; rNd, rNw, rNb are dword, word, and byte size
+; rNh is the high 8 bits of the word size
 ; rNm is the original location of arg N (a register or on the stack), dword
 ; rNmp is native size

-%macro DECLARE_REG 5-6
+%macro DECLARE_REG 2-3
     %define r%1q %2
-    %define r%1d %3
-    %define r%1w %4
-    %define r%1b %5
-    %if %0 == 5
-        %define r%1m %3
+    %define r%1d %2d
+    %define r%1w %2w
+    %define r%1b %2b
+    %define r%1h %2h
+    %if %0 == 2
+        %define r%1m  %2d
         %define r%1mp %2
     %elif ARCH_X86_64 ; memory
-        %define r%1m [rsp + stack_offset + %6]
+        %define r%1m [rsp + stack_offset + %3]
         %define r%1mp qword r %+ %1m
     %else
-        %define r%1m [esp + stack_offset + %6]
+        %define r%1m [esp + stack_offset + %3]
         %define r%1mp dword r %+ %1m
     %endif
     %define r%1 %2
 %endmacro

-%macro DECLARE_REG_SIZE 2
+%macro DECLARE_REG_SIZE 3
     %define r%1q r%1
     %define e%1q r%1
     %define r%1d e%1
     %define e%1d e%1
     %define r%1w %1
     %define e%1w %1
+    %define r%1h %3
+    %define e%1h %3
     %define r%1b %2
     %define e%1b %2
 %if ARCH_X86_64 == 0
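
Note (editorial, not part of the diff): the slimmed-down DECLARE_REG works because NASM token pasting turns %2d into the register name plus a size suffix. For the numbered registers that already names a real register (R10d and so on); for the legacy registers, DECLARE_REG_SIZE maps the synthetic name back to a real one. A hedged sketch of the chain for rax, using declarations that appear later in this file:

    ; DECLARE_REG_SIZE ax, al, ah expands (in part) to:
    %define raxd eax    ; from "%define r%1d e%1"
    %define raxh ah     ; from "%define r%1h %3", the new high-byte size
    ; DECLARE_REG 6, rax, 56 (the WIN64 entry) then expands (in part) to:
    %define r6q rax     ; from "%define r%1q %2"
    %define r6d raxd    ; from "%define r%1d %2d", resolves to eax
    %define r6h raxh    ; from "%define r%1h %2h", resolves to ah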
@@ -164,13 +163,13 @@ CPU amdnop
 %endif
 %endmacro

-DECLARE_REG_SIZE ax, al
-DECLARE_REG_SIZE bx, bl
-DECLARE_REG_SIZE cx, cl
-DECLARE_REG_SIZE dx, dl
-DECLARE_REG_SIZE si, sil
-DECLARE_REG_SIZE di, dil
-DECLARE_REG_SIZE bp, bpl
+DECLARE_REG_SIZE ax, al, ah
+DECLARE_REG_SIZE bx, bl, bh
+DECLARE_REG_SIZE cx, cl, ch
+DECLARE_REG_SIZE dx, dl, dh
+DECLARE_REG_SIZE si, sil, null
+DECLARE_REG_SIZE di, dil, null
+DECLARE_REG_SIZE bp, bpl, null

 ; t# defines for when per-arch register allocation is more complex than just function arguments
@@ -188,6 +187,7 @@ DECLARE_REG_SIZE bp, bpl
     %define t%1q t%1 %+ q
     %define t%1d t%1 %+ d
     %define t%1w t%1 %+ w
+    %define t%1h t%1 %+ h
     %define t%1b t%1 %+ b
     %rotate 1
 %endrep
@@ -277,6 +277,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
         CAT_UNDEF arg_name %+ %%i, q
         CAT_UNDEF arg_name %+ %%i, d
         CAT_UNDEF arg_name %+ %%i, w
+        CAT_UNDEF arg_name %+ %%i, h
         CAT_UNDEF arg_name %+ %%i, b
         CAT_UNDEF arg_name %+ %%i, m
         CAT_UNDEF arg_name %+ %%i, mp
@@ -292,6 +293,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
         %xdefine %1q r %+ %%i %+ q
         %xdefine %1d r %+ %%i %+ d
         %xdefine %1w r %+ %%i %+ w
+        %xdefine %1h r %+ %%i %+ h
         %xdefine %1b r %+ %%i %+ b
         %xdefine %1m r %+ %%i %+ m
         %xdefine %1mp r %+ %%i %+ mp
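
Note (editorial, not part of the diff): the h size threaded through DEFINE_ARGS here exposes the legacy high-byte registers ah/bh/ch/dh; si, di and bp have no high byte, which is why their DECLARE_REG_SIZE entries above pass null. A hypothetical use, valid only when the argument register is one of a/b/c/dx:

    ; on WIN64, arg 0 arrives in rcx, so r0h resolves to ch:
    mov r0h, 0x80    ; writes bits 8..15 of arg 0's register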
@@ -305,21 +307,21 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
 %if WIN64 ; Windows x64 ;=================================================

-DECLARE_REG 0,  rcx, ecx,  cx,  cl
-DECLARE_REG 1,  rdx, edx,  dx,  dl
-DECLARE_REG 2,  R8,  R8D,  R8W,  R8B
-DECLARE_REG 3,  R9,  R9D,  R9W,  R9B
-DECLARE_REG 4,  R10, R10D, R10W, R10B, 40
-DECLARE_REG 5,  R11, R11D, R11W, R11B, 48
-DECLARE_REG 6,  rax, eax,  ax,  al,  56
-DECLARE_REG 7,  rdi, edi,  di,  dil, 64
-DECLARE_REG 8,  rsi, esi,  si,  sil, 72
-DECLARE_REG 9,  rbx, ebx,  bx,  bl,  80
-DECLARE_REG 10, rbp, ebp,  bp,  bpl, 88
-DECLARE_REG 11, R12, R12D, R12W, R12B, 96
-DECLARE_REG 12, R13, R13D, R13W, R13B, 104
-DECLARE_REG 13, R14, R14D, R14W, R14B, 112
-DECLARE_REG 14, R15, R15D, R15W, R15B, 120
+DECLARE_REG 0,  rcx
+DECLARE_REG 1,  rdx
+DECLARE_REG 2,  R8
+DECLARE_REG 3,  R9
+DECLARE_REG 4,  R10, 40
+DECLARE_REG 5,  R11, 48
+DECLARE_REG 6,  rax, 56
+DECLARE_REG 7,  rdi, 64
+DECLARE_REG 8,  rsi, 72
+DECLARE_REG 9,  rbx, 80
+DECLARE_REG 10, rbp, 88
+DECLARE_REG 11, R12, 96
+DECLARE_REG 12, R13, 104
+DECLARE_REG 13, R14, 112
+DECLARE_REG 14, R15, 120

 %macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
     %assign num_args %1
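
Note (editorial, not part of the diff): the trailing number in the new short form is the stack offset consumed by DECLARE_REG's memory branch shown earlier. A hedged sketch of what one of the WIN64 lines expands to:

    ; DECLARE_REG 4, R10, 40 yields (in part):
    %define r4m [rsp + stack_offset + 40]   ; arg 4's home on the stack
    %define r4mp qword r4m                  ; the same slot, qword-qualified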
@@ -366,6 +368,8 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
     %assign xmm_regs_used 0
 %endmacro

+%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
+
 %macro RET 0
     WIN64_RESTORE_XMM_INTERNAL rsp
     POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
@@ -375,31 +379,23 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
     ret
 %endmacro

-%macro REP_RET 0
-    %if regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
-        RET
-    %else
-        rep ret
-    %endif
-%endmacro
-
 %elif ARCH_X86_64 ; *nix x64 ;=============================================

-DECLARE_REG 0,  rdi, edi,  di,  dil
-DECLARE_REG 1,  rsi, esi,  si,  sil
-DECLARE_REG 2,  rdx, edx,  dx,  dl
-DECLARE_REG 3,  rcx, ecx,  cx,  cl
-DECLARE_REG 4,  R8,  R8D,  R8W,  R8B
-DECLARE_REG 5,  R9,  R9D,  R9W,  R9B
-DECLARE_REG 6,  rax, eax,  ax,  al,  8
-DECLARE_REG 7,  R10, R10D, R10W, R10B, 16
-DECLARE_REG 8,  R11, R11D, R11W, R11B, 24
-DECLARE_REG 9,  rbx, ebx,  bx,  bl,  32
-DECLARE_REG 10, rbp, ebp,  bp,  bpl, 40
-DECLARE_REG 11, R12, R12D, R12W, R12B, 48
-DECLARE_REG 12, R13, R13D, R13W, R13B, 56
-DECLARE_REG 13, R14, R14D, R14W, R14B, 64
-DECLARE_REG 14, R15, R15D, R15W, R15B, 72
+DECLARE_REG 0,  rdi
+DECLARE_REG 1,  rsi
+DECLARE_REG 2,  rdx
+DECLARE_REG 3,  rcx
+DECLARE_REG 4,  R8
+DECLARE_REG 5,  R9
+DECLARE_REG 6,  rax, 8
+DECLARE_REG 7,  R10, 16
+DECLARE_REG 8,  R11, 24
+DECLARE_REG 9,  rbx, 32
+DECLARE_REG 10, rbp, 40
+DECLARE_REG 11, R12, 48
+DECLARE_REG 12, R13, 56
+DECLARE_REG 13, R14, 64
+DECLARE_REG 14, R15, 72

 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
     %assign num_args %1
@@ -411,6 +407,8 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
     DEFINE_ARGS %4
 %endmacro

+%define has_epilogue regs_used > 9 || mmsize == 32
+
 %macro RET 0
     POP_IF_USED 14, 13, 12, 11, 10, 9
 %if mmsize == 32
@@ -419,23 +417,15 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
     ret
 %endmacro

-%macro REP_RET 0
-    %if regs_used > 9 || mmsize == 32
-        RET
-    %else
-        rep ret
-    %endif
-%endmacro
-
 %else ; X86_32 ;==============================================================

-DECLARE_REG 0, eax, eax, ax, al,   4
-DECLARE_REG 1, ecx, ecx, cx, cl,   8
-DECLARE_REG 2, edx, edx, dx, dl,  12
-DECLARE_REG 3, ebx, ebx, bx, bl,  16
-DECLARE_REG 4, esi, esi, si, null, 20
-DECLARE_REG 5, edi, edi, di, null, 24
-DECLARE_REG 6, ebp, ebp, bp, null, 28
+DECLARE_REG 0, eax, 4
+DECLARE_REG 1, ecx, 8
+DECLARE_REG 2, edx, 12
+DECLARE_REG 3, ebx, 16
+DECLARE_REG 4, esi, 20
+DECLARE_REG 5, edi, 24
+DECLARE_REG 6, ebp, 28
 %define rsp esp

 %macro DECLARE_ARG 1-*
@@ -460,6 +450,8 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
     DEFINE_ARGS %4
 %endmacro

+%define has_epilogue regs_used > 3 || mmsize == 32
+
 %macro RET 0
     POP_IF_USED 6, 5, 4, 3
 %if mmsize == 32
@@ -468,14 +460,6 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
     ret
 %endmacro

-%macro REP_RET 0
-    %if regs_used > 3 || mmsize == 32
-        RET
-    %else
-        rep ret
-    %endif
-%endmacro
-
 %endif ;======================================================================

 %if WIN64 == 0
@@ -485,6 +469,23 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %endmacro
 %endif

+%macro REP_RET 0
+    %if has_epilogue
+        RET
+    %else
+        rep ret
+    %endif
+%endmacro
+
+%macro TAIL_CALL 2 ; callee, is_nonadjacent
+    %if has_epilogue
+        call %1
+        RET
+    %elif %2
+        jmp %1
+    %endif
+%endmacro
+
 ;=============================================================================
 ; arch-independent part
 ;=============================================================================
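
Note (editorial, not part of the diff): rep ret is the long-standing workaround for AMD K8/K10 branch mispredicts on a bare ret that is itself a branch target. The new has_epilogue define lets this single REP_RET, and the new TAIL_CALL, share the "does this function need a real epilogue" test that each ABI block previously duplicated. A hypothetical leaf function, names invented for illustration:

    ; assuming few enough registers that has_epilogue evaluates to 0 here:
    cglobal dummy_scale, 2, 3
        ; ... function body ...
        TAIL_CALL dummy_scale_c, 1   ; no epilogue: emitted as "jmp dummy_scale_c"
    .done:
        REP_RET                      ; no epilogue: emitted as "rep ret"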
@@ -564,6 +565,8 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 %assign cpuflags_avx      (1<<11)| cpuflags_sse42
 %assign cpuflags_xop      (1<<12)| cpuflags_avx
 %assign cpuflags_fma4     (1<<13)| cpuflags_avx
+%assign cpuflags_avx2     (1<<14)| cpuflags_avx
+%assign cpuflags_fma3     (1<<15)| cpuflags_avx

 %assign cpuflags_cache32  (1<<16)
 %assign cpuflags_cache64  (1<<17)
@@ -572,6 +575,9 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 %assign cpuflags_misalign (1<<20)
 %assign cpuflags_aligned  (1<<21) ; not a cpu feature, but a function variant
 %assign cpuflags_atom     (1<<22)
+%assign cpuflags_bmi1     (1<<23)
+%assign cpuflags_bmi2     (1<<24)|cpuflags_bmi1
+%assign cpuflags_tbm      (1<<25)|cpuflags_bmi1

 %define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
 %define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
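
Note (editorial, not part of the diff): each cpuflags_* value ORs in the flags it implies, so the single mask comparison in cpuflag(x) checks the whole implication chain at once. A worked instance with the new bits:

    ; cpuflags_bmi2 = (1<<24)|cpuflags_bmi1 = (1<<24)|(1<<23)
    ; so whenever cpuflags contains cpuflags_bmi2:
    ;   cpuflag(bmi1) -> ((cpuflags & (1<<23)) == (1<<23)) -> 1
    %if cpuflag(bmi2)
        ; bmi1 instructions are also safe here, since bmi2 implies bmi1
    %endif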
@@ -833,25 +839,38 @@ INIT_XMM
 %endrep
 %undef i

+%macro CHECK_AVX_INSTR_EMU 3-*
+    %xdefine %%opcode %1
+    %xdefine %%dst %2
+    %rep %0-2
+        %ifidn %%dst, %3
+            %error non-avx emulation of ``%%opcode'' is not supported
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
 ;%1 == instruction
 ;%2 == 1 if float, 0 if int
 ;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
 ;%4 == number of operands given
 ;%5+: operands
 %macro RUN_AVX_INSTR 6-7+
-    %ifid %5
-        %define %%size sizeof%5
+    %ifid %6
+        %define %%sizeofreg sizeof%6
+    %elifid %5
+        %define %%sizeofreg sizeof%5
     %else
-        %define %%size mmsize
+        %define %%sizeofreg mmsize
     %endif
-    %if %%size == 32
-        %if %0 >= 7
+    %if %%sizeofreg == 32
+        %if %4 >= 3
             v%1 %5, %6, %7
         %else
             v%1 %5, %6
         %endif
     %else
-        %if %%size == 8
+        %if %%sizeofreg == 8
             %define %%regmov movq
         %elif %2
             %define %%regmov movaps
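
Note (editorial, not part of the diff): CHECK_AVX_INSTR_EMU guards the mov-plus-SSE emulation of three-operand AVX syntax used below: the emulation first copies the first source into the destination, which silently corrupts the result if the destination also appears as a later source operand. A sketch of the case it now rejects, register numbers arbitrary:

    ; without AVX, "mulps m0, m1, m0" would have been emulated as:
    ;     movaps m0, m1   ; clobbers m0 ...
    ;     mulps  m0, m0   ; ... which was also the second source operand
    ; RUN_AVX_INSTR instead expands CHECK_AVX_INSTR_EMU {mulps m0, m1, m0}, m0, m0,
    ; which hits the %ifidn and %errors out at assembly time.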
@@ -861,16 +880,17 @@ INIT_XMM
         %if %4 >= 3+%3
             %ifnidn %5, %6
-                %if avx_enabled && sizeof%5 == 16
+                %if avx_enabled && %%sizeofreg == 16
                     v%1 %5, %6, %7
                 %else
+                    CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
                     %%regmov %5, %6
                     %1 %5, %7
                 %endif
             %else
                 %1 %5, %7
             %endif
-        %elif %3
+        %elif %4 >= 3
             %1 %5, %6, %7
         %else
             %1 %5, %6
@@ -901,7 +921,7 @@ INIT_XMM
 ;%1 == instruction
 ;%2 == 1 if float, 0 if int
-;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 3-operand (xmm, xmm, xmm)
+;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
 ;%4 == 1 if symmetric (i.e. doesn't matter which src arg is which), 0 if not
 %macro AVX_INSTR 4
     %macro %1 2-9 fnord, fnord, fnord, %1, %2, %3, %4
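
Note (editorial, not part of the diff): AVX_INSTR defines a per-mnemonic wrapper that routes through RUN_AVX_INSTR above, so one spelling serves both instruction sets. A hedged sketch of the two expansions, register numbers arbitrary:

    ; after "AVX_INSTR mulps, 1, 0, 1", writing
    ;     mulps m0, m1, m2
    ; emits either
    ;     vmulps m0, m1, m2    ; when avx_enabled and the registers are SIMD regs
    ; or the SSE emulation
    ;     movaps m0, m1
    ;     mulps  m0, m2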
@@ -966,6 +986,9 @@ AVX_INSTR mulsd, 1, 0, 1
 AVX_INSTR mulss, 1, 0, 1
 AVX_INSTR orpd, 1, 0, 1
 AVX_INSTR orps, 1, 0, 1
+AVX_INSTR pabsb, 0, 0, 0
+AVX_INSTR pabsw, 0, 0, 0
+AVX_INSTR pabsd, 0, 0, 0
 AVX_INSTR packsswb, 0, 0, 0
 AVX_INSTR packssdw, 0, 0, 0
 AVX_INSTR packuswb, 0, 0, 0
@@ -1017,6 +1040,7 @@ AVX_INSTR pminsd, 0, 0, 1
 AVX_INSTR pminub, 0, 0, 1
 AVX_INSTR pminuw, 0, 0, 1
 AVX_INSTR pminud, 0, 0, 1
 AVX_INSTR pmovmskb, 0, 0, 0
+AVX_INSTR pmulhuw, 0, 0, 1
 AVX_INSTR pmulhrsw, 0, 0, 1
 AVX_INSTR pmulhw, 0, 0, 1
@@ -1027,6 +1051,9 @@ AVX_INSTR pmuldq, 0, 0, 1
 AVX_INSTR por, 0, 0, 1
 AVX_INSTR psadbw, 0, 0, 1
 AVX_INSTR pshufb, 0, 0, 0
+AVX_INSTR pshufd, 0, 1, 0
+AVX_INSTR pshufhw, 0, 1, 0
+AVX_INSTR pshuflw, 0, 1, 0
 AVX_INSTR psignb, 0, 0, 0
 AVX_INSTR psignw, 0, 0, 0
 AVX_INSTR psignd, 0, 0, 0
@@ -1048,6 +1075,7 @@ AVX_INSTR psubsb, 0, 0, 0
 AVX_INSTR psubsw, 0, 0, 0
 AVX_INSTR psubusb, 0, 0, 0
 AVX_INSTR psubusw, 0, 0, 0
+AVX_INSTR ptest, 0, 0, 0
 AVX_INSTR punpckhbw, 0, 0, 0
 AVX_INSTR punpckhwd, 0, 0, 0
 AVX_INSTR punpckhdq, 0, 0, 0
@@ -1112,3 +1140,7 @@ FMA_INSTR fmaddps, mulps, addps
 FMA_INSTR pmacsdd, pmulld, paddd
 FMA_INSTR pmacsww, pmullw, paddw
 FMA_INSTR pmadcswd, pmaddwd, paddd
+
+; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
+; This lets us use tzcnt without bumping the yasm version requirement yet.
+%define tzcnt rep bsf
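
Note (editorial, not part of the diff): the define works at the encoding level: the F3 (rep) prefix in front of bsf is exactly what encodes tzcnt, and pre-BMI CPUs ignore the prefix. Sketch:

    ; with the define above,
    ;     tzcnt eax, ecx
    ; assembles as "rep bsf eax, ecx" (F3 0F BC C1):
    ; BMI1 CPUs execute tzcnt, older CPUs execute plain bsf.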