Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
1b932eb1
Commit
1b932eb1
authored
Feb 22, 2014
by
James Almer
Committed by
Janne Grunau
Feb 23, 2014
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: add detection for FMA3 instruction set
Based on x264 code
Signed-off-by: James Almer <jamrial@gmail.com>
parent
10b0161d
Hide whitespace changes
Inline
Side-by-side
Showing 5 changed files with 16 additions and 1 deletion
+16
-1
configure
configure
+5
-0
cpu.c
libavutil/cpu.c
+3
-0
cpu.h
libavutil/cpu.h
+1
-0
cpu.c
libavutil/x86/cpu.c
+4
-1
cpu.h
libavutil/x86/cpu.h
+3
-0
No files found.
configure
View file @
1b932eb1
...
@@ -271,6 +271,7 @@ Optimization options (experts only):
...
@@ -271,6 +271,7 @@ Optimization options (experts only):
--disable-sse42 disable SSE4.2 optimizations
--disable-sse42 disable SSE4.2 optimizations
--disable-avx disable AVX optimizations
--disable-avx disable AVX optimizations
--disable-xop disable XOP optimizations
--disable-xop disable XOP optimizations
--disable-fma3 disable FMA3 optimizations
--disable-fma4 disable FMA4 optimizations
--disable-fma4 disable FMA4 optimizations
--disable-avx2 disable AVX2 optimizations
--disable-avx2 disable AVX2 optimizations
--disable-armv5te disable armv5te optimizations
--disable-armv5te disable armv5te optimizations
...
@@ -1254,6 +1255,7 @@ ARCH_EXT_LIST_X86='
...
@@ -1254,6 +1255,7 @@ ARCH_EXT_LIST_X86='
avx2
avx2
cpunop
cpunop
xop
xop
fma3
fma4
fma4
i686
i686
mmx
mmx
...
@@ -1578,6 +1580,7 @@ sse4_deps="ssse3"
...
@@ -1578,6 +1580,7 @@ sse4_deps="ssse3"
sse42_deps
=
"sse4"
sse42_deps
=
"sse4"
avx_deps
=
"sse42"
avx_deps
=
"sse42"
xop_deps
=
"avx"
xop_deps
=
"avx"
fma3_deps
=
"avx"
fma4_deps
=
"avx"
fma4_deps
=
"avx"
avx2_deps
=
"avx"
avx2_deps
=
"avx"
...
@@ -3761,6 +3764,7 @@ EOF
...
@@ -3761,6 +3764,7 @@ EOF
check_yasm
"movbe ecx, [5]"
&&
enable
yasm
||
check_yasm
"movbe ecx, [5]"
&&
enable
yasm
||
die
"yasm/nasm not found or too old. Use --disable-yasm for a crippled build."
die
"yasm/nasm not found or too old. Use --disable-yasm for a crippled build."
check_yasm
"vpmacsdd xmm0, xmm1, xmm2, xmm3"
||
disable xop_external
check_yasm
"vpmacsdd xmm0, xmm1, xmm2, xmm3"
||
disable xop_external
check_yasm
"vfmadd132ps ymm0, ymm1, ymm2"
||
disable fma3_external
check_yasm
"vfmaddps ymm0, ymm1, ymm2, ymm3"
||
disable fma4_external
check_yasm
"vfmaddps ymm0, ymm1, ymm2, ymm3"
||
disable fma4_external
check_yasm
"CPU amdnop"
||
disable cpunop
check_yasm
"CPU amdnop"
||
disable cpunop
fi
fi
...
@@ -4294,6 +4298,7 @@ if enabled x86; then
...
@@ -4294,6 +4298,7 @@ if enabled x86; then
echo
"SSSE3 enabled
${
ssse3
-no
}
"
echo
"SSSE3 enabled
${
ssse3
-no
}
"
echo
"AVX enabled
${
avx
-no
}
"
echo
"AVX enabled
${
avx
-no
}
"
echo
"XOP enabled
${
xop
-no
}
"
echo
"XOP enabled
${
xop
-no
}
"
echo
"FMA3 enabled
${
fma3
-no
}
"
echo
"FMA4 enabled
${
fma4
-no
}
"
echo
"FMA4 enabled
${
fma4
-no
}
"
echo
"i686 features enabled
${
i686
-no
}
"
echo
"i686 features enabled
${
i686
-no
}
"
echo
"CMOV is fast
${
fast_cmov
-no
}
"
echo
"CMOV is fast
${
fast_cmov
-no
}
"
...
...
libavutil/cpu.c
View file @
1b932eb1
...
@@ -87,6 +87,7 @@ int av_parse_cpu_flags(const char *s)
...
@@ -87,6 +87,7 @@ int av_parse_cpu_flags(const char *s)
#define CPUFLAG_SSE42 (AV_CPU_FLAG_SSE42 | CPUFLAG_SSE4)
#define CPUFLAG_SSE42 (AV_CPU_FLAG_SSE42 | CPUFLAG_SSE4)
#define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42)
#define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42)
#define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX)
#define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX)
#define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX)
#define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX)
#define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX)
#define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX)
#define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX)
static
const
AVOption
cpuflags_opts
[]
=
{
static
const
AVOption
cpuflags_opts
[]
=
{
...
@@ -107,6 +108,7 @@ int av_parse_cpu_flags(const char *s)
...
@@ -107,6 +108,7 @@ int av_parse_cpu_flags(const char *s)
{
"sse4.2"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_SSE42
},
.
unit
=
"flags"
},
{
"sse4.2"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_SSE42
},
.
unit
=
"flags"
},
{
"avx"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_AVX
},
.
unit
=
"flags"
},
{
"avx"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_AVX
},
.
unit
=
"flags"
},
{
"xop"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_XOP
},
.
unit
=
"flags"
},
{
"xop"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_XOP
},
.
unit
=
"flags"
},
{
"fma3"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_FMA3
},
.
unit
=
"flags"
},
{
"fma4"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_FMA4
},
.
unit
=
"flags"
},
{
"fma4"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_FMA4
},
.
unit
=
"flags"
},
{
"avx2"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_AVX2
},
.
unit
=
"flags"
},
{
"avx2"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_AVX2
},
.
unit
=
"flags"
},
{
"3dnow"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_3DNOW
},
.
unit
=
"flags"
},
{
"3dnow"
,
NULL
,
0
,
AV_OPT_TYPE_CONST
,
{
.
i64
=
CPUFLAG_3DNOW
},
.
unit
=
"flags"
},
...
@@ -204,6 +206,7 @@ static const struct {
...
@@ -204,6 +206,7 @@ static const struct {
{
AV_CPU_FLAG_SSE42
,
"sse4.2"
},
{
AV_CPU_FLAG_SSE42
,
"sse4.2"
},
{
AV_CPU_FLAG_AVX
,
"avx"
},
{
AV_CPU_FLAG_AVX
,
"avx"
},
{
AV_CPU_FLAG_XOP
,
"xop"
},
{
AV_CPU_FLAG_XOP
,
"xop"
},
{
AV_CPU_FLAG_FMA3
,
"fma3"
},
{
AV_CPU_FLAG_FMA4
,
"fma4"
},
{
AV_CPU_FLAG_FMA4
,
"fma4"
},
{
AV_CPU_FLAG_3DNOW
,
"3dnow"
},
{
AV_CPU_FLAG_3DNOW
,
"3dnow"
},
{
AV_CPU_FLAG_3DNOWEXT
,
"3dnowext"
},
{
AV_CPU_FLAG_3DNOWEXT
,
"3dnowext"
},
...
...
libavutil/cpu.h
View file @
1b932eb1
...
@@ -49,6 +49,7 @@
...
@@ -49,6 +49,7 @@
#define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions
#define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions
#define AV_CPU_FLAG_CMOV 0x1000 ///< i686 cmov
#define AV_CPU_FLAG_CMOV 0x1000 ///< i686 cmov
#define AV_CPU_FLAG_AVX2 0x8000 ///< AVX2 functions: requires OS support even if YMM registers aren't used
#define AV_CPU_FLAG_AVX2 0x8000 ///< AVX2 functions: requires OS support even if YMM registers aren't used
#define AV_CPU_FLAG_FMA3 0x10000 ///< Haswell FMA3 functions
#define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard
#define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard
...
...
libavutil/x86/cpu.c
View file @
1b932eb1
...
@@ -131,8 +131,11 @@ int ff_get_cpu_flags_x86(void)
...
@@ -131,8 +131,11 @@ int ff_get_cpu_flags_x86(void)
if
((
ecx
&
0x18000000
)
==
0x18000000
)
{
if
((
ecx
&
0x18000000
)
==
0x18000000
)
{
/* Check for OS support */
/* Check for OS support */
xgetbv
(
0
,
eax
,
edx
);
xgetbv
(
0
,
eax
,
edx
);
if
((
eax
&
0x6
)
==
0x6
)
if
((
eax
&
0x6
)
==
0x6
)
{
rval
|=
AV_CPU_FLAG_AVX
;
rval
|=
AV_CPU_FLAG_AVX
;
if
(
ecx
&
0x00001000
)
rval
|=
AV_CPU_FLAG_FMA3
;
}
}
}
#if HAVE_AVX2
#if HAVE_AVX2
if
(
max_std_level
>=
7
)
{
if
(
max_std_level
>=
7
)
{
...
...
libavutil/x86/cpu.h
View file @
1b932eb1
...
@@ -38,6 +38,7 @@
...
@@ -38,6 +38,7 @@
#define X86_SSE42(flags) CPUEXT(flags, SSE42)
#define X86_SSE42(flags) CPUEXT(flags, SSE42)
#define X86_AVX(flags) CPUEXT(flags, AVX)
#define X86_AVX(flags) CPUEXT(flags, AVX)
#define X86_XOP(flags) CPUEXT(flags, XOP)
#define X86_XOP(flags) CPUEXT(flags, XOP)
#define X86_FMA3(flags) CPUEXT(flags, FMA3)
#define X86_FMA4(flags) CPUEXT(flags, FMA4)
#define X86_FMA4(flags) CPUEXT(flags, FMA4)
#define X86_AVX2(flags) CPUEXT(flags, AVX2)
#define X86_AVX2(flags) CPUEXT(flags, AVX2)
...
@@ -53,6 +54,7 @@
...
@@ -53,6 +54,7 @@
#define EXTERNAL_SSE42(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42)
#define EXTERNAL_SSE42(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42)
#define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX)
#define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX)
#define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP)
#define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP)
#define EXTERNAL_FMA3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA3)
#define EXTERNAL_FMA4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4)
#define EXTERNAL_FMA4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4)
#define EXTERNAL_AVX2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2)
#define EXTERNAL_AVX2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2)
...
@@ -68,6 +70,7 @@
...
@@ -68,6 +70,7 @@
#define INLINE_SSE42(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE42)
#define INLINE_SSE42(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE42)
#define INLINE_AVX(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX)
#define INLINE_AVX(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX)
#define INLINE_XOP(flags) CPUEXT_SUFFIX(flags, _INLINE, XOP)
#define INLINE_XOP(flags) CPUEXT_SUFFIX(flags, _INLINE, XOP)
#define INLINE_FMA3(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA3)
#define INLINE_FMA4(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA4)
#define INLINE_FMA4(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA4)
#define INLINE_AVX2(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX2)
#define INLINE_AVX2(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX2)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment