Commit 3b345d38 authored by James Almer

avutil/cpu: split flag checks per arch in av_cpu_max_align()

Signed-off-by: James Almer <jamrial@gmail.com>
parent 522f8770
......@@ -26,3 +26,13 @@ int ff_get_cpu_flags_aarch64(void)
AV_CPU_FLAG_NEON * HAVE_NEON |
AV_CPU_FLAG_VFP * HAVE_VFP;
}
/**
 * AArch64 backend for av_cpu_max_align().
 *
 * @return 16 when the runtime CPU flags include AV_CPU_FLAG_NEON,
 *         otherwise 8.
 */
size_t ff_get_cpu_max_align_aarch64(void)
{
    const int cpu_flags = av_get_cpu_flags();

    return (cpu_flags & AV_CPU_FLAG_NEON) ? 16 : 8;
}
......@@ -158,3 +158,13 @@ int ff_get_cpu_flags_arm(void)
}
#endif
/**
 * ARM backend for av_cpu_max_align().
 *
 * @return 16 when the runtime CPU flags include AV_CPU_FLAG_NEON,
 *         otherwise 8.
 */
size_t ff_get_cpu_max_align_arm(void)
{
    const int cpu_flags = av_get_cpu_flags();

    return (cpu_flags & AV_CPU_FLAG_NEON) ? 16 : 8;
}
......@@ -304,37 +304,14 @@ int av_cpu_count(void)
/**
 * Report the maximum alignment value for the CPU the program runs on.
 *
 * The flag checks themselves live in the per-architecture helpers
 * (ff_get_cpu_max_align_aarch64/arm/ppc/x86); this function only selects
 * the helper matching the build target. No flag tests are duplicated here,
 * so each architecture's rules are maintained in exactly one place.
 *
 * @return the value reported by the matching per-architecture helper, or 8
 *         when none of the ARCH_* selectors is enabled.
 */
size_t av_cpu_max_align(void)
{
    /* NOTE(review): ARCH_* are presumably 0/1 build-configuration constants,
     * so the branches that do not apply are expected to be discarded by the
     * compiler while still being syntax-checked -- confirm against config.h. */
    if (ARCH_AARCH64)
        return ff_get_cpu_max_align_aarch64();
    if (ARCH_ARM)
        return ff_get_cpu_max_align_arm();
    if (ARCH_PPC)
        return ff_get_cpu_max_align_ppc();
    if (ARCH_X86)
        return ff_get_cpu_max_align_x86();

    /* No architecture-specific helper applies. */
    return 8;
}
......@@ -44,4 +44,9 @@ int ff_get_cpu_flags_arm(void);
int ff_get_cpu_flags_ppc(void);
int ff_get_cpu_flags_x86(void);
/* Per-architecture helpers that av_cpu_max_align() dispatches to; each one
 * derives its result from the flags returned by av_get_cpu_flags(). */
size_t ff_get_cpu_max_align_aarch64(void);
size_t ff_get_cpu_max_align_arm(void);
size_t ff_get_cpu_max_align_ppc(void);
size_t ff_get_cpu_max_align_x86(void);
#endif /* AVUTIL_CPU_INTERNAL_H */
......@@ -148,3 +148,15 @@ out:
#endif /* HAVE_ALTIVEC */
return 0;
}
/**
 * PowerPC backend for av_cpu_max_align().
 *
 * @return 16 when any of AV_CPU_FLAG_ALTIVEC, AV_CPU_FLAG_VSX or
 *         AV_CPU_FLAG_POWER8 is set in the runtime CPU flags, otherwise 8.
 */
size_t ff_get_cpu_max_align_ppc(void)
{
    /* Any one of these flags is enough to select the 16 result. */
    const int mask_16 = AV_CPU_FLAG_ALTIVEC |
                        AV_CPU_FLAG_VSX     |
                        AV_CPU_FLAG_POWER8;
    const int cpu_flags = av_get_cpu_flags();

    return (cpu_flags & mask_16) ? 16 : 8;
}
......@@ -233,3 +233,30 @@ int ff_get_cpu_flags_x86(void)
return rval;
}
/**
 * x86 backend for av_cpu_max_align().
 *
 * The runtime CPU flags are tested against two groups in order; the first
 * group that has any flag set decides the result.
 *
 * @return 32 if any flag of the first group is set, else 16 if any flag of
 *         the second group is set, else 8.
 */
size_t ff_get_cpu_max_align_x86(void)
{
    /* Flags that select the 32 result. */
    const int mask_32 = AV_CPU_FLAG_AVX2 |
                        AV_CPU_FLAG_AVX  |
                        AV_CPU_FLAG_XOP  |
                        AV_CPU_FLAG_FMA4 |
                        AV_CPU_FLAG_FMA3 |
                        AV_CPU_FLAG_AVXSLOW;
    /* Flags that select the 16 result when none of the above is set. */
    const int mask_16 = AV_CPU_FLAG_AESNI     |
                        AV_CPU_FLAG_SSE42     |
                        AV_CPU_FLAG_SSE4      |
                        AV_CPU_FLAG_SSSE3     |
                        AV_CPU_FLAG_SSE3      |
                        AV_CPU_FLAG_SSE2      |
                        AV_CPU_FLAG_SSE       |
                        AV_CPU_FLAG_ATOM      |
                        AV_CPU_FLAG_SSSE3SLOW |
                        AV_CPU_FLAG_SSE3SLOW  |
                        AV_CPU_FLAG_SSE2SLOW;
    const int cpu_flags = av_get_cpu_flags();
    size_t align = 8;

    if (cpu_flags & mask_32)
        align = 32;
    else if (cpu_flags & mask_16)
        align = 16;

    return align;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment