Commit e9125dd5 authored by Michael Niedermayer

Merge commit '2c10e2a2'

* commit '2c10e2a2':
  build: Make the H.264 parser select h264qpel
  x86: h264qpel: add cpu flag checks for init function

Conflicts:
	libavcodec/x86/h264_qpel.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents d07b0d99 2c10e2a2
...@@ -1824,7 +1824,7 @@ wmv3_vdpau_decoder_select="vc1_vdpau_decoder" ...@@ -1824,7 +1824,7 @@ wmv3_vdpau_decoder_select="vc1_vdpau_decoder"
wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel" wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
# parsers # parsers
h264_parser_select="error_resilience golomb h264dsp h264pred mpegvideo" h264_parser_select="error_resilience golomb h264dsp h264pred h264qpel mpegvideo"
mpeg4video_parser_select="error_resilience mpegvideo" mpeg4video_parser_select="error_resilience mpegvideo"
mpegvideo_parser_select="error_resilience mpegvideo" mpegvideo_parser_select="error_resilience mpegvideo"
vc1_parser_select="error_resilience mpegvideo" vc1_parser_select="error_resilience mpegvideo"
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/h264qpel.h" #include "libavcodec/h264qpel.h"
#include "libavcodec/mpegvideo.h" #include "libavcodec/mpegvideo.h"
...@@ -530,95 +531,91 @@ QPEL16(mmxext) ...@@ -530,95 +531,91 @@ QPEL16(mmxext)
void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
{ {
#if HAVE_YASM
int high_bit_depth = bit_depth > 8; int high_bit_depth = bit_depth > 8;
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
#if HAVE_MMXEXT_EXTERNAL if (EXTERNAL_MMXEXT(mm_flags)) {
if (!(mm_flags & AV_CPU_FLAG_MMXEXT)) if (!high_bit_depth) {
return; SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
if (!high_bit_depth) { SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, );
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, );
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, );
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, );
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); } else if (bit_depth == 10) {
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, );
} else if (bit_depth == 10) {
#if !ARCH_X86_64 #if !ARCH_X86_64
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_);
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_);
#endif
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
}
#endif #endif
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
#if HAVE_SSE2_EXTERNAL SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
if (!(mm_flags & AV_CPU_FLAG_SSE2)) }
return;
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
// these functions are slower than mmx on AMD, but faster on Intel
H264_QPEL_FUNCS(0, 0, sse2);
}
if (!high_bit_depth) {
H264_QPEL_FUNCS(0, 1, sse2);
H264_QPEL_FUNCS(0, 2, sse2);
H264_QPEL_FUNCS(0, 3, sse2);
H264_QPEL_FUNCS(1, 1, sse2);
H264_QPEL_FUNCS(1, 2, sse2);
H264_QPEL_FUNCS(1, 3, sse2);
H264_QPEL_FUNCS(2, 1, sse2);
H264_QPEL_FUNCS(2, 2, sse2);
H264_QPEL_FUNCS(2, 3, sse2);
H264_QPEL_FUNCS(3, 1, sse2);
H264_QPEL_FUNCS(3, 2, sse2);
H264_QPEL_FUNCS(3, 3, sse2);
} }
if (bit_depth == 10) { if (EXTERNAL_SSE2(mm_flags)) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); // these functions are slower than mmx on AMD, but faster on Intel
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); H264_QPEL_FUNCS(0, 0, sse2);
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); }
H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
H264_QPEL_FUNCS_10(2, 0, sse2_cache64); if (!high_bit_depth) {
H264_QPEL_FUNCS_10(3, 0, sse2_cache64); H264_QPEL_FUNCS(0, 1, sse2);
H264_QPEL_FUNCS(0, 2, sse2);
H264_QPEL_FUNCS(0, 3, sse2);
H264_QPEL_FUNCS(1, 1, sse2);
H264_QPEL_FUNCS(1, 2, sse2);
H264_QPEL_FUNCS(1, 3, sse2);
H264_QPEL_FUNCS(2, 1, sse2);
H264_QPEL_FUNCS(2, 2, sse2);
H264_QPEL_FUNCS(2, 3, sse2);
H264_QPEL_FUNCS(3, 1, sse2);
H264_QPEL_FUNCS(3, 2, sse2);
H264_QPEL_FUNCS(3, 3, sse2);
}
if (bit_depth == 10) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_);
H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
}
} }
#endif
#if HAVE_SSSE3_EXTERNAL if (EXTERNAL_SSSE3(mm_flags)) {
if (!(mm_flags & AV_CPU_FLAG_SSSE3)) if (!high_bit_depth) {
return; H264_QPEL_FUNCS(1, 0, ssse3);
if (!high_bit_depth) { H264_QPEL_FUNCS(1, 1, ssse3);
H264_QPEL_FUNCS(1, 0, ssse3); H264_QPEL_FUNCS(1, 2, ssse3);
H264_QPEL_FUNCS(1, 1, ssse3); H264_QPEL_FUNCS(1, 3, ssse3);
H264_QPEL_FUNCS(1, 2, ssse3); H264_QPEL_FUNCS(2, 0, ssse3);
H264_QPEL_FUNCS(1, 3, ssse3); H264_QPEL_FUNCS(2, 1, ssse3);
H264_QPEL_FUNCS(2, 0, ssse3); H264_QPEL_FUNCS(2, 2, ssse3);
H264_QPEL_FUNCS(2, 1, ssse3); H264_QPEL_FUNCS(2, 3, ssse3);
H264_QPEL_FUNCS(2, 2, ssse3); H264_QPEL_FUNCS(3, 0, ssse3);
H264_QPEL_FUNCS(2, 3, ssse3); H264_QPEL_FUNCS(3, 1, ssse3);
H264_QPEL_FUNCS(3, 0, ssse3); H264_QPEL_FUNCS(3, 2, ssse3);
H264_QPEL_FUNCS(3, 1, ssse3); H264_QPEL_FUNCS(3, 3, ssse3);
H264_QPEL_FUNCS(3, 2, ssse3); }
H264_QPEL_FUNCS(3, 3, ssse3);
if (bit_depth == 10) {
H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
}
} }
if (bit_depth == 10) { if (EXTERNAL_AVX(mm_flags)) {
H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); if (bit_depth == 10) {
H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); H264_QPEL_FUNCS_10(1, 0, sse2);
H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); H264_QPEL_FUNCS_10(2, 0, sse2);
} H264_QPEL_FUNCS_10(3, 0, sse2);
#endif }
#if HAVE_AVX_EXTERNAL
if (bit_depth == 10) {
H264_QPEL_FUNCS_10(1, 0, sse2);
H264_QPEL_FUNCS_10(2, 0, sse2);
H264_QPEL_FUNCS_10(3, 0, sse2);
} }
#endif #endif
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment