Commit 77aedc77 authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  swscale: Provide the right alignment for external mmx asm
  x86: Replace checks for CPU extensions and flags by convenience macros
  configure: msvc: fix/simplify setting of flags for hostcc
  x86: mlpdsp: mlp_filter_channel_x86 requires inline asm

Conflicts:
	libavcodec/x86/fft_init.c
	libavcodec/x86/h264_intrapred_init.c
	libavcodec/x86/h264dsp_init.c
	libavcodec/x86/mpegaudiodec.c
	libavcodec/x86/proresdsp_init.c
	libavutil/x86/float_dsp_init.c
	libswscale/utils.c
	libswscale/x86/swscale.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 4819d43d 75c37c5a
...@@ -2598,7 +2598,7 @@ probe_cc(){ ...@@ -2598,7 +2598,7 @@ probe_cc(){
_flags='-nologo' _flags='-nologo'
_cflags='-D_USE_MATH_DEFINES -Dinline=__inline -FIstdlib.h -Dstrtoll=_strtoi64' _cflags='-D_USE_MATH_DEFINES -Dinline=__inline -FIstdlib.h -Dstrtoll=_strtoi64'
if [ $pfx = hostcc ]; then if [ $pfx = hostcc ]; then
_cflags="$cflags -Dsnprintf=_snprintf" append _cflags -Dsnprintf=_snprintf
fi fi
disable aligned_stack disable aligned_stack
fi fi
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "dsputil_mmx.h" #include "dsputil_mmx.h"
#include "libavcodec/ac3dsp.h" #include "libavcodec/ac3dsp.h"
...@@ -50,29 +51,28 @@ extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_c ...@@ -50,29 +51,28 @@ extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_c
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
c->ac3_exponent_min = ff_ac3_exponent_min_mmx; c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx; c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx; c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
} }
if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) { if (EXTERNAL_AMD3DNOW(mm_flags)) {
c->extract_exponents = ff_ac3_extract_exponents_3dnow; c->extract_exponents = ff_ac3_extract_exponents_3dnow;
if (!bit_exact) { if (!bit_exact) {
c->float_to_fixed24 = ff_float_to_fixed24_3dnow; c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
} }
} }
if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
} }
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { if (EXTERNAL_SSE(mm_flags)) {
c->float_to_fixed24 = ff_float_to_fixed24_sse; c->float_to_fixed24 = ff_float_to_fixed24_sse;
} }
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { if (EXTERNAL_SSE2(mm_flags)) {
c->ac3_exponent_min = ff_ac3_exponent_min_sse2; c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
c->float_to_fixed24 = ff_float_to_fixed24_sse2; c->float_to_fixed24 = ff_float_to_fixed24_sse2;
...@@ -83,11 +83,10 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) ...@@ -83,11 +83,10 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
} }
} }
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) { if (EXTERNAL_SSSE3(mm_flags)) {
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
if (!(mm_flags & AV_CPU_FLAG_ATOM)) { if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
c->extract_exponents = ff_ac3_extract_exponents_ssse3; c->extract_exponents = ff_ac3_extract_exponents_ssse3;
} }
} }
#endif
} }
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/mpegvideo.h" #include "libavcodec/mpegvideo.h"
#include "libavcodec/mathops.h" #include "libavcodec/mathops.h"
...@@ -1181,17 +1182,16 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -1181,17 +1182,16 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
} }
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
#if HAVE_YASM if (EXTERNAL_MMX(mm_flags)) {
if (mm_flags & AV_CPU_FLAG_MMX) {
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx; c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx; c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2; c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2;
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2; c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2;
} }
if (mm_flags & AV_CPU_FLAG_SSE2){ if (EXTERNAL_SSE2(mm_flags)) {
c->sse[0] = ff_sse16_sse2; c->sse[0] = ff_sse16_sse2;
#if HAVE_ALIGNED_STACK #if HAVE_ALIGNED_STACK
...@@ -1200,14 +1200,11 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -1200,14 +1200,11 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
#endif #endif
} }
#if HAVE_SSSE3 && HAVE_ALIGNED_STACK if (EXTERNAL_SSSE3(mm_flags) && HAVE_ALIGNED_STACK) {
if (mm_flags & AV_CPU_FLAG_SSSE3) {
c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3; c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3; c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
} }
#endif
} }
#endif /* HAVE_YASM */
ff_dsputil_init_pix_mmx(c, avctx); ff_dsputil_init_pix_mmx(c, avctx);
} }
...@@ -17,29 +17,29 @@ ...@@ -17,29 +17,29 @@
*/ */
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/dct.h" #include "libavcodec/dct.h"
#include "fft.h" #include "fft.h"
av_cold void ff_fft_init_mmx(FFTContext *s) av_cold void ff_fft_init_mmx(FFTContext *s)
{ {
#if HAVE_YASM
int has_vectors = av_get_cpu_flags(); int has_vectors = av_get_cpu_flags();
#if ARCH_X86_32 #if ARCH_X86_32
if (has_vectors & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) { if (EXTERNAL_AMD3DNOW(has_vectors)) {
/* 3DNow! for K6-2/3 */ /* 3DNow! for K6-2/3 */
s->imdct_calc = ff_imdct_calc_3dnow; s->imdct_calc = ff_imdct_calc_3dnow;
s->imdct_half = ff_imdct_half_3dnow; s->imdct_half = ff_imdct_half_3dnow;
s->fft_calc = ff_fft_calc_3dnow; s->fft_calc = ff_fft_calc_3dnow;
} }
if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) { if (EXTERNAL_AMD3DNOWEXT(has_vectors)) {
/* 3DNowEx for K7 */ /* 3DNowEx for K7 */
s->imdct_calc = ff_imdct_calc_3dnowext; s->imdct_calc = ff_imdct_calc_3dnowext;
s->imdct_half = ff_imdct_half_3dnowext; s->imdct_half = ff_imdct_half_3dnowext;
s->fft_calc = ff_fft_calc_3dnowext; s->fft_calc = ff_fft_calc_3dnowext;
} }
#endif #endif
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) { if (EXTERNAL_SSE(has_vectors)) {
/* SSE for P3/P4/K8 */ /* SSE for P3/P4/K8 */
s->imdct_calc = ff_imdct_calc_sse; s->imdct_calc = ff_imdct_calc_sse;
s->imdct_half = ff_imdct_half_sse; s->imdct_half = ff_imdct_half_sse;
...@@ -47,26 +47,23 @@ av_cold void ff_fft_init_mmx(FFTContext *s) ...@@ -47,26 +47,23 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
s->fft_calc = ff_fft_calc_sse; s->fft_calc = ff_fft_calc_sse;
s->fft_permutation = FF_FFT_PERM_SWAP_LSBS; s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
} }
if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX_EXTERNAL && s->nbits >= 5) { if (EXTERNAL_AVX(has_vectors) && s->nbits >= 5) {
/* AVX for SB */ /* AVX for SB */
s->imdct_half = ff_imdct_half_avx; s->imdct_half = ff_imdct_half_avx;
s->fft_calc = ff_fft_calc_avx; s->fft_calc = ff_fft_calc_avx;
s->fft_permutation = FF_FFT_PERM_AVX; s->fft_permutation = FF_FFT_PERM_AVX;
} }
#endif
} }
#if CONFIG_DCT #if CONFIG_DCT
av_cold void ff_dct_init_mmx(DCTContext *s) av_cold void ff_dct_init_mmx(DCTContext *s)
{ {
#if HAVE_YASM
int has_vectors = av_get_cpu_flags(); int has_vectors = av_get_cpu_flags();
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) if (EXTERNAL_SSE(has_vectors))
s->dct32 = ff_dct32_float_sse; s->dct32 = ff_dct32_float_sse;
if (has_vectors & AV_CPU_FLAG_SSE2 && HAVE_SSE) if (EXTERNAL_SSE2(has_vectors))
s->dct32 = ff_dct32_float_sse2; s->dct32 = ff_dct32_float_sse2;
if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX_EXTERNAL) if (EXTERNAL_AVX(has_vectors))
s->dct32 = ff_dct32_float_avx; s->dct32 = ff_dct32_float_avx;
#endif
} }
#endif #endif
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/fmtconvert.h" #include "libavcodec/fmtconvert.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
...@@ -117,27 +118,27 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) ...@@ -117,27 +118,27 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
#if HAVE_YASM #if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
c->float_interleave = float_interleave_mmx; c->float_interleave = float_interleave_mmx;
if (HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW) { if (EXTERNAL_AMD3DNOW(mm_flags)) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16 = ff_float_to_int16_3dnow; c->float_to_int16 = ff_float_to_int16_3dnow;
c->float_to_int16_interleave = float_to_int16_interleave_3dnow; c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
} }
} }
if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) { if (EXTERNAL_AMD3DNOWEXT(mm_flags)) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16_interleave = float_to_int16_interleave_3dnowext; c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
} }
} }
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { if (EXTERNAL_SSE(mm_flags)) {
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse; c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
c->float_to_int16 = ff_float_to_int16_sse; c->float_to_int16 = ff_float_to_int16_sse;
c->float_to_int16_interleave = float_to_int16_interleave_sse; c->float_to_int16_interleave = float_to_int16_interleave_sse;
c->float_interleave = float_interleave_sse; c->float_interleave = float_interleave_sse;
} }
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2; c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
c->float_to_int16 = ff_float_to_int16_sse2; c->float_to_int16 = ff_float_to_int16_sse2;
c->float_to_int16_interleave = float_to_int16_interleave_sse2; c->float_to_int16_interleave = float_to_int16_interleave_sse2;
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
*/ */
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/h264pred.h" #include "libavcodec/h264pred.h"
#define PRED4x4(TYPE, DEPTH, OPT) \ #define PRED4x4(TYPE, DEPTH, OPT) \
...@@ -169,11 +170,10 @@ void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int s ...@@ -169,11 +170,10 @@ void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int s
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (bit_depth == 8) { if (bit_depth == 8) {
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx; h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx;
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx;
if (chroma_format_idc == 1) { if (chroma_format_idc == 1) {
...@@ -198,7 +198,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -198,7 +198,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
} }
} }
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx2; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx2;
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmx2; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmx2;
if (chroma_format_idc == 1) if (chroma_format_idc == 1)
...@@ -250,11 +250,11 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -250,11 +250,11 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
} }
} }
if (mm_flags & AV_CPU_FLAG_SSE) { if (EXTERNAL_SSE(mm_flags)) {
h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse; h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse;
} }
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2; h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_sse2; h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_sse2;
...@@ -277,7 +277,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -277,7 +277,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
} }
} }
if (mm_flags & AV_CPU_FLAG_SSSE3) { if (EXTERNAL_SSSE3(mm_flags)) {
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3;
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3;
if (chroma_format_idc == 1) if (chroma_format_idc == 1)
...@@ -308,7 +308,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -308,7 +308,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
} }
} }
} else if (bit_depth == 10) { } else if (bit_depth == 10) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
...@@ -324,7 +324,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -324,7 +324,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext; h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext;
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext;
} }
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2; h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2; h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2; h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2;
...@@ -356,7 +356,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -356,7 +356,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2; h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2;
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2;
} }
if (mm_flags & AV_CPU_FLAG_SSSE3) { if (EXTERNAL_SSSE3(mm_flags)) {
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3; h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3; h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3;
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3; h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3;
...@@ -367,7 +367,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -367,7 +367,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3; h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3;
h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3; h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3;
} }
if (HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) { if (EXTERNAL_AVX(mm_flags)) {
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx; h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx; h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx; h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx;
...@@ -384,5 +384,4 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -384,5 +384,4 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_avx; h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_avx;
} }
} }
#endif /* HAVE_YASM */
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/h264dsp.h" #include "libavcodec/h264dsp.h"
#include "dsputil_mmx.h" #include "dsputil_mmx.h"
...@@ -209,14 +210,13 @@ H264_BIWEIGHT_10_SSE(4, 10) ...@@ -209,14 +210,13 @@ H264_BIWEIGHT_10_SSE(4, 10)
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
const int chroma_format_idc) const int chroma_format_idc)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT) if (chroma_format_idc == 1 && EXTERNAL_MMXEXT(mm_flags))
c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2; c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2;
if (bit_depth == 8) { if (bit_depth == 8) {
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
c->h264_idct_dc_add = c->h264_idct_dc_add =
c->h264_idct_add = ff_h264_idct_add_8_mmx; c->h264_idct_add = ff_h264_idct_add_8_mmx;
c->h264_idct8_dc_add = c->h264_idct8_dc_add =
...@@ -230,7 +230,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -230,7 +230,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
if (mm_flags & AV_CPU_FLAG_CMOV) if (mm_flags & AV_CPU_FLAG_CMOV)
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx; c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2; c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2; c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2;
...@@ -259,7 +259,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -259,7 +259,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmx2; c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmx2;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmx2; c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmx2;
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
c->h264_idct8_add = ff_h264_idct8_add_8_sse2; c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
...@@ -282,23 +282,21 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -282,23 +282,21 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
#endif /* HAVE_ALIGNED_STACK */ #endif /* HAVE_ALIGNED_STACK */
} }
if (mm_flags & AV_CPU_FLAG_SSSE3) { if (EXTERNAL_SSSE3(mm_flags)) {
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3; c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3; c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
} }
if (HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) { if (EXTERNAL_AVX(mm_flags) && HAVE_ALIGNED_STACK) {
#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx; c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx; c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
#endif /* HAVE_ALIGNED_STACK */
} }
} }
} }
} else if (bit_depth == 10) { } else if (bit_depth == 10) {
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
#if ARCH_X86_32 #if ARCH_X86_32
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmx2; c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmx2;
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2; c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2;
...@@ -308,7 +306,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -308,7 +306,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmx2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmx2;
#endif /* ARCH_X86_32 */ #endif /* ARCH_X86_32 */
c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmx2; c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmx2;
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
c->h264_idct_add = ff_h264_idct_add_10_sse2; c->h264_idct_add = ff_h264_idct_add_10_sse2;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
...@@ -338,7 +336,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -338,7 +336,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
#endif /* HAVE_ALIGNED_STACK */ #endif /* HAVE_ALIGNED_STACK */
} }
if (mm_flags & AV_CPU_FLAG_SSE4) { if (EXTERNAL_SSE4(mm_flags)) {
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4; c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4; c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4; c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
...@@ -347,8 +345,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -347,8 +345,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4; c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4; c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
} }
#if HAVE_AVX_EXTERNAL if (EXTERNAL_AVX(mm_flags)) {
if (mm_flags & AV_CPU_FLAG_AVX) {
c->h264_idct_dc_add = c->h264_idct_dc_add =
c->h264_idct_add = ff_h264_idct_add_10_avx; c->h264_idct_add = ff_h264_idct_add_10_avx;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
...@@ -371,9 +368,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -371,9 +368,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
#endif /* HAVE_ALIGNED_STACK */ #endif /* HAVE_ALIGNED_STACK */
} }
#endif /* HAVE_AVX_EXTERNAL */
} }
} }
} }
#endif /* HAVE_YASM */
} }
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/mlp.h" #include "libavcodec/mlp.h"
#if HAVE_7REGS #if HAVE_7REGS && HAVE_INLINE_ASM
extern char ff_mlp_firorder_8; extern char ff_mlp_firorder_8;
extern char ff_mlp_firorder_7; extern char ff_mlp_firorder_7;
...@@ -171,11 +171,11 @@ static void mlp_filter_channel_x86(int32_t *state, const int32_t *coeff, ...@@ -171,11 +171,11 @@ static void mlp_filter_channel_x86(int32_t *state, const int32_t *coeff,
); );
} }
#endif /* HAVE_7REGS */ #endif /* HAVE_7REGS && HAVE_INLINE_ASM */
void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx) void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx)
{ {
#if HAVE_7REGS #if HAVE_7REGS && HAVE_INLINE_ASM
c->mlp_filter_channel = mlp_filter_channel_x86; c->mlp_filter_channel = mlp_filter_channel_x86;
#endif #endif
} }
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/mpegaudiodsp.h" #include "libavcodec/mpegaudiodsp.h"
...@@ -251,21 +252,16 @@ void ff_mpadsp_init_mmx(MPADSPContext *s) ...@@ -251,21 +252,16 @@ void ff_mpadsp_init_mmx(MPADSPContext *s)
#endif /* HAVE_SSE2_INLINE */ #endif /* HAVE_SSE2_INLINE */
#if HAVE_YASM #if HAVE_YASM
if (0) { if (EXTERNAL_AVX(mm_flags)) {
#if HAVE_AVX_EXTERNAL
} else if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
s->imdct36_blocks_float = imdct36_blocks_avx; s->imdct36_blocks_float = imdct36_blocks_avx;
#endif } else if (EXTERNAL_SSSE3(mm_flags)) {
#if HAVE_SSE
} else if (mm_flags & AV_CPU_FLAG_SSSE3) {
s->imdct36_blocks_float = imdct36_blocks_ssse3; s->imdct36_blocks_float = imdct36_blocks_ssse3;
} else if (mm_flags & AV_CPU_FLAG_SSE3) { } else if (EXTERNAL_SSE3(mm_flags)) {
s->imdct36_blocks_float = imdct36_blocks_sse3; s->imdct36_blocks_float = imdct36_blocks_sse3;
} else if (mm_flags & AV_CPU_FLAG_SSE2) { } else if (EXTERNAL_SSE2(mm_flags)) {
s->imdct36_blocks_float = imdct36_blocks_sse2; s->imdct36_blocks_float = imdct36_blocks_sse2;
} else if (mm_flags & AV_CPU_FLAG_SSE) { } else if (EXTERNAL_SSE(mm_flags)) {
s->imdct36_blocks_float = imdct36_blocks_sse; s->imdct36_blocks_float = imdct36_blocks_sse;
#endif /* HAVE_SSE */
} }
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
} }
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/mpegvideo.h" #include "libavcodec/mpegvideo.h"
...@@ -86,19 +87,19 @@ void ff_MPV_encode_init_x86(MpegEncContext *s) ...@@ -86,19 +87,19 @@ void ff_MPV_encode_init_x86(MpegEncContext *s)
if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) { if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
#if HAVE_MMX_INLINE #if HAVE_MMX_INLINE
if (mm_flags & AV_CPU_FLAG_MMX && HAVE_MMX) if (INLINE_MMX(mm_flags))
s->dct_quantize = dct_quantize_MMX; s->dct_quantize = dct_quantize_MMX;
#endif #endif
#if HAVE_MMXEXT_INLINE #if HAVE_MMXEXT_INLINE
if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) if (INLINE_MMXEXT(mm_flags))
s->dct_quantize = dct_quantize_MMX2; s->dct_quantize = dct_quantize_MMX2;
#endif #endif
#if HAVE_SSE2_INLINE #if HAVE_SSE2_INLINE
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE2) if (INLINE_SSE2(mm_flags))
s->dct_quantize = dct_quantize_SSE2; s->dct_quantize = dct_quantize_SSE2;
#endif #endif
#if HAVE_SSSE3_INLINE #if HAVE_SSSE3_INLINE
if (mm_flags & AV_CPU_FLAG_SSSE3) if (INLINE_SSSE3(mm_flags))
s->dct_quantize = dct_quantize_SSSE3; s->dct_quantize = dct_quantize_SSSE3;
#endif #endif
} }
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
*/ */
#include "libavutil/common.h" #include "libavutil/common.h"
#include "libavutil/cpu.h" #include "libavutil/x86/cpu.h"
#include "libavcodec/pngdsp.h" #include "libavcodec/pngdsp.h"
void ff_add_png_paeth_prediction_mmx2 (uint8_t *dst, uint8_t *src, void ff_add_png_paeth_prediction_mmx2 (uint8_t *dst, uint8_t *src,
...@@ -34,18 +34,16 @@ void ff_add_bytes_l2_sse2(uint8_t *dst, uint8_t *src1, ...@@ -34,18 +34,16 @@ void ff_add_bytes_l2_sse2(uint8_t *dst, uint8_t *src1,
void ff_pngdsp_init_x86(PNGDSPContext *dsp) void ff_pngdsp_init_x86(PNGDSPContext *dsp)
{ {
#if HAVE_YASM
int flags = av_get_cpu_flags(); int flags = av_get_cpu_flags();
#if ARCH_X86_32 #if ARCH_X86_32
if (flags & AV_CPU_FLAG_MMX) if (EXTERNAL_MMX(flags))
dsp->add_bytes_l2 = ff_add_bytes_l2_mmx; dsp->add_bytes_l2 = ff_add_bytes_l2_mmx;
#endif #endif
if (flags & AV_CPU_FLAG_MMXEXT) if (EXTERNAL_MMXEXT(flags))
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2; dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
if (flags & AV_CPU_FLAG_SSE2) if (EXTERNAL_SSE2(flags))
dsp->add_bytes_l2 = ff_add_bytes_l2_sse2; dsp->add_bytes_l2 = ff_add_bytes_l2_sse2;
if (flags & AV_CPU_FLAG_SSSE3) if (EXTERNAL_SSSE3(flags))
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3; dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
#endif
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include "libavutil/x86/cpu.h"
#include "libavcodec/proresdsp.h" #include "libavcodec/proresdsp.h"
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize, void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
...@@ -31,25 +32,25 @@ void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize, ...@@ -31,25 +32,25 @@ void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize,
void ff_proresdsp_x86_init(ProresDSPContext *dsp, AVCodecContext *avctx) void ff_proresdsp_x86_init(ProresDSPContext *dsp, AVCodecContext *avctx)
{ {
#if ARCH_X86_64 && HAVE_YASM #if ARCH_X86_64
int flags = av_get_cpu_flags(); int flags = av_get_cpu_flags();
if(avctx->flags & CODEC_FLAG_BITEXACT) if(avctx->flags & CODEC_FLAG_BITEXACT)
return; return;
if (flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(flags)) {
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
dsp->idct_put = ff_prores_idct_put_10_sse2; dsp->idct_put = ff_prores_idct_put_10_sse2;
} }
if (flags & AV_CPU_FLAG_SSE4) { if (EXTERNAL_SSE4(flags)) {
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
dsp->idct_put = ff_prores_idct_put_10_sse4; dsp->idct_put = ff_prores_idct_put_10_sse4;
} }
if (HAVE_AVX_EXTERNAL && flags & AV_CPU_FLAG_AVX) { if (EXTERNAL_AVX(flags)) {
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
dsp->idct_put = ff_prores_idct_put_10_avx; dsp->idct_put = ff_prores_idct_put_10_avx;
} }
#endif /* ARCH_X86_64 && HAVE_YASM */ #endif /* ARCH_X86_64 */
} }
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/rv34dsp.h" #include "libavcodec/rv34dsp.h"
...@@ -32,16 +33,14 @@ void ff_rv34_idct_add_mmx2(uint8_t *dst, ptrdiff_t stride, DCTELEM *block); ...@@ -32,16 +33,14 @@ void ff_rv34_idct_add_mmx2(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp) av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX) if (EXTERNAL_MMX(mm_flags))
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx; c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2; c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2;
c->rv34_idct_add = ff_rv34_idct_add_mmx2; c->rv34_idct_add = ff_rv34_idct_add_mmx2;
} }
if (mm_flags & AV_CPU_FLAG_SSE4) if (EXTERNAL_SSE4(mm_flags))
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4; c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
#endif /* HAVE_YASM */
} }
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "libavcodec/rv34dsp.h" #include "libavcodec/rv34dsp.h"
#include "libavutil/mem.h" #include "libavutil/mem.h"
#include "libavutil/x86/cpu.h"
#include "dsputil_mmx.h" #include "dsputil_mmx.h"
#if HAVE_YASM #if HAVE_YASM
...@@ -191,7 +192,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) ...@@ -191,7 +192,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
#if HAVE_YASM #if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx; c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx;
c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx; c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx;
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
...@@ -204,7 +205,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) ...@@ -204,7 +205,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
QPEL_MC_SET(put_, _mmx) QPEL_MC_SET(put_, _mmx)
#endif #endif
} }
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2;
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2;
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2;
...@@ -214,14 +215,14 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) ...@@ -214,14 +215,14 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
#if ARCH_X86_32 #if ARCH_X86_32
QPEL_MC_SET(avg_, _mmx2) QPEL_MC_SET(avg_, _mmx2)
#endif #endif
} else if (mm_flags & AV_CPU_FLAG_3DNOW) { } else if (EXTERNAL_AMD3DNOW(mm_flags)) {
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow; c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow;
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow;
#if ARCH_X86_32 #if ARCH_X86_32
QPEL_MC_SET(avg_, _3dnow) QPEL_MC_SET(avg_, _3dnow)
#endif #endif
} }
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2;
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2;
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2;
...@@ -229,7 +230,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) ...@@ -229,7 +230,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
QPEL_MC_SET(put_, _sse2) QPEL_MC_SET(put_, _sse2)
QPEL_MC_SET(avg_, _sse2) QPEL_MC_SET(avg_, _sse2)
} }
if (mm_flags & AV_CPU_FLAG_SSSE3) { if (EXTERNAL_SSSE3(mm_flags)) {
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3;
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3;
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3;
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "config.h" #include "config.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/sbrdsp.h" #include "libavcodec/sbrdsp.h"
float ff_sbr_sum_square_sse(float (*x)[2], int n); float ff_sbr_sum_square_sse(float (*x)[2], int n);
...@@ -29,12 +30,10 @@ void ff_sbr_hf_g_filt_sse(float (*Y)[2], const float (*X_high)[40][2], ...@@ -29,12 +30,10 @@ void ff_sbr_hf_g_filt_sse(float (*Y)[2], const float (*X_high)[40][2],
void ff_sbrdsp_init_x86(SBRDSPContext *s) void ff_sbrdsp_init_x86(SBRDSPContext *s)
{ {
if (HAVE_YASM) { int mm_flags = av_get_cpu_flags();
int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_SSE) { if (EXTERNAL_SSE(mm_flags)) {
s->sum_square = ff_sbr_sum_square_sse; s->sum_square = ff_sbr_sum_square_sse;
s->hf_g_filt = ff_sbr_hf_g_filt_sse; s->hf_g_filt = ff_sbr_hf_g_filt_sse;
}
} }
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavcodec/vp3dsp.h" #include "libavcodec/vp3dsp.h"
#include "config.h" #include "config.h"
...@@ -38,18 +39,17 @@ void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); ...@@ -38,18 +39,17 @@ void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
{ {
#if HAVE_YASM
int cpuflags = av_get_cpu_flags(); int cpuflags = av_get_cpu_flags();
#if ARCH_X86_32 #if ARCH_X86_32
if (HAVE_MMX && cpuflags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(cpuflags)) {
c->idct_put = ff_vp3_idct_put_mmx; c->idct_put = ff_vp3_idct_put_mmx;
c->idct_add = ff_vp3_idct_add_mmx; c->idct_add = ff_vp3_idct_add_mmx;
c->idct_perm = FF_PARTTRANS_IDCT_PERM; c->idct_perm = FF_PARTTRANS_IDCT_PERM;
} }
#endif #endif
if (HAVE_MMXEXT && cpuflags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(cpuflags)) {
c->idct_dc_add = ff_vp3_idct_dc_add_mmx2; c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;
if (!(flags & CODEC_FLAG_BITEXACT)) { if (!(flags & CODEC_FLAG_BITEXACT)) {
...@@ -58,10 +58,9 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) ...@@ -58,10 +58,9 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
} }
} }
if (cpuflags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(cpuflags)) {
c->idct_put = ff_vp3_idct_put_sse2; c->idct_put = ff_vp3_idct_put_sse2;
c->idct_add = ff_vp3_idct_add_sse2; c->idct_add = ff_vp3_idct_add_sse2;
c->idct_perm = FF_TRANSPOSE_IDCT_PERM; c->idct_perm = FF_TRANSPOSE_IDCT_PERM;
} }
#endif
} }
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/vp56dsp.h" #include "libavcodec/vp56dsp.h"
...@@ -32,19 +33,17 @@ void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride, ...@@ -32,19 +33,17 @@ void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride,
av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum AVCodecID codec) av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum AVCodecID codec)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (CONFIG_VP6_DECODER && codec == AV_CODEC_ID_VP6) { if (CONFIG_VP6_DECODER && codec == AV_CODEC_ID_VP6) {
#if ARCH_X86_32 #if ARCH_X86_32
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
} }
#endif #endif
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2; c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
} }
} }
#endif
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "config.h" #include "config.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavresample/audio_convert.h" #include "libavresample/audio_convert.h"
/* flat conversions */ /* flat conversions */
...@@ -144,16 +145,15 @@ extern void ff_conv_flt_to_fltp_6ch_avx (float *const *dst, float *src, int len, ...@@ -144,16 +145,15 @@ extern void ff_conv_flt_to_fltp_6ch_avx (float *const *dst, float *src, int len,
av_cold void ff_audio_convert_init_x86(AudioConvert *ac) av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX && HAVE_MMX) { if (EXTERNAL_MMX(mm_flags)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx); 0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx); 6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx);
} }
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { if (EXTERNAL_SSE(mm_flags)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse); 6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
...@@ -161,7 +161,7 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ...@@ -161,7 +161,7 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse); 2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse);
} }
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { if (EXTERNAL_SSE2(mm_flags)) {
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2); 0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2);
...@@ -206,7 +206,7 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ...@@ -206,7 +206,7 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2); 6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2);
} }
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) { if (EXTERNAL_SSSE3(mm_flags)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3); 6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
...@@ -220,13 +220,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ...@@ -220,13 +220,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3); 6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3);
} }
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { if (EXTERNAL_SSE4(mm_flags)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4); 0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4);
} }
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { if (EXTERNAL_AVX(mm_flags)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
0, 32, 16, "AVX", ff_conv_s32_to_flt_avx); 0, 32, 16, "AVX", ff_conv_s32_to_flt_avx);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
...@@ -260,5 +260,4 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ...@@ -260,5 +260,4 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
6, 16, 4, "AVX", ff_conv_flt_to_fltp_6ch_avx); 6, 16, 4, "AVX", ff_conv_flt_to_fltp_6ch_avx);
} }
#endif
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "config.h" #include "config.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavresample/audio_mix.h" #include "libavresample/audio_mix.h"
extern void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len, extern void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len,
...@@ -105,7 +106,7 @@ DEFINE_MIX_3_8_TO_1_2(7) ...@@ -105,7 +106,7 @@ DEFINE_MIX_3_8_TO_1_2(7)
DEFINE_MIX_3_8_TO_1_2(8) DEFINE_MIX_3_8_TO_1_2(8)
#define SET_MIX_3_8_TO_1_2(chan) \ #define SET_MIX_3_8_TO_1_2(chan) \
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { \ if (EXTERNAL_SSE(mm_flags)) { \
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
chan, 1, 16, 4, "SSE", \ chan, 1, 16, 4, "SSE", \
ff_mix_ ## chan ## _to_1_fltp_flt_sse); \ ff_mix_ ## chan ## _to_1_fltp_flt_sse); \
...@@ -113,7 +114,7 @@ DEFINE_MIX_3_8_TO_1_2(8) ...@@ -113,7 +114,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
chan, 2, 16, 4, "SSE", \ chan, 2, 16, 4, "SSE", \
ff_mix_## chan ##_to_2_fltp_flt_sse); \ ff_mix_## chan ##_to_2_fltp_flt_sse); \
} \ } \
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { \ if (EXTERNAL_SSE2(mm_flags)) { \
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
chan, 1, 16, 8, "SSE2", \ chan, 1, 16, 8, "SSE2", \
ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \ ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \
...@@ -121,7 +122,7 @@ DEFINE_MIX_3_8_TO_1_2(8) ...@@ -121,7 +122,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
chan, 2, 16, 8, "SSE2", \ chan, 2, 16, 8, "SSE2", \
ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \ ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \
} \ } \
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { \ if (EXTERNAL_SSE4(mm_flags)) { \
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
chan, 1, 16, 8, "SSE4", \ chan, 1, 16, 8, "SSE4", \
ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \ ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \
...@@ -129,7 +130,7 @@ DEFINE_MIX_3_8_TO_1_2(8) ...@@ -129,7 +130,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
chan, 2, 16, 8, "SSE4", \ chan, 2, 16, 8, "SSE4", \
ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \ ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \
} \ } \
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { \ if (EXTERNAL_AVX(mm_flags)) { \
int ptr_align = 32; \ int ptr_align = 32; \
int smp_align = 8; \ int smp_align = 8; \
if (ARCH_X86_32 || chan >= 6) { \ if (ARCH_X86_32 || chan >= 6) { \
...@@ -149,7 +150,7 @@ DEFINE_MIX_3_8_TO_1_2(8) ...@@ -149,7 +150,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
chan, 2, 16, 8, "AVX", \ chan, 2, 16, 8, "AVX", \
ff_mix_ ## chan ## _to_2_s16p_flt_avx); \ ff_mix_ ## chan ## _to_2_s16p_flt_avx); \
} \ } \
if (mm_flags & AV_CPU_FLAG_FMA4 && HAVE_FMA4) { \ if (EXTERNAL_FMA4(mm_flags)) { \
int ptr_align = 32; \ int ptr_align = 32; \
int smp_align = 8; \ int smp_align = 8; \
if (ARCH_X86_32 || chan >= 6) { \ if (ARCH_X86_32 || chan >= 6) { \
...@@ -175,13 +176,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am) ...@@ -175,13 +176,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am)
#if HAVE_YASM #if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { if (EXTERNAL_SSE(mm_flags)) {
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse); 2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse); 1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
} }
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { if (EXTERNAL_SSE2(mm_flags)) {
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2); 2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
...@@ -189,13 +190,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am) ...@@ -189,13 +190,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am)
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2); 1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
} }
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { if (EXTERNAL_SSE4(mm_flags)) {
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4); 2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4); 1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
} }
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { if (EXTERNAL_AVX(mm_flags)) {
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx); 2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/float_dsp.h" #include "libavutil/float_dsp.h"
#include "cpu.h"
extern void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1, extern void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
int len); int len);
...@@ -33,16 +34,14 @@ extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul, ...@@ -33,16 +34,14 @@ extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { if (EXTERNAL_SSE(mm_flags)) {
fdsp->vector_fmul = ff_vector_fmul_sse; fdsp->vector_fmul = ff_vector_fmul_sse;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
} }
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX_EXTERNAL) { if (EXTERNAL_AVX(mm_flags)) {
fdsp->vector_fmul = ff_vector_fmul_avx; fdsp->vector_fmul = ff_vector_fmul_avx;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
} }
#endif
} }
...@@ -47,6 +47,7 @@ ...@@ -47,6 +47,7 @@
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "rgb2rgb.h" #include "rgb2rgb.h"
#include "swscale.h" #include "swscale.h"
#include "swscale_internal.h" #include "swscale_internal.h"
...@@ -497,7 +498,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, ...@@ -497,7 +498,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos,
filterAlign = 1; filterAlign = 1;
} }
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { if (INLINE_MMX(cpu_flags)) {
// special case for unscaled vertical filtering // special case for unscaled vertical filtering
if (minFilterSize == 1 && filterAlign == 2) if (minFilterSize == 1 && filterAlign == 2)
filterAlign = 1; filterAlign = 1;
...@@ -1024,8 +1025,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -1024,8 +1025,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->srcBpc = 16; c->srcBpc = 16;
if (c->dstBpc == 16) if (c->dstBpc == 16)
dst_stride <<= 1; dst_stride <<= 1;
if (HAVE_MMXEXT && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMXEXT && if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 14) {
c->srcBpc == 8 && c->dstBpc <= 14) {
c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 && c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 &&
(srcW & 15) == 0) ? 1 : 0; (srcW & 15) == 0) ? 1 : 0;
if (!c->canMMX2BeUsed && dstW >= srcW && (srcW & 15) == 0 if (!c->canMMX2BeUsed && dstW >= srcW && (srcW & 15) == 0
...@@ -1055,7 +1055,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -1055,7 +1055,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->chrXInc += 20; c->chrXInc += 20;
} }
// we don't use the x86 asm scaler if MMX is available // we don't use the x86 asm scaler if MMX is available
else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX && c->dstBpc <= 14) { else if (INLINE_MMX(cpu_flags) && c->dstBpc <= 14) {
c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20; c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20;
c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20; c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20;
} }
...@@ -1273,11 +1273,11 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -1273,11 +1273,11 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
#endif #endif
av_get_pix_fmt_name(dstFormat)); av_get_pix_fmt_name(dstFormat));
if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT) if (INLINE_MMXEXT(cpu_flags))
av_log(c, AV_LOG_INFO, "using MMX2\n"); av_log(c, AV_LOG_INFO, "using MMX2\n");
else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) else if (INLINE_AMD3DNOW(cpu_flags))
av_log(c, AV_LOG_INFO, "using 3DNOW\n"); av_log(c, AV_LOG_INFO, "using 3DNOW\n");
else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) else if (INLINE_MMX(cpu_flags))
av_log(c, AV_LOG_INFO, "using MMX\n"); av_log(c, AV_LOG_INFO, "using MMX\n");
else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC)
av_log(c, AV_LOG_INFO, "using AltiVec\n"); av_log(c, AV_LOG_INFO, "using AltiVec\n");
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "config.h" #include "config.h"
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/bswap.h" #include "libavutil/bswap.h"
#include "libswscale/rgb2rgb.h" #include "libswscale/rgb2rgb.h"
...@@ -136,13 +137,13 @@ av_cold void rgb2rgb_init_x86(void) ...@@ -136,13 +137,13 @@ av_cold void rgb2rgb_init_x86(void)
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (cpu_flags & AV_CPU_FLAG_MMX) if (INLINE_MMX(cpu_flags))
rgb2rgb_init_MMX(); rgb2rgb_init_MMX();
if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) if (INLINE_AMD3DNOW(cpu_flags))
rgb2rgb_init_3DNOW(); rgb2rgb_init_3DNOW();
if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT) if (INLINE_MMXEXT(cpu_flags))
rgb2rgb_init_MMX2(); rgb2rgb_init_MMX2();
if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2) if (INLINE_SSE2(cpu_flags))
rgb2rgb_init_SSE2(); rgb2rgb_init_SSE2();
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
} }
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "libavutil/avassert.h" #include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h" #include "libavutil/intreadwrite.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
...@@ -385,7 +386,6 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c) ...@@ -385,7 +386,6 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
#endif #endif
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \ #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
if (c->srcBpc == 8) { \ if (c->srcBpc == 8) { \
hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
...@@ -436,7 +436,7 @@ switch(c->dstBpc){ \ ...@@ -436,7 +436,7 @@ switch(c->dstBpc){ \
c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \ c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
break break
#if ARCH_X86_32 #if ARCH_X86_32
if (cpu_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(cpu_flags)) {
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMXEXT); ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMXEXT);
...@@ -471,7 +471,7 @@ switch(c->dstBpc){ \ ...@@ -471,7 +471,7 @@ switch(c->dstBpc){ \
break; break;
} }
} }
if (cpu_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(cpu_flags)) {
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1); ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1);
} }
#endif /* ARCH_X86_32 */ #endif /* ARCH_X86_32 */
...@@ -483,7 +483,7 @@ switch(c->dstBpc){ \ ...@@ -483,7 +483,7 @@ switch(c->dstBpc){ \
else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \ else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
break; \ break; \
} }
if (cpu_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(cpu_flags)) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2); ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, , ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
...@@ -520,7 +520,7 @@ switch(c->dstBpc){ \ ...@@ -520,7 +520,7 @@ switch(c->dstBpc){ \
break; break;
} }
} }
if (cpu_flags & AV_CPU_FLAG_SSSE3) { if (EXTERNAL_SSSE3(cpu_flags)) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3); ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3); ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
switch (c->srcFormat) { switch (c->srcFormat) {
...@@ -530,7 +530,7 @@ switch(c->dstBpc){ \ ...@@ -530,7 +530,7 @@ switch(c->dstBpc){ \
break; break;
} }
} }
if (cpu_flags & AV_CPU_FLAG_SSE4) { if (EXTERNAL_SSE4(cpu_flags)) {
/* Xto15 don't need special sse4 functions */ /* Xto15 don't need special sse4 functions */
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3); ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3); ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
...@@ -541,7 +541,7 @@ switch(c->dstBpc){ \ ...@@ -541,7 +541,7 @@ switch(c->dstBpc){ \
c->yuv2plane1 = ff_yuv2plane1_16_sse4; c->yuv2plane1 = ff_yuv2plane1_16_sse4;
} }
if (HAVE_AVX_EXTERNAL && cpu_flags & AV_CPU_FLAG_AVX) { if (EXTERNAL_AVX(cpu_flags)) {
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, , ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
HAVE_ALIGNED_STACK || ARCH_X86_64); HAVE_ALIGNED_STACK || ARCH_X86_64);
ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
...@@ -569,5 +569,4 @@ switch(c->dstBpc){ \ ...@@ -569,5 +569,4 @@ switch(c->dstBpc){ \
break; break;
} }
} }
#endif
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment