Commit e0c6cce4 authored by Diego Biurrun

x86: Replace checks for CPU extensions and flags by convenience macros

This separates code relying on inline assembly from code relying on external
assembly and fixes instances where the coalesced check was incorrect.
parent 6a0200f2
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "dsputil_mmx.h" #include "dsputil_mmx.h"
#include "libavcodec/ac3dsp.h" #include "libavcodec/ac3dsp.h"
...@@ -50,29 +51,28 @@ extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_c ...@@ -50,29 +51,28 @@ extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_c
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
c->ac3_exponent_min = ff_ac3_exponent_min_mmx; c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx; c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx; c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
} }
if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) { if (EXTERNAL_AMD3DNOW(mm_flags)) {
c->extract_exponents = ff_ac3_extract_exponents_3dnow; c->extract_exponents = ff_ac3_extract_exponents_3dnow;
if (!bit_exact) { if (!bit_exact) {
c->float_to_fixed24 = ff_float_to_fixed24_3dnow; c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
} }
} }
if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
} }
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { if (EXTERNAL_SSE(mm_flags)) {
c->float_to_fixed24 = ff_float_to_fixed24_sse; c->float_to_fixed24 = ff_float_to_fixed24_sse;
} }
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { if (EXTERNAL_SSE2(mm_flags)) {
c->ac3_exponent_min = ff_ac3_exponent_min_sse2; c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
c->float_to_fixed24 = ff_float_to_fixed24_sse2; c->float_to_fixed24 = ff_float_to_fixed24_sse2;
...@@ -83,11 +83,10 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) ...@@ -83,11 +83,10 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
} }
} }
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) { if (EXTERNAL_SSSE3(mm_flags)) {
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
if (!(mm_flags & AV_CPU_FLAG_ATOM)) { if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
c->extract_exponents = ff_ac3_extract_exponents_ssse3; c->extract_exponents = ff_ac3_extract_exponents_ssse3;
} }
} }
#endif
} }
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/mpegvideo.h" #include "libavcodec/mpegvideo.h"
#include "libavcodec/mathops.h" #include "libavcodec/mathops.h"
...@@ -1180,17 +1181,16 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -1180,17 +1181,16 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
} }
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
#if HAVE_YASM if (EXTERNAL_MMX(mm_flags)) {
if (mm_flags & AV_CPU_FLAG_MMX) {
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx; c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx; c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2; c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2;
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2; c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2;
} }
if (mm_flags & AV_CPU_FLAG_SSE2){ if (EXTERNAL_SSE2(mm_flags)) {
c->sse[0] = ff_sse16_sse2; c->sse[0] = ff_sse16_sse2;
#if HAVE_ALIGNED_STACK #if HAVE_ALIGNED_STACK
...@@ -1199,14 +1199,11 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -1199,14 +1199,11 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
#endif #endif
} }
#if HAVE_SSSE3 && HAVE_ALIGNED_STACK if (EXTERNAL_SSSE3(mm_flags) && HAVE_ALIGNED_STACK) {
if (mm_flags & AV_CPU_FLAG_SSSE3) {
c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3; c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3; c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
} }
#endif
} }
#endif /* HAVE_YASM */
ff_dsputil_init_pix_mmx(c, avctx); ff_dsputil_init_pix_mmx(c, avctx);
} }
...@@ -17,29 +17,29 @@ ...@@ -17,29 +17,29 @@
*/ */
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/dct.h" #include "libavcodec/dct.h"
#include "fft.h" #include "fft.h"
av_cold void ff_fft_init_mmx(FFTContext *s) av_cold void ff_fft_init_mmx(FFTContext *s)
{ {
#if HAVE_YASM
int has_vectors = av_get_cpu_flags(); int has_vectors = av_get_cpu_flags();
#if ARCH_X86_32 #if ARCH_X86_32
if (has_vectors & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) { if (EXTERNAL_AMD3DNOW(has_vectors)) {
/* 3DNow! for K6-2/3 */ /* 3DNow! for K6-2/3 */
s->imdct_calc = ff_imdct_calc_3dnow; s->imdct_calc = ff_imdct_calc_3dnow;
s->imdct_half = ff_imdct_half_3dnow; s->imdct_half = ff_imdct_half_3dnow;
s->fft_calc = ff_fft_calc_3dnow; s->fft_calc = ff_fft_calc_3dnow;
} }
if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) { if (EXTERNAL_AMD3DNOWEXT(has_vectors)) {
/* 3DNowEx for K7 */ /* 3DNowEx for K7 */
s->imdct_calc = ff_imdct_calc_3dnowext; s->imdct_calc = ff_imdct_calc_3dnowext;
s->imdct_half = ff_imdct_half_3dnowext; s->imdct_half = ff_imdct_half_3dnowext;
s->fft_calc = ff_fft_calc_3dnowext; s->fft_calc = ff_fft_calc_3dnowext;
} }
#endif #endif
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) { if (EXTERNAL_SSE(has_vectors)) {
/* SSE for P3/P4/K8 */ /* SSE for P3/P4/K8 */
s->imdct_calc = ff_imdct_calc_sse; s->imdct_calc = ff_imdct_calc_sse;
s->imdct_half = ff_imdct_half_sse; s->imdct_half = ff_imdct_half_sse;
...@@ -47,26 +47,23 @@ av_cold void ff_fft_init_mmx(FFTContext *s) ...@@ -47,26 +47,23 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
s->fft_calc = ff_fft_calc_sse; s->fft_calc = ff_fft_calc_sse;
s->fft_permutation = FF_FFT_PERM_SWAP_LSBS; s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
} }
if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX && s->nbits >= 5) { if (EXTERNAL_AVX(has_vectors) && s->nbits >= 5) {
/* AVX for SB */ /* AVX for SB */
s->imdct_half = ff_imdct_half_avx; s->imdct_half = ff_imdct_half_avx;
s->fft_calc = ff_fft_calc_avx; s->fft_calc = ff_fft_calc_avx;
s->fft_permutation = FF_FFT_PERM_AVX; s->fft_permutation = FF_FFT_PERM_AVX;
} }
#endif
} }
#if CONFIG_DCT #if CONFIG_DCT
av_cold void ff_dct_init_mmx(DCTContext *s) av_cold void ff_dct_init_mmx(DCTContext *s)
{ {
#if HAVE_YASM
int has_vectors = av_get_cpu_flags(); int has_vectors = av_get_cpu_flags();
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) if (EXTERNAL_SSE(has_vectors))
s->dct32 = ff_dct32_float_sse; s->dct32 = ff_dct32_float_sse;
if (has_vectors & AV_CPU_FLAG_SSE2 && HAVE_SSE) if (EXTERNAL_SSE2(has_vectors))
s->dct32 = ff_dct32_float_sse2; s->dct32 = ff_dct32_float_sse2;
if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX) if (EXTERNAL_AVX(has_vectors))
s->dct32 = ff_dct32_float_avx; s->dct32 = ff_dct32_float_avx;
#endif
} }
#endif #endif
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/fmtconvert.h" #include "libavcodec/fmtconvert.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
...@@ -117,27 +118,27 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) ...@@ -117,27 +118,27 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
#if HAVE_YASM #if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
c->float_interleave = float_interleave_mmx; c->float_interleave = float_interleave_mmx;
if (HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW) { if (EXTERNAL_AMD3DNOW(mm_flags)) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16 = ff_float_to_int16_3dnow; c->float_to_int16 = ff_float_to_int16_3dnow;
c->float_to_int16_interleave = float_to_int16_interleave_3dnow; c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
} }
} }
if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) { if (EXTERNAL_AMD3DNOWEXT(mm_flags)) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16_interleave = float_to_int16_interleave_3dnowext; c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
} }
} }
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { if (EXTERNAL_SSE(mm_flags)) {
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse; c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
c->float_to_int16 = ff_float_to_int16_sse; c->float_to_int16 = ff_float_to_int16_sse;
c->float_to_int16_interleave = float_to_int16_interleave_sse; c->float_to_int16_interleave = float_to_int16_interleave_sse;
c->float_interleave = float_interleave_sse; c->float_interleave = float_interleave_sse;
} }
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2; c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
c->float_to_int16 = ff_float_to_int16_sse2; c->float_to_int16 = ff_float_to_int16_sse2;
c->float_to_int16_interleave = float_to_int16_interleave_sse2; c->float_to_int16_interleave = float_to_int16_interleave_sse2;
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
*/ */
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/h264pred.h" #include "libavcodec/h264pred.h"
#define PRED4x4(TYPE, DEPTH, OPT) \ #define PRED4x4(TYPE, DEPTH, OPT) \
...@@ -169,11 +170,10 @@ void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int s ...@@ -169,11 +170,10 @@ void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int s
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (bit_depth == 8) { if (bit_depth == 8) {
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx; h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx;
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx;
if (chroma_format_idc == 1) { if (chroma_format_idc == 1) {
...@@ -198,7 +198,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -198,7 +198,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
} }
} }
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx2; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx2;
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmx2; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmx2;
if (chroma_format_idc == 1) if (chroma_format_idc == 1)
...@@ -250,11 +250,11 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -250,11 +250,11 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
} }
} }
if (mm_flags & AV_CPU_FLAG_SSE) { if (EXTERNAL_SSE(mm_flags)) {
h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse; h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse;
} }
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2; h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_sse2; h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_sse2;
...@@ -277,7 +277,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -277,7 +277,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
} }
} }
if (mm_flags & AV_CPU_FLAG_SSSE3) { if (EXTERNAL_SSSE3(mm_flags)) {
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3;
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3;
if (chroma_format_idc == 1) if (chroma_format_idc == 1)
...@@ -308,7 +308,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -308,7 +308,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
} }
} }
} else if (bit_depth == 10) { } else if (bit_depth == 10) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
...@@ -324,7 +324,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -324,7 +324,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext; h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext;
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext;
} }
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2; h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2; h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2; h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2;
...@@ -356,7 +356,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -356,7 +356,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2; h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2;
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2;
} }
if (mm_flags & AV_CPU_FLAG_SSSE3) { if (EXTERNAL_SSSE3(mm_flags)) {
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3; h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3; h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3;
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3; h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3;
...@@ -367,8 +367,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -367,8 +367,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3; h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3;
h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3; h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3;
} }
#if HAVE_AVX if (EXTERNAL_AVX(mm_flags)) {
if (mm_flags & AV_CPU_FLAG_AVX) {
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx; h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx; h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx; h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx;
...@@ -384,7 +383,5 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -384,7 +383,5 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_avx; h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_avx;
h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_avx; h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_avx;
} }
#endif /* HAVE_AVX */
} }
#endif /* HAVE_YASM */
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/h264dsp.h" #include "libavcodec/h264dsp.h"
#include "dsputil_mmx.h" #include "dsputil_mmx.h"
...@@ -209,14 +210,13 @@ H264_BIWEIGHT_10_SSE(4, 10) ...@@ -209,14 +210,13 @@ H264_BIWEIGHT_10_SSE(4, 10)
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
const int chroma_format_idc) const int chroma_format_idc)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT) if (chroma_format_idc == 1 && EXTERNAL_MMXEXT(mm_flags))
c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2; c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2;
if (bit_depth == 8) { if (bit_depth == 8) {
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
c->h264_idct_dc_add = c->h264_idct_dc_add =
c->h264_idct_add = ff_h264_idct_add_8_mmx; c->h264_idct_add = ff_h264_idct_add_8_mmx;
c->h264_idct8_dc_add = c->h264_idct8_dc_add =
...@@ -230,7 +230,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -230,7 +230,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
if (mm_flags & AV_CPU_FLAG_CMOV) if (mm_flags & AV_CPU_FLAG_CMOV)
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx; c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2; c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2; c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2;
...@@ -259,7 +259,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -259,7 +259,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmx2; c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmx2;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmx2; c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmx2;
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
c->h264_idct8_add = ff_h264_idct8_add_8_sse2; c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
...@@ -282,23 +282,21 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -282,23 +282,21 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
#endif /* HAVE_ALIGNED_STACK */ #endif /* HAVE_ALIGNED_STACK */
} }
if (mm_flags & AV_CPU_FLAG_SSSE3) { if (EXTERNAL_SSSE3(mm_flags)) {
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3; c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3; c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
} }
if (mm_flags & AV_CPU_FLAG_AVX) { if (EXTERNAL_AVX(mm_flags) && HAVE_ALIGNED_STACK) {
#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx; c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx; c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
#endif /* HAVE_ALIGNED_STACK */
} }
} }
} }
} else if (bit_depth == 10) { } else if (bit_depth == 10) {
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
#if ARCH_X86_32 #if ARCH_X86_32
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmx2; c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmx2;
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2; c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2;
...@@ -308,7 +306,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -308,7 +306,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmx2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmx2;
#endif /* ARCH_X86_32 */ #endif /* ARCH_X86_32 */
c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmx2; c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmx2;
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
c->h264_idct_add = ff_h264_idct_add_10_sse2; c->h264_idct_add = ff_h264_idct_add_10_sse2;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
...@@ -338,7 +336,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -338,7 +336,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
#endif /* HAVE_ALIGNED_STACK */ #endif /* HAVE_ALIGNED_STACK */
} }
if (mm_flags & AV_CPU_FLAG_SSE4) { if (EXTERNAL_SSE4(mm_flags)) {
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4; c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4; c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4; c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
...@@ -347,8 +345,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -347,8 +345,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4; c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4; c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
} }
#if HAVE_AVX_EXTERNAL if (EXTERNAL_AVX(mm_flags)) {
if (mm_flags & AV_CPU_FLAG_AVX) {
c->h264_idct_dc_add = c->h264_idct_dc_add =
c->h264_idct_add = ff_h264_idct_add_10_avx; c->h264_idct_add = ff_h264_idct_add_10_avx;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
...@@ -371,9 +368,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, ...@@ -371,9 +368,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
#endif /* HAVE_ALIGNED_STACK */ #endif /* HAVE_ALIGNED_STACK */
} }
#endif /* HAVE_AVX_EXTERNAL */
} }
} }
} }
#endif /* HAVE_YASM */
} }
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/mpegaudiodsp.h" #include "libavcodec/mpegaudiodsp.h"
...@@ -247,18 +248,16 @@ void ff_mpadsp_init_mmx(MPADSPContext *s) ...@@ -247,18 +248,16 @@ void ff_mpadsp_init_mmx(MPADSPContext *s)
#endif /* HAVE_SSE2_INLINE */ #endif /* HAVE_SSE2_INLINE */
#if HAVE_YASM #if HAVE_YASM
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { if (EXTERNAL_AVX(mm_flags)) {
s->imdct36_blocks_float = imdct36_blocks_avx; s->imdct36_blocks_float = imdct36_blocks_avx;
#if HAVE_SSE } else if (EXTERNAL_SSSE3(mm_flags)) {
} else if (mm_flags & AV_CPU_FLAG_SSSE3) {
s->imdct36_blocks_float = imdct36_blocks_ssse3; s->imdct36_blocks_float = imdct36_blocks_ssse3;
} else if (mm_flags & AV_CPU_FLAG_SSE3) { } else if (EXTERNAL_SSE3(mm_flags)) {
s->imdct36_blocks_float = imdct36_blocks_sse3; s->imdct36_blocks_float = imdct36_blocks_sse3;
} else if (mm_flags & AV_CPU_FLAG_SSE2) { } else if (EXTERNAL_SSE2(mm_flags)) {
s->imdct36_blocks_float = imdct36_blocks_sse2; s->imdct36_blocks_float = imdct36_blocks_sse2;
} else if (mm_flags & AV_CPU_FLAG_SSE) { } else if (EXTERNAL_SSE(mm_flags)) {
s->imdct36_blocks_float = imdct36_blocks_sse; s->imdct36_blocks_float = imdct36_blocks_sse;
#endif /* HAVE_SSE */
} }
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
} }
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/mpegvideo.h" #include "libavcodec/mpegvideo.h"
...@@ -86,19 +87,19 @@ void ff_MPV_encode_init_x86(MpegEncContext *s) ...@@ -86,19 +87,19 @@ void ff_MPV_encode_init_x86(MpegEncContext *s)
if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) { if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
#if HAVE_MMX_INLINE #if HAVE_MMX_INLINE
if (mm_flags & AV_CPU_FLAG_MMX && HAVE_MMX) if (INLINE_MMX(mm_flags))
s->dct_quantize = dct_quantize_MMX; s->dct_quantize = dct_quantize_MMX;
#endif #endif
#if HAVE_MMXEXT_INLINE #if HAVE_MMXEXT_INLINE
if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) if (INLINE_MMXEXT(mm_flags))
s->dct_quantize = dct_quantize_MMX2; s->dct_quantize = dct_quantize_MMX2;
#endif #endif
#if HAVE_SSE2_INLINE #if HAVE_SSE2_INLINE
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE2) if (INLINE_SSE2(mm_flags))
s->dct_quantize = dct_quantize_SSE2; s->dct_quantize = dct_quantize_SSE2;
#endif #endif
#if HAVE_SSSE3_INLINE #if HAVE_SSSE3_INLINE
if (mm_flags & AV_CPU_FLAG_SSSE3) if (INLINE_SSSE3(mm_flags))
s->dct_quantize = dct_quantize_SSSE3; s->dct_quantize = dct_quantize_SSSE3;
#endif #endif
} }
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
*/ */
#include "libavutil/common.h" #include "libavutil/common.h"
#include "libavutil/cpu.h" #include "libavutil/x86/cpu.h"
#include "libavcodec/pngdsp.h" #include "libavcodec/pngdsp.h"
void ff_add_png_paeth_prediction_mmx2 (uint8_t *dst, uint8_t *src, void ff_add_png_paeth_prediction_mmx2 (uint8_t *dst, uint8_t *src,
...@@ -34,18 +34,16 @@ void ff_add_bytes_l2_sse2(uint8_t *dst, uint8_t *src1, ...@@ -34,18 +34,16 @@ void ff_add_bytes_l2_sse2(uint8_t *dst, uint8_t *src1,
void ff_pngdsp_init_x86(PNGDSPContext *dsp) void ff_pngdsp_init_x86(PNGDSPContext *dsp)
{ {
#if HAVE_YASM
int flags = av_get_cpu_flags(); int flags = av_get_cpu_flags();
#if ARCH_X86_32 #if ARCH_X86_32
if (flags & AV_CPU_FLAG_MMX) if (EXTERNAL_MMX(flags))
dsp->add_bytes_l2 = ff_add_bytes_l2_mmx; dsp->add_bytes_l2 = ff_add_bytes_l2_mmx;
#endif #endif
if (flags & AV_CPU_FLAG_MMXEXT) if (EXTERNAL_MMXEXT(flags))
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2; dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
if (flags & AV_CPU_FLAG_SSE2) if (EXTERNAL_SSE2(flags))
dsp->add_bytes_l2 = ff_add_bytes_l2_sse2; dsp->add_bytes_l2 = ff_add_bytes_l2_sse2;
if (flags & AV_CPU_FLAG_SSSE3) if (EXTERNAL_SSSE3(flags))
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3; dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
#endif
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include "libavutil/x86/cpu.h"
#include "libavcodec/proresdsp.h" #include "libavcodec/proresdsp.h"
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize, void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
...@@ -31,24 +32,22 @@ void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize, ...@@ -31,24 +32,22 @@ void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize,
void ff_proresdsp_x86_init(ProresDSPContext *dsp) void ff_proresdsp_x86_init(ProresDSPContext *dsp)
{ {
#if ARCH_X86_64 && HAVE_YASM #if ARCH_X86_64
int flags = av_get_cpu_flags(); int flags = av_get_cpu_flags();
if (flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(flags)) {
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
dsp->idct_put = ff_prores_idct_put_10_sse2; dsp->idct_put = ff_prores_idct_put_10_sse2;
} }
if (flags & AV_CPU_FLAG_SSE4) { if (EXTERNAL_SSE4(flags)) {
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
dsp->idct_put = ff_prores_idct_put_10_sse4; dsp->idct_put = ff_prores_idct_put_10_sse4;
} }
#if HAVE_AVX if (EXTERNAL_AVX(flags)) {
if (flags & AV_CPU_FLAG_AVX) {
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
dsp->idct_put = ff_prores_idct_put_10_avx; dsp->idct_put = ff_prores_idct_put_10_avx;
} }
#endif /* HAVE_AVX */ #endif /* ARCH_X86_64 */
#endif /* ARCH_X86_64 && HAVE_YASM */
} }
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/rv34dsp.h" #include "libavcodec/rv34dsp.h"
...@@ -32,16 +33,14 @@ void ff_rv34_idct_add_mmx2(uint8_t *dst, ptrdiff_t stride, DCTELEM *block); ...@@ -32,16 +33,14 @@ void ff_rv34_idct_add_mmx2(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp) av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX) if (EXTERNAL_MMX(mm_flags))
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx; c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2; c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2;
c->rv34_idct_add = ff_rv34_idct_add_mmx2; c->rv34_idct_add = ff_rv34_idct_add_mmx2;
} }
if (mm_flags & AV_CPU_FLAG_SSE4) if (EXTERNAL_SSE4(mm_flags))
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4; c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
#endif /* HAVE_YASM */
} }
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "libavcodec/rv34dsp.h" #include "libavcodec/rv34dsp.h"
#include "libavutil/mem.h" #include "libavutil/mem.h"
#include "libavutil/x86/cpu.h"
#include "dsputil_mmx.h" #include "dsputil_mmx.h"
#if HAVE_YASM #if HAVE_YASM
...@@ -191,7 +192,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) ...@@ -191,7 +192,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
#if HAVE_YASM #if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx; c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx;
c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx; c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx;
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
...@@ -204,7 +205,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) ...@@ -204,7 +205,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
QPEL_MC_SET(put_, _mmx) QPEL_MC_SET(put_, _mmx)
#endif #endif
} }
if (mm_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(mm_flags)) {
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2;
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2;
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2;
...@@ -214,14 +215,14 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) ...@@ -214,14 +215,14 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
#if ARCH_X86_32 #if ARCH_X86_32
QPEL_MC_SET(avg_, _mmx2) QPEL_MC_SET(avg_, _mmx2)
#endif #endif
} else if (mm_flags & AV_CPU_FLAG_3DNOW) { } else if (EXTERNAL_AMD3DNOW(mm_flags)) {
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow; c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow;
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow;
#if ARCH_X86_32 #if ARCH_X86_32
QPEL_MC_SET(avg_, _3dnow) QPEL_MC_SET(avg_, _3dnow)
#endif #endif
} }
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2;
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2;
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2;
...@@ -229,7 +230,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) ...@@ -229,7 +230,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
QPEL_MC_SET(put_, _sse2) QPEL_MC_SET(put_, _sse2)
QPEL_MC_SET(avg_, _sse2) QPEL_MC_SET(avg_, _sse2)
} }
if (mm_flags & AV_CPU_FLAG_SSSE3) { if (EXTERNAL_SSSE3(mm_flags)) {
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3;
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3;
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3;
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "config.h" #include "config.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/sbrdsp.h" #include "libavcodec/sbrdsp.h"
float ff_sbr_sum_square_sse(float (*x)[2], int n); float ff_sbr_sum_square_sse(float (*x)[2], int n);
...@@ -29,12 +30,10 @@ void ff_sbr_hf_g_filt_sse(float (*Y)[2], const float (*X_high)[40][2], ...@@ -29,12 +30,10 @@ void ff_sbr_hf_g_filt_sse(float (*Y)[2], const float (*X_high)[40][2],
void ff_sbrdsp_init_x86(SBRDSPContext *s) void ff_sbrdsp_init_x86(SBRDSPContext *s)
{ {
if (HAVE_YASM) {
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_SSE) { if (EXTERNAL_SSE(mm_flags)) {
s->sum_square = ff_sbr_sum_square_sse; s->sum_square = ff_sbr_sum_square_sse;
s->hf_g_filt = ff_sbr_hf_g_filt_sse; s->hf_g_filt = ff_sbr_hf_g_filt_sse;
} }
}
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavcodec/vp3dsp.h" #include "libavcodec/vp3dsp.h"
#include "config.h" #include "config.h"
...@@ -38,18 +39,17 @@ void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); ...@@ -38,18 +39,17 @@ void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
{ {
#if HAVE_YASM
int cpuflags = av_get_cpu_flags(); int cpuflags = av_get_cpu_flags();
#if ARCH_X86_32 #if ARCH_X86_32
if (HAVE_MMX && cpuflags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(cpuflags)) {
c->idct_put = ff_vp3_idct_put_mmx; c->idct_put = ff_vp3_idct_put_mmx;
c->idct_add = ff_vp3_idct_add_mmx; c->idct_add = ff_vp3_idct_add_mmx;
c->idct_perm = FF_PARTTRANS_IDCT_PERM; c->idct_perm = FF_PARTTRANS_IDCT_PERM;
} }
#endif #endif
if (HAVE_MMXEXT && cpuflags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(cpuflags)) {
c->idct_dc_add = ff_vp3_idct_dc_add_mmx2; c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;
if (!(flags & CODEC_FLAG_BITEXACT)) { if (!(flags & CODEC_FLAG_BITEXACT)) {
...@@ -58,10 +58,9 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) ...@@ -58,10 +58,9 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
} }
} }
if (cpuflags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(cpuflags)) {
c->idct_put = ff_vp3_idct_put_sse2; c->idct_put = ff_vp3_idct_put_sse2;
c->idct_add = ff_vp3_idct_add_sse2; c->idct_add = ff_vp3_idct_add_sse2;
c->idct_perm = FF_TRANSPOSE_IDCT_PERM; c->idct_perm = FF_TRANSPOSE_IDCT_PERM;
} }
#endif
} }
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/vp56dsp.h" #include "libavcodec/vp56dsp.h"
...@@ -32,19 +33,17 @@ void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride, ...@@ -32,19 +33,17 @@ void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride,
av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum AVCodecID codec) av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum AVCodecID codec)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (CONFIG_VP6_DECODER && codec == AV_CODEC_ID_VP6) { if (CONFIG_VP6_DECODER && codec == AV_CODEC_ID_VP6) {
#if ARCH_X86_32 #if ARCH_X86_32
if (mm_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(mm_flags)) {
c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
} }
#endif #endif
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2; c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
} }
} }
#endif
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "config.h" #include "config.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavresample/audio_convert.h" #include "libavresample/audio_convert.h"
/* flat conversions */ /* flat conversions */
...@@ -144,16 +145,15 @@ extern void ff_conv_flt_to_fltp_6ch_avx (float *const *dst, float *src, int len, ...@@ -144,16 +145,15 @@ extern void ff_conv_flt_to_fltp_6ch_avx (float *const *dst, float *src, int len,
av_cold void ff_audio_convert_init_x86(AudioConvert *ac) av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX && HAVE_MMX) { if (EXTERNAL_MMX(mm_flags)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx); 0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx); 6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx);
} }
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { if (EXTERNAL_SSE(mm_flags)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse); 6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
...@@ -161,7 +161,7 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ...@@ -161,7 +161,7 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse); 2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse);
} }
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { if (EXTERNAL_SSE2(mm_flags)) {
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2); 0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2);
...@@ -206,7 +206,7 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ...@@ -206,7 +206,7 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2); 6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2);
} }
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) { if (EXTERNAL_SSSE3(mm_flags)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3); 6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
...@@ -220,13 +220,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ...@@ -220,13 +220,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3); 6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3);
} }
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { if (EXTERNAL_SSE4(mm_flags)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4); 0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4);
} }
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { if (EXTERNAL_AVX(mm_flags)) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
0, 32, 16, "AVX", ff_conv_s32_to_flt_avx); 0, 32, 16, "AVX", ff_conv_s32_to_flt_avx);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
...@@ -260,5 +260,4 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ...@@ -260,5 +260,4 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
6, 16, 4, "AVX", ff_conv_flt_to_fltp_6ch_avx); 6, 16, 4, "AVX", ff_conv_flt_to_fltp_6ch_avx);
} }
#endif
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "config.h" #include "config.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavresample/audio_mix.h" #include "libavresample/audio_mix.h"
extern void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len, extern void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len,
...@@ -105,7 +106,7 @@ DEFINE_MIX_3_8_TO_1_2(7) ...@@ -105,7 +106,7 @@ DEFINE_MIX_3_8_TO_1_2(7)
DEFINE_MIX_3_8_TO_1_2(8) DEFINE_MIX_3_8_TO_1_2(8)
#define SET_MIX_3_8_TO_1_2(chan) \ #define SET_MIX_3_8_TO_1_2(chan) \
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { \ if (EXTERNAL_SSE(mm_flags)) { \
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
chan, 1, 16, 4, "SSE", \ chan, 1, 16, 4, "SSE", \
ff_mix_ ## chan ## _to_1_fltp_flt_sse); \ ff_mix_ ## chan ## _to_1_fltp_flt_sse); \
...@@ -113,7 +114,7 @@ DEFINE_MIX_3_8_TO_1_2(8) ...@@ -113,7 +114,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
chan, 2, 16, 4, "SSE", \ chan, 2, 16, 4, "SSE", \
ff_mix_## chan ##_to_2_fltp_flt_sse); \ ff_mix_## chan ##_to_2_fltp_flt_sse); \
} \ } \
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { \ if (EXTERNAL_SSE2(mm_flags)) { \
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
chan, 1, 16, 8, "SSE2", \ chan, 1, 16, 8, "SSE2", \
ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \ ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \
...@@ -121,7 +122,7 @@ DEFINE_MIX_3_8_TO_1_2(8) ...@@ -121,7 +122,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
chan, 2, 16, 8, "SSE2", \ chan, 2, 16, 8, "SSE2", \
ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \ ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \
} \ } \
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { \ if (EXTERNAL_SSE4(mm_flags)) { \
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
chan, 1, 16, 8, "SSE4", \ chan, 1, 16, 8, "SSE4", \
ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \ ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \
...@@ -129,7 +130,7 @@ DEFINE_MIX_3_8_TO_1_2(8) ...@@ -129,7 +130,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
chan, 2, 16, 8, "SSE4", \ chan, 2, 16, 8, "SSE4", \
ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \ ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \
} \ } \
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { \ if (EXTERNAL_AVX(mm_flags)) { \
int ptr_align = 32; \ int ptr_align = 32; \
int smp_align = 8; \ int smp_align = 8; \
if (ARCH_X86_32 || chan >= 6) { \ if (ARCH_X86_32 || chan >= 6) { \
...@@ -149,7 +150,7 @@ DEFINE_MIX_3_8_TO_1_2(8) ...@@ -149,7 +150,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
chan, 2, 16, 8, "AVX", \ chan, 2, 16, 8, "AVX", \
ff_mix_ ## chan ## _to_2_s16p_flt_avx); \ ff_mix_ ## chan ## _to_2_s16p_flt_avx); \
} \ } \
if (mm_flags & AV_CPU_FLAG_FMA4 && HAVE_FMA4) { \ if (EXTERNAL_FMA4(mm_flags)) { \
int ptr_align = 32; \ int ptr_align = 32; \
int smp_align = 8; \ int smp_align = 8; \
if (ARCH_X86_32 || chan >= 6) { \ if (ARCH_X86_32 || chan >= 6) { \
...@@ -175,13 +176,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am) ...@@ -175,13 +176,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am)
#if HAVE_YASM #if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { if (EXTERNAL_SSE(mm_flags)) {
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse); 2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse); 1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
} }
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { if (EXTERNAL_SSE2(mm_flags)) {
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2); 2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
...@@ -189,13 +190,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am) ...@@ -189,13 +190,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am)
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2); 1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
} }
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { if (EXTERNAL_SSE4(mm_flags)) {
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4); 2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4); 1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
} }
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { if (EXTERNAL_AVX(mm_flags)) {
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx); 2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/float_dsp.h" #include "libavutil/float_dsp.h"
#include "cpu.h"
extern void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1, extern void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
int len); int len);
...@@ -33,16 +34,14 @@ extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul, ...@@ -33,16 +34,14 @@ extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{ {
#if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { if (EXTERNAL_SSE(mm_flags)) {
fdsp->vector_fmul = ff_vector_fmul_sse; fdsp->vector_fmul = ff_vector_fmul_sse;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
} }
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { if (EXTERNAL_AVX(mm_flags)) {
fdsp->vector_fmul = ff_vector_fmul_avx; fdsp->vector_fmul = ff_vector_fmul_avx;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
} }
#endif
} }
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "rgb2rgb.h" #include "rgb2rgb.h"
#include "swscale.h" #include "swscale.h"
#include "swscale_internal.h" #include "swscale_internal.h"
...@@ -473,7 +474,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, ...@@ -473,7 +474,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos,
filterAlign = 1; filterAlign = 1;
} }
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { if (INLINE_MMX(cpu_flags)) {
// special case for unscaled vertical filtering // special case for unscaled vertical filtering
if (minFilterSize == 1 && filterAlign == 2) if (minFilterSize == 1 && filterAlign == 2)
filterAlign = 1; filterAlign = 1;
...@@ -973,8 +974,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -973,8 +974,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FF_ALLOC_OR_GOTO(c, c->formatConvBuffer,
(FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3) + 16, (FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3) + 16,
fail); fail);
if (HAVE_MMXEXT && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMXEXT && if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 10) {
c->srcBpc == 8 && c->dstBpc <= 10) {
c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 && c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 &&
(srcW & 15) == 0) ? 1 : 0; (srcW & 15) == 0) ? 1 : 0;
if (!c->canMMX2BeUsed && dstW >= srcW && (srcW & 15) == 0 if (!c->canMMX2BeUsed && dstW >= srcW && (srcW & 15) == 0
...@@ -1004,7 +1004,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -1004,7 +1004,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->chrXInc += 20; c->chrXInc += 20;
} }
// we don't use the x86 asm scaler if MMX is available // we don't use the x86 asm scaler if MMX is available
else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { else if (INLINE_MMX(cpu_flags)) {
c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20; c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20;
c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20; c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20;
} }
...@@ -1050,8 +1050,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -1050,8 +1050,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
} else } else
#endif /* HAVE_MMXEXT_INLINE */ #endif /* HAVE_MMXEXT_INLINE */
{ {
const int filterAlign = const int filterAlign = INLINE_MMX(cpu_flags) ? 4 :
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 :
(HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 :
1; 1;
...@@ -1074,8 +1073,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -1074,8 +1073,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
/* precalculate vertical scaler filter coefficients */ /* precalculate vertical scaler filter coefficients */
{ {
const int filterAlign = const int filterAlign = INLINE_MMX(cpu_flags) ? 2 :
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 2 :
(HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 :
1; 1;
...@@ -1208,11 +1206,11 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -1208,11 +1206,11 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
#endif #endif
sws_format_name(dstFormat)); sws_format_name(dstFormat));
if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT) if (INLINE_MMXEXT(cpu_flags))
av_log(c, AV_LOG_INFO, "using MMX2\n"); av_log(c, AV_LOG_INFO, "using MMX2\n");
else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) else if (INLINE_AMD3DNOW(cpu_flags))
av_log(c, AV_LOG_INFO, "using 3DNOW\n"); av_log(c, AV_LOG_INFO, "using 3DNOW\n");
else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) else if (INLINE_MMX(cpu_flags))
av_log(c, AV_LOG_INFO, "using MMX\n"); av_log(c, AV_LOG_INFO, "using MMX\n");
else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC)
av_log(c, AV_LOG_INFO, "using AltiVec\n"); av_log(c, AV_LOG_INFO, "using AltiVec\n");
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "config.h" #include "config.h"
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/bswap.h" #include "libavutil/bswap.h"
#include "libswscale/rgb2rgb.h" #include "libswscale/rgb2rgb.h"
...@@ -133,13 +134,13 @@ av_cold void rgb2rgb_init_x86(void) ...@@ -133,13 +134,13 @@ av_cold void rgb2rgb_init_x86(void)
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (cpu_flags & AV_CPU_FLAG_MMX) if (INLINE_MMX(cpu_flags))
rgb2rgb_init_MMX(); rgb2rgb_init_MMX();
if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) if (INLINE_AMD3DNOW(cpu_flags))
rgb2rgb_init_3DNOW(); rgb2rgb_init_3DNOW();
if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT) if (INLINE_MMXEXT(cpu_flags))
rgb2rgb_init_MMX2(); rgb2rgb_init_MMX2();
if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2) if (INLINE_SSE2(cpu_flags))
rgb2rgb_init_SSE2(); rgb2rgb_init_SSE2();
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
} }
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/intreadwrite.h" #include "libavutil/intreadwrite.h"
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
...@@ -314,7 +315,6 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c) ...@@ -314,7 +315,6 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
#endif #endif
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \ #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
if (c->srcBpc == 8) { \ if (c->srcBpc == 8) { \
hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
...@@ -357,7 +357,7 @@ switch(c->dstBpc){ \ ...@@ -357,7 +357,7 @@ switch(c->dstBpc){ \
c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \ c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
break break
#if ARCH_X86_32 #if ARCH_X86_32
if (cpu_flags & AV_CPU_FLAG_MMX) { if (EXTERNAL_MMX(cpu_flags)) {
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMXEXT); ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMXEXT);
...@@ -392,7 +392,7 @@ switch(c->dstBpc){ \ ...@@ -392,7 +392,7 @@ switch(c->dstBpc){ \
break; break;
} }
} }
if (cpu_flags & AV_CPU_FLAG_MMXEXT) { if (EXTERNAL_MMXEXT(cpu_flags)) {
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1); ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1);
} }
#endif /* ARCH_X86_32 */ #endif /* ARCH_X86_32 */
...@@ -404,7 +404,7 @@ switch(c->dstBpc){ \ ...@@ -404,7 +404,7 @@ switch(c->dstBpc){ \
else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \ else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
break; \ break; \
} }
if (cpu_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(cpu_flags)) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2); ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, , ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
...@@ -441,7 +441,7 @@ switch(c->dstBpc){ \ ...@@ -441,7 +441,7 @@ switch(c->dstBpc){ \
break; break;
} }
} }
if (cpu_flags & AV_CPU_FLAG_SSSE3) { if (EXTERNAL_SSSE3(cpu_flags)) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3); ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3); ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
switch (c->srcFormat) { switch (c->srcFormat) {
...@@ -451,7 +451,7 @@ switch(c->dstBpc){ \ ...@@ -451,7 +451,7 @@ switch(c->dstBpc){ \
break; break;
} }
} }
if (cpu_flags & AV_CPU_FLAG_SSE4) { if (EXTERNAL_SSE4(cpu_flags)) {
/* Xto15 don't need special sse4 functions */ /* Xto15 don't need special sse4 functions */
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3); ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3); ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
...@@ -462,7 +462,7 @@ switch(c->dstBpc){ \ ...@@ -462,7 +462,7 @@ switch(c->dstBpc){ \
c->yuv2plane1 = ff_yuv2plane1_16_sse4; c->yuv2plane1 = ff_yuv2plane1_16_sse4;
} }
if (cpu_flags & AV_CPU_FLAG_AVX) { if (EXTERNAL_AVX(cpu_flags)) {
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, , ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
HAVE_ALIGNED_STACK || ARCH_X86_64); HAVE_ALIGNED_STACK || ARCH_X86_64);
ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
...@@ -490,5 +490,4 @@ switch(c->dstBpc){ \ ...@@ -490,5 +490,4 @@ switch(c->dstBpc){ \
break; break;
} }
} }
#endif
} }
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment