Commit c91a798a authored by Michael Niedermayer

Merge remote-tracking branch 'cehoyos/master'

* cehoyos/master:
  Fix compilation with !HAVE_6REGS.
  Use MANGLE in cavsdsp.c to save two registers using gcc.
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents e8fc91e2 b38910c9
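
A note on the two techniques in this merge. The HAVE_6REGS guard disables inline-asm blocks whose operand lists need six general-purpose registers, which 32-bit x86 cannot always supply (PIC reserves EBX, and a frame pointer reserves EBP, leaving five). The MANGLE change in cavsdsp.c drops "m" input operands for static constants and instead pastes each symbol's mangled name straight into the instruction string, so gcc no longer has to keep an addressable operand (and, per the commit message, two registers) around for them; NAMED_CONSTRAINTS_ADD re-declares the symbols as named constraints on targets where asm cannot reference globals directly. A minimal sketch of the MANGLE idiom, assuming the libavutil macros used in the diff below — the constant and function names here are invented for illustration, not code from this commit:

#include <stdint.h>
#include "libavutil/mem.h"      /* DECLARE_ASM_CONST (assumed location) */
#include "libavutil/x86/asm.h"  /* MANGLE, NAMED_CONSTRAINTS_ADD */

DECLARE_ASM_CONST(8, uint64_t, my_pw_5) = 0x0005000500050005ULL;

/* Multiply four packed 16-bit words by 5, in place. */
static void mul5_mmx(int16_t *v)
{
    __asm__ volatile(
        "movq (%0), %%mm0                \n\t"
        /* constant referenced by symbol name: no "m" operand, no register
         * spent addressing it */
        "pmullw "MANGLE(my_pw_5)", %%mm0 \n\t"
        "movq %%mm0, (%0)                \n\t"
        "emms                            \n\t"
        : : "r"(v)
          NAMED_CONSTRAINTS_ADD(my_pw_5)
        : "memory");
}

In the cavsdsp.c diff below, removing the "m"(ADD) and "m"(MUL1) operands this way is what frees the two registers the commit message mentions.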
...
@@ -211,10 +211,10 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
  ****************************************************************************/
 /* vertical filter [-1 -2 96 42 -7 0] */
-#define QPEL_CAVSV1(A,B,C,D,E,F,OP,MUL2) \
+#define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
     "movd (%0), "#F" \n\t"\
     "movq "#C", %%mm6 \n\t"\
-    "pmullw %5, %%mm6 \n\t"\
+    "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
     "movq "#D", %%mm7 \n\t"\
     "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
     "psllw $3, "#E" \n\t"\
@@ -229,35 +229,35 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
     "psubw "#B", %%mm6 \n\t"\
     "psraw $1, "#B" \n\t"\
     "psubw "#A", %%mm6 \n\t"\
-    "paddw %4, %%mm6 \n\t"\
+    "paddw "MANGLE(ADD)", %%mm6 \n\t"\
     "psraw $7, %%mm6 \n\t"\
     "packuswb %%mm6, %%mm6 \n\t"\
     OP(%%mm6, (%1), A, d) \
     "add %3, %1 \n\t"
 /* vertical filter [ 0 -1 5 5 -1 0] */
-#define QPEL_CAVSV2(A,B,C,D,E,F,OP,MUL2) \
+#define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
     "movd (%0), "#F" \n\t"\
     "movq "#C", %%mm6 \n\t"\
     "paddw "#D", %%mm6 \n\t"\
-    "pmullw %5, %%mm6 \n\t"\
+    "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
     "add %2, %0 \n\t"\
     "punpcklbw %%mm7, "#F" \n\t"\
     "psubw "#B", %%mm6 \n\t"\
     "psubw "#E", %%mm6 \n\t"\
-    "paddw %4, %%mm6 \n\t"\
+    "paddw "MANGLE(ADD)", %%mm6 \n\t"\
     "psraw $3, %%mm6 \n\t"\
     "packuswb %%mm6, %%mm6 \n\t"\
     OP(%%mm6, (%1), A, d) \
     "add %3, %1 \n\t"
 /* vertical filter [ 0 -7 42 96 -2 -1] */
-#define QPEL_CAVSV3(A,B,C,D,E,F,OP,MUL2) \
+#define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
     "movd (%0), "#F" \n\t"\
     "movq "#C", %%mm6 \n\t"\
     "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
     "movq "#D", %%mm7 \n\t"\
-    "pmullw %5, %%mm7 \n\t"\
+    "pmullw "MANGLE(MUL1)", %%mm7\n\t"\
     "psllw $3, "#B" \n\t"\
     "psubw "#B", %%mm6 \n\t"\
     "psraw $3, "#B" \n\t"\
@@ -270,7 +270,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
     "psubw "#E", %%mm6 \n\t"\
     "psraw $1, "#E" \n\t"\
     "psubw "#F", %%mm6 \n\t"\
-    "paddw %4, %%mm6 \n\t"\
+    "paddw "MANGLE(ADD)", %%mm6 \n\t"\
     "psraw $7, %%mm6 \n\t"\
     "packuswb %%mm6, %%mm6 \n\t"\
     OP(%%mm6, (%1), A, d) \
@@ -299,34 +299,34 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
     "punpcklbw %%mm7, %%mm2 \n\t"\
     "punpcklbw %%mm7, %%mm3 \n\t"\
     "punpcklbw %%mm7, %%mm4 \n\t"\
-    VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
-    VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
-    VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
-    VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
-    VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
-    VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
-    VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
-    VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
+    VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
+    VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
+    VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
+    VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
+    VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
+    VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
+    VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
+    VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
     \
     : "+a"(src), "+c"(dst)\
-    : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
-      NAMED_CONSTRAINTS_ADD(MUL2)\
+    : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
+      NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
     : "memory"\
     );\
     if(h==16){\
         __asm__ volatile(\
-            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
-            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
-            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
-            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
-            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
-            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
-            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
-            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
+            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
             \
             : "+a"(src), "+c"(dst)\
-            : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
-              NAMED_CONSTRAINTS_ADD(MUL2)\
+            : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
+              NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
            : "memory"\
         );\
     }\
@@ -339,7 +339,7 @@ static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstSt
     int h=8;\
     __asm__ volatile(\
         "pxor %%mm7, %%mm7 \n\t"\
-        "movq %5, %%mm6 \n\t"\
+        "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
         "1: \n\t"\
         "movq (%0), %%mm0 \n\t"\
         "movq 1(%0), %%mm2 \n\t"\
@@ -365,7 +365,7 @@ static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstSt
         "paddw %%mm3, %%mm5 \n\t"\
         "psubw %%mm2, %%mm0 \n\t"\
         "psubw %%mm5, %%mm1 \n\t"\
-        "movq %6, %%mm5 \n\t"\
+        "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\
        "paddw %%mm5, %%mm0 \n\t"\
         "paddw %%mm5, %%mm1 \n\t"\
         "psraw $3, %%mm0 \n\t"\
@@ -377,7 +377,8 @@ static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstSt
         "decl %2 \n\t"\
         " jnz 1b \n\t"\
         : "+a"(src), "+c"(dst), "+m"(h)\
-        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
+        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
+          NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\
         : "memory"\
     );\
 }\
...
@@ -24,6 +24,7 @@
 #include "config.h"
 #include "libavutil/common.h"
+#include "libavutil/x86/asm.h"
 #if HAVE_INLINE_ASM
@@ -88,6 +89,7 @@ static inline av_const int mid_pred(int a, int b, int c)
     return i;
 }
+#if HAVE_6REGS
 #define COPY3_IF_LT(x, y, a, b, c, d)\
 __asm__ volatile(\
     "cmpl %0, %3 \n\t"\
@@ -97,6 +99,7 @@ __asm__ volatile(\
     : "+&r" (x), "+&r" (a), "+r" (c)\
     : "r" (y), "r" (b), "r" (d)\
 );
+#endif /* HAVE_6REGS */
 #endif /* HAVE_I686 */
 #define MASK_ABS(mask, level) \
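
The guard just added around COPY3_IF_LT is the pattern repeated in every file below: that macro alone names six "r" operands (x, a, c, y, b, d), so it only compiles where six general-purpose registers are actually free. A minimal sketch of the constraint — the function name is invented for illustration, not code from this commit:

#include "config.h"  /* HAVE_6REGS comes from FFmpeg's configure */

#if HAVE_6REGS
/* Six distinct "r" operands force the compiler to keep six GPRs live at
 * once; without enough registers (x86-32 with PIC and a frame pointer has
 * only five) gcc aborts with "can't find a register in class ...". */
static int sum_six(int a, int b, int c, int d, int e, int f)
{
    __asm__ ("addl %1, %0 \n\t"
             "addl %2, %0 \n\t"
             "addl %3, %0 \n\t"
             "addl %4, %0 \n\t"
             "addl %5, %0 \n\t"
             : "+r"(a)
             : "r"(b), "r"(c), "r"(d), "r"(e), "r"(f));
    return a;
}
#endif /* HAVE_6REGS */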
...
@@ -45,7 +45,7 @@ void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
 DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
-#if HAVE_SSE2_INLINE
+#if HAVE_6REGS && HAVE_SSE2_INLINE
 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
@@ -189,7 +189,7 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out,
     *out = sum;
 }
-#endif /* HAVE_SSE2_INLINE */
+#endif /* HAVE_6REGS && HAVE_SSE2_INLINE */
 #if HAVE_YASM
 #define DECL_IMDCT_BLOCKS(CPU1, CPU2) \
@@ -255,7 +255,7 @@ av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
     }
 }
-#if HAVE_SSE2_INLINE
+#if HAVE_6REGS && HAVE_SSE2_INLINE
     if (INLINE_SSE2(cpu_flags)) {
         s->apply_window_float = apply_window_mp3;
     }
...
@@ -30,6 +30,8 @@
 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
 DECLARE_ALIGNED(16, static uint16_t, inv_zigzag_direct16)[64];
+#if HAVE_6REGS
 #if HAVE_MMX_INLINE
 #define COMPILE_TEMPLATE_MMXEXT 0
 #define COMPILE_TEMPLATE_SSE2 0
@@ -81,6 +83,8 @@ DECLARE_ALIGNED(16, static uint16_t, inv_zigzag_direct16)[64];
 #include "mpegvideoenc_template.c"
 #endif /* HAVE_SSSE3_INLINE */
+#endif /* HAVE_6REGS */
 #if HAVE_INLINE_ASM
 static void denoise_dct_mmx(MpegEncContext *s, int16_t *block){
     const int intra= s->mb_intra;
@@ -205,21 +209,25 @@ av_cold void ff_dct_encode_init_x86(MpegEncContext *s)
 #if HAVE_MMX_INLINE
     int cpu_flags = av_get_cpu_flags();
     if (INLINE_MMX(cpu_flags)) {
+#if HAVE_6REGS
         s->dct_quantize = dct_quantize_MMX;
+#endif
         s->denoise_dct = denoise_dct_mmx;
     }
 #endif
-#if HAVE_MMXEXT_INLINE
+#if HAVE_6REGS && HAVE_MMXEXT_INLINE
     if (INLINE_MMXEXT(cpu_flags))
         s->dct_quantize = dct_quantize_MMXEXT;
 #endif
 #if HAVE_SSE2_INLINE
     if (INLINE_SSE2(cpu_flags)) {
+#if HAVE_6REGS
         s->dct_quantize = dct_quantize_SSE2;
+#endif
         s->denoise_dct = denoise_dct_sse2;
     }
 #endif
-#if HAVE_SSSE3_INLINE
+#if HAVE_6REGS && HAVE_SSSE3_INLINE
     if (INLINE_SSSE3(cpu_flags))
         s->dct_quantize = dct_quantize_SSSE3;
 #endif
...
@@ -606,6 +606,7 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
 }
 #endif //HAVE_7REGS
+#if HAVE_6REGS
 #define snow_inner_add_yblock_sse2_header \
     IDWTELEM * * dst_array = sb->line + src_y;\
     x86_reg tmp;\
@@ -873,6 +874,7 @@ static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_str
     else
         ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
 }
+#endif /* HAVE_6REGS */
 #endif /* HAVE_INLINE_ASM */
@@ -887,7 +889,9 @@ void ff_dwt_init_x86(SnowDWTContext *c)
 #if HAVE_7REGS
         c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
 #endif
+#if HAVE_6REGS
         c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
+#endif
     }
     else{
         if (mm_flags & AV_CPU_FLAG_MMXEXT) {
@@ -896,7 +900,9 @@ void ff_dwt_init_x86(SnowDWTContext *c)
             c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
 #endif
         }
+#if HAVE_6REGS
         c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
+#endif
     }
 }
 #endif /* HAVE_INLINE_ASM */
...
@@ -27,6 +27,7 @@
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavutil/x86/cpu.h"
+#include "libavutil/x86/asm.h"
 #include "libavcodec/vc1dsp.h"
 #include "fpel.h"
 #include "vc1dsp.h"
@@ -86,10 +87,10 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
 {
     int cpu_flags = av_get_cpu_flags();
-    if (INLINE_MMX(cpu_flags))
+    if (HAVE_6REGS && INLINE_MMX(cpu_flags))
         ff_vc1dsp_init_mmx(dsp);
-    if (INLINE_MMXEXT(cpu_flags))
+    if (HAVE_6REGS && INLINE_MMXEXT(cpu_flags))
         ff_vc1dsp_init_mmxext(dsp);
 #define ASSIGN_LF(EXT) \
...
@@ -33,7 +33,7 @@
 #include "fpel.h"
 #include "vc1dsp.h"
-#if HAVE_INLINE_ASM
+#if HAVE_6REGS && HAVE_INLINE_ASM
 #define OP_PUT(S,D)
 #define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t"
@@ -760,4 +760,4 @@ av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
     dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmxext;
     dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmxext;
 }
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_6REGS && HAVE_INLINE_ASM */
...
@@ -63,6 +63,7 @@ void ff_vp3_h_loop_filter_mmxext(uint8_t *src, int stride,
     "paddb "#regb", "#regr" \n\t" \
     "paddb "#regd", "#regp" \n\t"
+#if HAVE_6REGS
 static void put_vp_no_rnd_pixels8_l2_mmx(uint8_t *dst, const uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h)
 {
 //    START_TIMER
@@ -94,15 +95,16 @@ static void put_vp_no_rnd_pixels8_l2_mmx(uint8_t *dst, const uint8_t *a, const u
     :"memory");
 //    STOP_TIMER("put_vp_no_rnd_pixels8_l2_mmx")
 }
+#endif /*HAVE_6REGS */
 #endif /* HAVE_MMX_INLINE */
 av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
 {
     int cpu_flags = av_get_cpu_flags();
-#if HAVE_MMX_INLINE
+#if HAVE_6REGS && HAVE_MMX_INLINE
     c->put_no_rnd_pixels_l2 = put_vp_no_rnd_pixels8_l2_mmx;
-#endif /* HAVE_MMX_INLINE */
+#endif /* HAVE_6REGS && HAVE_MMX_INLINE */
 #if ARCH_X86_32
     if (EXTERNAL_MMX(cpu_flags)) {
...
@@ -24,7 +24,7 @@
 #ifndef AVCODEC_X86_VP56_ARITH_H
 #define AVCODEC_X86_VP56_ARITH_H
-#if HAVE_INLINE_ASM && HAVE_FAST_CMOV
+#if HAVE_INLINE_ASM && HAVE_FAST_CMOV && HAVE_6REGS
 #define vp56_rac_get_prob vp56_rac_get_prob
 static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
 {
...
@@ -289,7 +289,7 @@ static inline void line_noise_avg_c(uint8_t *dst, const uint8_t *src,
 static inline void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
                                       int len, int8_t **shift)
 {
-#if HAVE_MMX_INLINE
+#if HAVE_MMX_INLINE && HAVE_6REGS
     x86_reg mmx_len= len&(~7);
     __asm__ volatile(
@@ -438,7 +438,9 @@ static av_cold int init(AVFilterContext *ctx)
     if (HAVE_MMX_INLINE &&
         cpu_flags & AV_CPU_FLAG_MMX) {
         n->line_noise = line_noise_mmx;
+#if HAVE_6REGS
         n->line_noise_avg = line_noise_avg_mmx;
+#endif
     }
     if (HAVE_MMXEXT_INLINE &&
         cpu_flags & AV_CPU_FLAG_MMXEXT)
...
@@ -666,6 +666,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
 #endif
+#if HAVE_6REGS
 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                    const int16_t **lumSrc, int lumFilterSize,
                                    const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -715,6 +716,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
     : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
     );
 }
+#endif /* HAVE_6REGS */
 #define REAL_WRITEYUY2(dst, dstw, index) \
     "packuswb %%mm3, %%mm3 \n\t"\
@@ -1664,7 +1666,9 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
         if (!(c->flags & SWS_FULL_CHR_H_INT)) {
             switch (c->dstFormat) {
             case AV_PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
+#if HAVE_6REGS
             case AV_PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X_ar);   break;
+#endif
             case AV_PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X_ar);  break;
             case AV_PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X_ar);  break;
             case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
@@ -1677,7 +1681,9 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
         if (!(c->flags & SWS_FULL_CHR_H_INT)) {
             switch (c->dstFormat) {
             case AV_PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
+#if HAVE_6REGS
             case AV_PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
+#endif
             case AV_PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X);  break;
             case AV_PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X);  break;
             case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
...
@@ -50,28 +50,28 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
 DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 //MMX versions
-#if HAVE_MMX_INLINE
+#if HAVE_MMX_INLINE && HAVE_6REGS
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 0
 #define RENAME(a) a ## _mmx
 #include "yuv2rgb_template.c"
-#endif /* HAVE_MMX_INLINE */
+#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
 // MMXEXT versions
-#if HAVE_MMXEXT_INLINE
+#if HAVE_MMXEXT_INLINE && HAVE_6REGS
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
 #define RENAME(a) a ## _mmxext
 #include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT_INLINE */
+#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
 #endif /* HAVE_INLINE_ASM */
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
-#if HAVE_MMX_INLINE
+#if HAVE_MMX_INLINE && HAVE_6REGS
     int cpu_flags = av_get_cpu_flags();
 #if HAVE_MMXEXT_INLINE
@@ -113,7 +113,7 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
             return yuv420_rgb15_mmx;
         }
     }
-#endif /* HAVE_MMX_INLINE */
+#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
     return NULL;
 }