Commit e20ac54f authored by Derek Buitenhuis, committed by Michael Niedermayer

postproc: Fix unprotected inline asm

Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent e592fd0e
...@@ -80,9 +80,9 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks ...@@ -80,9 +80,9 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
//#undef HAVE_MMXEXT //#undef HAVE_MMXEXT_INLINE
//#define HAVE_AMD3DNOW //#define HAVE_AMD3DNOW_INLINE
//#undef HAVE_MMX //#undef HAVE_MMX_INLINE
//#undef ARCH_X86 //#undef ARCH_X86
//#define DEBUG_BRIGHTNESS //#define DEBUG_BRIGHTNESS
#include "postprocess.h" #include "postprocess.h"
...@@ -116,7 +116,7 @@ const char *postproc_license(void) ...@@ -116,7 +116,7 @@ const char *postproc_license(void)
#define TEMP_STRIDE 8 #define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
#if ARCH_X86 #if ARCH_X86 && HAVE_INLINE_ASM
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL; DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL; DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL; DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
...@@ -165,7 +165,7 @@ static const char *replaceTable[]= ...@@ -165,7 +165,7 @@ static const char *replaceTable[]=
}; };
#if ARCH_X86 #if ARCH_X86 && HAVE_INLINE_ASM
static inline void prefetchnta(void *p) static inline void prefetchnta(void *p)
{ {
__asm__ volatile( "prefetchnta (%0)\n\t" __asm__ volatile( "prefetchnta (%0)\n\t"
...@@ -544,27 +544,27 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, ...@@ -544,27 +544,27 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
#define COMPILE_ALTIVEC #define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC #endif //HAVE_ALTIVEC
#if ARCH_X86 #if ARCH_X86 && HAVE_INLINE_ASM
#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMXEXT) || CONFIG_RUNTIME_CPUDETECT #if (HAVE_MMX_INLINE && !HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX #define COMPILE_MMX
#endif #endif
#if HAVE_MMXEXT || CONFIG_RUNTIME_CPUDETECT #if HAVE_MMXEXT_INLINE || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX2 #define COMPILE_MMX2
#endif #endif
#if (HAVE_AMD3DNOW && !HAVE_MMXEXT) || CONFIG_RUNTIME_CPUDETECT #if (HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_3DNOW #define COMPILE_3DNOW
#endif #endif
#endif /* ARCH_X86 */ #endif /* ARCH_X86 */
#undef HAVE_MMX #undef HAVE_MMX_INLINE
#define HAVE_MMX 0 #define HAVE_MMX_INLINE 0
#undef HAVE_MMXEXT #undef HAVE_MMXEXT_INLINE
#define HAVE_MMXEXT 0 #define HAVE_MMXEXT_INLINE 0
#undef HAVE_AMD3DNOW #undef HAVE_AMD3DNOW_INLINE
#define HAVE_AMD3DNOW 0 #define HAVE_AMD3DNOW_INLINE 0
#undef HAVE_ALTIVEC #undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 0 #define HAVE_ALTIVEC 0
...@@ -585,8 +585,8 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, ...@@ -585,8 +585,8 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
//MMX versions //MMX versions
#ifdef COMPILE_MMX #ifdef COMPILE_MMX
#undef RENAME #undef RENAME
#undef HAVE_MMX #undef HAVE_MMX_INLINE
#define HAVE_MMX 1 #define HAVE_MMX_INLINE 1
#define RENAME(a) a ## _MMX #define RENAME(a) a ## _MMX
#include "postprocess_template.c" #include "postprocess_template.c"
#endif #endif
...@@ -594,10 +594,10 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, ...@@ -594,10 +594,10 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
//MMX2 versions //MMX2 versions
#ifdef COMPILE_MMX2 #ifdef COMPILE_MMX2
#undef RENAME #undef RENAME
#undef HAVE_MMX #undef HAVE_MMX_INLINE
#undef HAVE_MMXEXT #undef HAVE_MMXEXT_INLINE
#define HAVE_MMX 1 #define HAVE_MMX_INLINE 1
#define HAVE_MMXEXT 1 #define HAVE_MMXEXT_INLINE 1
#define RENAME(a) a ## _MMX2 #define RENAME(a) a ## _MMX2
#include "postprocess_template.c" #include "postprocess_template.c"
#endif #endif
...@@ -605,12 +605,12 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, ...@@ -605,12 +605,12 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
//3DNOW versions //3DNOW versions
#ifdef COMPILE_3DNOW #ifdef COMPILE_3DNOW
#undef RENAME #undef RENAME
#undef HAVE_MMX #undef HAVE_MMX_INLINE
#undef HAVE_MMXEXT #undef HAVE_MMXEXT_INLINE
#undef HAVE_AMD3DNOW #undef HAVE_AMD3DNOW_INLINE
#define HAVE_MMX 1 #define HAVE_MMX_INLINE 1
#define HAVE_MMXEXT 0 #define HAVE_MMXEXT_INLINE 0
#define HAVE_AMD3DNOW 1 #define HAVE_AMD3DNOW_INLINE 1
#define RENAME(a) a ## _3DNow #define RENAME(a) a ## _3DNow
#include "postprocess_template.c" #include "postprocess_template.c"
#endif #endif
...@@ -633,7 +633,7 @@ static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[] ...@@ -633,7 +633,7 @@ static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[]
// difference would not be measurable here but it is much better because // difference would not be measurable here but it is much better because
// someone might exchange the CPU without restarting MPlayer ;) // someone might exchange the CPU without restarting MPlayer ;)
#if CONFIG_RUNTIME_CPUDETECT #if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86 #if ARCH_X86 && HAVE_INLINE_ASM
// ordered per speed fastest first // ordered per speed fastest first
if(c->cpuCaps & PP_CPU_CAPS_MMX2) if(c->cpuCaps & PP_CPU_CAPS_MMX2)
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
...@@ -652,11 +652,11 @@ static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[] ...@@ -652,11 +652,11 @@ static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[]
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif #endif
#else /* CONFIG_RUNTIME_CPUDETECT */ #else /* CONFIG_RUNTIME_CPUDETECT */
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif HAVE_AMD3DNOW #elif HAVE_AMD3DNOW_INLINE
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif HAVE_MMX #elif HAVE_MMX_INLINE
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif HAVE_ALTIVEC #elif HAVE_ALTIVEC
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
......
...@@ -30,32 +30,32 @@ ...@@ -30,32 +30,32 @@
#undef PMINUB #undef PMINUB
#undef PMAXUB #undef PMAXUB
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
#define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#elif HAVE_AMD3DNOW #elif HAVE_AMD3DNOW_INLINE
#define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif #endif
#define PAVGB(a,b) REAL_PAVGB(a,b) #define PAVGB(a,b) REAL_PAVGB(a,b)
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
#define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
#elif HAVE_MMX #elif HAVE_MMX_INLINE
#define PMINUB(b,a,t) \ #define PMINUB(b,a,t) \
"movq " #a ", " #t " \n\t"\ "movq " #a ", " #t " \n\t"\
"psubusb " #b ", " #t " \n\t"\ "psubusb " #b ", " #t " \n\t"\
"psubb " #t ", " #a " \n\t" "psubb " #t ", " #a " \n\t"
#endif #endif
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
#define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
#elif HAVE_MMX #elif HAVE_MMX_INLINE
#define PMAXUB(a,b) \ #define PMAXUB(a,b) \
"psubusb " #a ", " #b " \n\t"\ "psubusb " #a ", " #b " \n\t"\
"paddb " #a ", " #b " \n\t" "paddb " #a ", " #b " \n\t"
#endif #endif
//FIXME? |255-0| = 1 (should not be a problem ...) //FIXME? |255-0| = 1 (should not be a problem ...)
#if HAVE_MMX #if HAVE_MMX_INLINE
/** /**
* Check if the middle 8x8 Block in the given 8x16 block is flat * Check if the middle 8x8 Block in the given 8x16 block is flat
*/ */
...@@ -135,7 +135,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ ...@@ -135,7 +135,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
"psubusb %%mm3, %%mm4 \n\t" "psubusb %%mm3, %%mm4 \n\t"
" \n\t" " \n\t"
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"psadbw %%mm7, %%mm0 \n\t" "psadbw %%mm7, %%mm0 \n\t"
#else #else
...@@ -169,7 +169,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ ...@@ -169,7 +169,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
return 2; return 2;
} }
} }
#endif //HAVE_MMX #endif //HAVE_MMX_INLINE
/** /**
* Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
...@@ -178,7 +178,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ ...@@ -178,7 +178,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
#if !HAVE_ALTIVEC #if !HAVE_ALTIVEC
static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
{ {
#if HAVE_MMXEXT || HAVE_AMD3DNOW #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
src+= stride*3; src+= stride*3;
__asm__ volatile( //"movv %0 %1 %2\n\t" __asm__ volatile( //"movv %0 %1 %2\n\t"
"movq %2, %%mm0 \n\t" // QP,..., QP "movq %2, %%mm0 \n\t" // QP,..., QP
...@@ -305,7 +305,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) ...@@ -305,7 +305,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
: "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
: "%"REG_a, "%"REG_c : "%"REG_a, "%"REG_c
); );
#else //HAVE_MMXEXT || HAVE_AMD3DNOW #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
const int l1= stride; const int l1= stride;
const int l2= stride + l1; const int l2= stride + l1;
const int l3= stride + l2; const int l3= stride + l2;
...@@ -344,7 +344,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) ...@@ -344,7 +344,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
src++; src++;
} }
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
} }
#endif //HAVE_ALTIVEC #endif //HAVE_ALTIVEC
...@@ -357,7 +357,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) ...@@ -357,7 +357,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
*/ */
static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
{ {
#if HAVE_MMXEXT || HAVE_AMD3DNOW #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
src+= stride*3; src+= stride*3;
__asm__ volatile( __asm__ volatile(
...@@ -443,7 +443,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) ...@@ -443,7 +443,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
: "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
: "%"REG_a, "%"REG_c : "%"REG_a, "%"REG_c
); );
#else //HAVE_MMXEXT || HAVE_AMD3DNOW #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
const int l1= stride; const int l1= stride;
const int l2= stride + l1; const int l2= stride + l1;
...@@ -477,13 +477,13 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) ...@@ -477,13 +477,13 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
} }
src++; src++;
} }
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
} }
#if !HAVE_ALTIVEC #if !HAVE_ALTIVEC
static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
{ {
#if HAVE_MMXEXT || HAVE_AMD3DNOW #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
/* /*
uint8_t tmp[16]; uint8_t tmp[16];
const int l1= stride; const int l1= stride;
...@@ -764,7 +764,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext ...@@ -764,7 +764,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
} }
} }
*/ */
#elif HAVE_MMX #elif HAVE_MMX_INLINE
DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars
src+= stride*4; src+= stride*4;
__asm__ volatile( __asm__ volatile(
...@@ -872,7 +872,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext ...@@ -872,7 +872,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"movq (%3), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq (%3), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"movq 8(%3), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 "movq 8(%3), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
"movq %%mm7, %%mm6 \n\t" // 0 "movq %%mm7, %%mm6 \n\t" // 0
"psubw %%mm0, %%mm6 \n\t" "psubw %%mm0, %%mm6 \n\t"
"pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
...@@ -904,7 +904,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext ...@@ -904,7 +904,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
#endif #endif
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
"pminsw %%mm2, %%mm0 \n\t" "pminsw %%mm2, %%mm0 \n\t"
"pminsw %%mm3, %%mm1 \n\t" "pminsw %%mm3, %%mm1 \n\t"
#else #else
...@@ -968,7 +968,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext ...@@ -968,7 +968,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"pand %%mm2, %%mm4 \n\t" "pand %%mm2, %%mm4 \n\t"
"pand %%mm3, %%mm5 \n\t" "pand %%mm3, %%mm5 \n\t"
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
"pminsw %%mm0, %%mm4 \n\t" "pminsw %%mm0, %%mm4 \n\t"
"pminsw %%mm1, %%mm5 \n\t" "pminsw %%mm1, %%mm5 \n\t"
#else #else
...@@ -995,7 +995,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext ...@@ -995,7 +995,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
: "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp) : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
: "%"REG_a : "%"REG_a
); );
#else //HAVE_MMXEXT || HAVE_AMD3DNOW #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
const int l1= stride; const int l1= stride;
const int l2= stride + l1; const int l2= stride + l1;
const int l3= stride + l2; const int l3= stride + l2;
...@@ -1033,14 +1033,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext ...@@ -1033,14 +1033,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
} }
src++; src++;
} }
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
} }
#endif //HAVE_ALTIVEC #endif //HAVE_ALTIVEC
#if !HAVE_ALTIVEC #if !HAVE_ALTIVEC
static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
{ {
#if HAVE_MMXEXT || HAVE_AMD3DNOW #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
DECLARE_ALIGNED(8, uint64_t, tmp)[3]; DECLARE_ALIGNED(8, uint64_t, tmp)[3];
__asm__ volatile( __asm__ volatile(
"pxor %%mm6, %%mm6 \n\t" "pxor %%mm6, %%mm6 \n\t"
...@@ -1060,7 +1060,7 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) ...@@ -1060,7 +1060,7 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
#undef REAL_FIND_MIN_MAX #undef REAL_FIND_MIN_MAX
#undef FIND_MIN_MAX #undef FIND_MIN_MAX
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
#define REAL_FIND_MIN_MAX(addr)\ #define REAL_FIND_MIN_MAX(addr)\
"movq " #addr ", %%mm0 \n\t"\ "movq " #addr ", %%mm0 \n\t"\
"pminub %%mm0, %%mm7 \n\t"\ "pminub %%mm0, %%mm7 \n\t"\
...@@ -1087,7 +1087,7 @@ FIND_MIN_MAX((%0, %1, 8)) ...@@ -1087,7 +1087,7 @@ FIND_MIN_MAX((%0, %1, 8))
"movq %%mm7, %%mm4 \n\t" "movq %%mm7, %%mm4 \n\t"
"psrlq $8, %%mm7 \n\t" "psrlq $8, %%mm7 \n\t"
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
"pminub %%mm4, %%mm7 \n\t" // min of pixels "pminub %%mm4, %%mm7 \n\t" // min of pixels
"pshufw $0xF9, %%mm7, %%mm4 \n\t" "pshufw $0xF9, %%mm7, %%mm4 \n\t"
"pminub %%mm4, %%mm7 \n\t" // min of pixels "pminub %%mm4, %%mm7 \n\t" // min of pixels
...@@ -1112,7 +1112,7 @@ FIND_MIN_MAX((%0, %1, 8)) ...@@ -1112,7 +1112,7 @@ FIND_MIN_MAX((%0, %1, 8))
"movq %%mm6, %%mm4 \n\t" "movq %%mm6, %%mm4 \n\t"
"psrlq $8, %%mm6 \n\t" "psrlq $8, %%mm6 \n\t"
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
"pmaxub %%mm4, %%mm6 \n\t" // max of pixels "pmaxub %%mm4, %%mm6 \n\t" // max of pixels
"pshufw $0xF9, %%mm6, %%mm4 \n\t" "pshufw $0xF9, %%mm6, %%mm4 \n\t"
"pmaxub %%mm4, %%mm6 \n\t" "pmaxub %%mm4, %%mm6 \n\t"
...@@ -1266,7 +1266,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, ...@@ -1266,7 +1266,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
: : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp) : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
: "%"REG_a, "%"REG_d : "%"REG_a, "%"REG_d
); );
#else //HAVE_MMXEXT || HAVE_AMD3DNOW #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
int y; int y;
int min=255; int min=255;
int max=0; int max=0;
...@@ -1383,7 +1383,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, ...@@ -1383,7 +1383,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
// src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
} }
#endif #endif
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
} }
#endif //HAVE_ALTIVEC #endif //HAVE_ALTIVEC
...@@ -1395,7 +1395,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, ...@@ -1395,7 +1395,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
*/ */
static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
{ {
#if HAVE_MMXEXT || HAVE_AMD3DNOW #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
src+= 4*stride; src+= 4*stride;
__asm__ volatile( __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t" "lea (%0, %1), %%"REG_a" \n\t"
...@@ -1448,7 +1448,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid ...@@ -1448,7 +1448,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
*/ */
static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
{ {
#if HAVE_MMXEXT || HAVE_AMD3DNOW #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
src+= stride*3; src+= stride*3;
__asm__ volatile( __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t" "lea (%0, %1), %%"REG_a" \n\t"
...@@ -1490,7 +1490,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, ...@@ -1490,7 +1490,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc,
: : "r" (src), "r" ((x86_reg)stride) : : "r" (src), "r" ((x86_reg)stride)
: "%"REG_a, "%"REG_d, "%"REG_c : "%"REG_a, "%"REG_d, "%"REG_c
); );
#else //HAVE_MMXEXT || HAVE_AMD3DNOW #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
int x; int x;
src+= stride*3; src+= stride*3;
for(x=0; x<8; x++){ for(x=0; x<8; x++){
...@@ -1500,7 +1500,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, ...@@ -1500,7 +1500,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc,
src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
src++; src++;
} }
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
} }
/** /**
...@@ -1512,7 +1512,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, ...@@ -1512,7 +1512,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc,
*/ */
static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
{ {
#if HAVE_MMXEXT || HAVE_AMD3DNOW #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
src+= stride*4; src+= stride*4;
__asm__ volatile( __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t" "lea (%0, %1), %%"REG_a" \n\t"
...@@ -1561,7 +1561,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) ...@@ -1561,7 +1561,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
: : "r" (src), "r" ((x86_reg)stride), "r"(tmp) : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
: "%"REG_a, "%"REG_d : "%"REG_a, "%"REG_d
); );
#else //HAVE_MMXEXT || HAVE_AMD3DNOW #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
int x; int x;
src+= stride*4; src+= stride*4;
for(x=0; x<8; x++){ for(x=0; x<8; x++){
...@@ -1579,7 +1579,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) ...@@ -1579,7 +1579,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
src++; src++;
} }
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
} }
/** /**
...@@ -1591,7 +1591,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) ...@@ -1591,7 +1591,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
*/ */
static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
{ {
#if HAVE_MMXEXT || HAVE_AMD3DNOW #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
src+= stride*4; src+= stride*4;
__asm__ volatile( __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t" "lea (%0, %1), %%"REG_a" \n\t"
...@@ -1651,7 +1651,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) ...@@ -1651,7 +1651,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
: : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
: "%"REG_a, "%"REG_d : "%"REG_a, "%"REG_d
); );
#else //HAVE_MMXEXT || HAVE_AMD3DNOW #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
int x; int x;
src+= stride*4; src+= stride*4;
for(x=0; x<8; x++){ for(x=0; x<8; x++){
...@@ -1680,7 +1680,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) ...@@ -1680,7 +1680,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
src++; src++;
} }
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
} }
/** /**
...@@ -1692,7 +1692,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) ...@@ -1692,7 +1692,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
*/ */
static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp) static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
{ {
#if HAVE_MMXEXT || HAVE_AMD3DNOW #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
src+= 4*stride; src+= 4*stride;
__asm__ volatile( __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t" "lea (%0, %1), %%"REG_a" \n\t"
...@@ -1739,7 +1739,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin ...@@ -1739,7 +1739,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
: : "r" (src), "r" ((x86_reg)stride), "r" (tmp) : : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
: "%"REG_a, "%"REG_d : "%"REG_a, "%"REG_d
); );
#else //HAVE_MMXEXT || HAVE_AMD3DNOW #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
int a, b, c, x; int a, b, c, x;
src+= 4*stride; src+= 4*stride;
...@@ -1782,7 +1782,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin ...@@ -1782,7 +1782,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
src += 4; src += 4;
tmp += 4; tmp += 4;
} }
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
} }
/** /**
...@@ -1793,9 +1793,9 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin ...@@ -1793,9 +1793,9 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
*/ */
static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
{ {
#if HAVE_MMX #if HAVE_MMX_INLINE
src+= 4*stride; src+= 4*stride;
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
__asm__ volatile( __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t" "lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
...@@ -1885,8 +1885,8 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) ...@@ -1885,8 +1885,8 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
: : "r" (src), "r" ((x86_reg)stride) : : "r" (src), "r" ((x86_reg)stride)
: "%"REG_a, "%"REG_d : "%"REG_a, "%"REG_d
); );
#endif //HAVE_MMXEXT #endif //HAVE_MMXEXT_INLINE
#else //HAVE_MMX #else //HAVE_MMX_INLINE
int x, y; int x, y;
src+= 4*stride; src+= 4*stride;
// FIXME - there should be a way to do a few columns in parallel like w/mmx // FIXME - there should be a way to do a few columns in parallel like w/mmx
...@@ -1905,10 +1905,10 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) ...@@ -1905,10 +1905,10 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
} }
src++; src++;
} }
#endif //HAVE_MMX #endif //HAVE_MMX_INLINE
} }
#if HAVE_MMX #if HAVE_MMX_INLINE
/** /**
* Transpose and shift the given 8x8 Block into dst1 and dst2. * Transpose and shift the given 8x8 Block into dst1 and dst2.
*/ */
...@@ -2073,7 +2073,7 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) ...@@ -2073,7 +2073,7 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
: "%"REG_a, "%"REG_d : "%"REG_a, "%"REG_d
); );
} }
#endif //HAVE_MMX #endif //HAVE_MMX_INLINE
//static long test=0; //static long test=0;
#if !HAVE_ALTIVEC #if !HAVE_ALTIVEC
...@@ -2087,7 +2087,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, ...@@ -2087,7 +2087,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
#define FAST_L2_DIFF #define FAST_L2_DIFF
//#define L1_DIFF //u should change the thresholds too if u try that one //#define L1_DIFF //u should change the thresholds too if u try that one
#if HAVE_MMXEXT || HAVE_AMD3DNOW #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
__asm__ volatile( __asm__ volatile(
"lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride
"lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride
...@@ -2375,7 +2375,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) ...@@ -2375,7 +2375,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
:: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
: "%"REG_a, "%"REG_d, "%"REG_c, "memory" : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
); );
#else //HAVE_MMXEXT || HAVE_AMD3DNOW #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
{ {
int y; int y;
int d=0; int d=0;
...@@ -2458,11 +2458,11 @@ Switch between ...@@ -2458,11 +2458,11 @@ Switch between
} }
} }
} }
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
} }
#endif //HAVE_ALTIVEC #endif //HAVE_ALTIVEC
#if HAVE_MMX #if HAVE_MMX_INLINE
/** /**
* accurate deblock filter * accurate deblock filter
*/ */
...@@ -2865,7 +2865,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st ...@@ -2865,7 +2865,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"movq (%4), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq (%4), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"movq 8(%4), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 "movq 8(%4), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
"movq %%mm7, %%mm6 \n\t" // 0 "movq %%mm7, %%mm6 \n\t" // 0
"psubw %%mm0, %%mm6 \n\t" "psubw %%mm0, %%mm6 \n\t"
"pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
...@@ -2897,7 +2897,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st ...@@ -2897,7 +2897,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
#endif #endif
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
"pminsw %%mm2, %%mm0 \n\t" "pminsw %%mm2, %%mm0 \n\t"
"pminsw %%mm3, %%mm1 \n\t" "pminsw %%mm3, %%mm1 \n\t"
#else #else
...@@ -2961,7 +2961,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st ...@@ -2961,7 +2961,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"pand %%mm2, %%mm4 \n\t" "pand %%mm2, %%mm4 \n\t"
"pand %%mm3, %%mm5 \n\t" "pand %%mm3, %%mm5 \n\t"
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
"pminsw %%mm0, %%mm4 \n\t" "pminsw %%mm0, %%mm4 \n\t"
"pminsw %%mm1, %%mm5 \n\t" "pminsw %%mm1, %%mm5 \n\t"
#else #else
...@@ -2998,7 +2998,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st ...@@ -2998,7 +2998,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
} }
} */ } */
} }
#endif //HAVE_MMX #endif //HAVE_MMX_INLINE
static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c); const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
...@@ -3013,18 +3013,18 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3013,18 +3013,18 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride,
int levelFix, int64_t *packedOffsetAndScale) int levelFix, int64_t *packedOffsetAndScale)
{ {
#if !HAVE_MMX #if !HAVE_MMX_INLINE
int i; int i;
#endif #endif
if(levelFix){ if(levelFix){
#if HAVE_MMX #if HAVE_MMX_INLINE
__asm__ volatile( __asm__ volatile(
"movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset
"movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale
"lea (%2,%4), %%"REG_a" \n\t" "lea (%2,%4), %%"REG_a" \n\t"
"lea (%3,%5), %%"REG_d" \n\t" "lea (%3,%5), %%"REG_d" \n\t"
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
"movq " #src1 ", %%mm0 \n\t"\ "movq " #src1 ", %%mm0 \n\t"\
"movq " #src1 ", %%mm5 \n\t"\ "movq " #src1 ", %%mm5 \n\t"\
...@@ -3047,7 +3047,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t ...@@ -3047,7 +3047,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
"movq %%mm0, " #dst1 " \n\t"\ "movq %%mm0, " #dst1 " \n\t"\
"movq %%mm1, " #dst2 " \n\t"\ "movq %%mm1, " #dst2 " \n\t"\
#else //HAVE_MMXEXT #else //HAVE_MMXEXT_INLINE
#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
"movq " #src1 ", %%mm0 \n\t"\ "movq " #src1 ", %%mm0 \n\t"\
"movq " #src1 ", %%mm5 \n\t"\ "movq " #src1 ", %%mm5 \n\t"\
...@@ -3074,7 +3074,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t ...@@ -3074,7 +3074,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
"movq %%mm0, " #dst1 " \n\t"\ "movq %%mm0, " #dst1 " \n\t"\
"movq %%mm1, " #dst2 " \n\t"\ "movq %%mm1, " #dst2 " \n\t"\
#endif //HAVE_MMXEXT #endif //HAVE_MMXEXT_INLINE
#define SCALED_CPY(src1, src2, dst1, dst2)\ #define SCALED_CPY(src1, src2, dst1, dst2)\
REAL_SCALED_CPY(src1, src2, dst1, dst2) REAL_SCALED_CPY(src1, src2, dst1, dst2)
...@@ -3094,13 +3094,13 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) ...@@ -3094,13 +3094,13 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
"r" ((x86_reg)dstStride) "r" ((x86_reg)dstStride)
: "%"REG_d : "%"REG_d
); );
#else //HAVE_MMX #else //HAVE_MMX_INLINE
for(i=0; i<8; i++) for(i=0; i<8; i++)
memcpy( &(dst[dstStride*i]), memcpy( &(dst[dstStride*i]),
&(src[srcStride*i]), BLOCK_SIZE); &(src[srcStride*i]), BLOCK_SIZE);
#endif //HAVE_MMX #endif //HAVE_MMX_INLINE
}else{ }else{
#if HAVE_MMX #if HAVE_MMX_INLINE
__asm__ volatile( __asm__ volatile(
"lea (%0,%2), %%"REG_a" \n\t" "lea (%0,%2), %%"REG_a" \n\t"
"lea (%1,%3), %%"REG_d" \n\t" "lea (%1,%3), %%"REG_d" \n\t"
...@@ -3127,11 +3127,11 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) ...@@ -3127,11 +3127,11 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
"r" ((x86_reg)dstStride) "r" ((x86_reg)dstStride)
: "%"REG_a, "%"REG_d : "%"REG_a, "%"REG_d
); );
#else //HAVE_MMX #else //HAVE_MMX_INLINE
for(i=0; i<8; i++) for(i=0; i<8; i++)
memcpy( &(dst[dstStride*i]), memcpy( &(dst[dstStride*i]),
&(src[srcStride*i]), BLOCK_SIZE); &(src[srcStride*i]), BLOCK_SIZE);
#endif //HAVE_MMX #endif //HAVE_MMX_INLINE
} }
} }
...@@ -3140,7 +3140,7 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) ...@@ -3140,7 +3140,7 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
*/ */
static inline void RENAME(duplicate)(uint8_t src[], int stride) static inline void RENAME(duplicate)(uint8_t src[], int stride)
{ {
#if HAVE_MMX #if HAVE_MMX_INLINE
__asm__ volatile( __asm__ volatile(
"movq (%0), %%mm0 \n\t" "movq (%0), %%mm0 \n\t"
"add %1, %0 \n\t" "add %1, %0 \n\t"
...@@ -3177,7 +3177,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3177,7 +3177,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
int QPCorrecture= 256*256; int QPCorrecture= 256*256;
int copyAhead; int copyAhead;
#if HAVE_MMX #if HAVE_MMX_INLINE
int i; int i;
#endif #endif
...@@ -3190,7 +3190,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3190,7 +3190,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride; uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
//const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4; //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
#if HAVE_MMX #if HAVE_MMX_INLINE
for(i=0; i<57; i++){ for(i=0; i<57; i++){
int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
int threshold= offset*2 + 1; int threshold= offset*2 + 1;
...@@ -3248,7 +3248,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3248,7 +3248,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black); scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
c.packedYScale= (uint16_t)(scale*256.0 + 0.5); c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF; c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
#else #else
...@@ -3281,7 +3281,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3281,7 +3281,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
// with the L1 Cache of the P4 ... or only a few blocks at a time or something // with the L1 Cache of the P4 ... or only a few blocks at a time or something
for(x=0; x<width; x+=BLOCK_SIZE){ for(x=0; x<width; x+=BLOCK_SIZE){
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
/* /*
prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
...@@ -3308,7 +3308,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3308,7 +3308,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
: "%"REG_a, "%"REG_d : "%"REG_a, "%"REG_d
); );
#elif HAVE_AMD3DNOW #elif HAVE_AMD3DNOW_INLINE
//FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
/* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
...@@ -3354,7 +3354,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3354,7 +3354,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
//1% speedup if these are here instead of the inner loop //1% speedup if these are here instead of the inner loop
const uint8_t *srcBlock= &(src[y*srcStride]); const uint8_t *srcBlock= &(src[y*srcStride]);
uint8_t *dstBlock= &(dst[y*dstStride]); uint8_t *dstBlock= &(dst[y*dstStride]);
#if HAVE_MMX #if HAVE_MMX_INLINE
uint8_t *tempBlock1= c.tempBlocks; uint8_t *tempBlock1= c.tempBlocks;
uint8_t *tempBlock2= c.tempBlocks + 8; uint8_t *tempBlock2= c.tempBlocks + 8;
#endif #endif
...@@ -3390,7 +3390,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3390,7 +3390,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
// with the L1 Cache of the P4 ... or only a few blocks at a time or something // with the L1 Cache of the P4 ... or only a few blocks at a time or something
for(x=0; x<width; x+=BLOCK_SIZE){ for(x=0; x<width; x+=BLOCK_SIZE){
const int stride= dstStride; const int stride= dstStride;
#if HAVE_MMX #if HAVE_MMX_INLINE
uint8_t *tmpXchg; uint8_t *tmpXchg;
#endif #endif
if(isColor){ if(isColor){
...@@ -3404,7 +3404,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3404,7 +3404,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
yHistogram[ srcBlock[srcStride*12 + 4] ]++; yHistogram[ srcBlock[srcStride*12 + 4] ]++;
} }
c.QP= QP; c.QP= QP;
#if HAVE_MMX #if HAVE_MMX_INLINE
__asm__ volatile( __asm__ volatile(
"movd %1, %%mm7 \n\t" "movd %1, %%mm7 \n\t"
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
...@@ -3417,7 +3417,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3417,7 +3417,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
#endif #endif
#if HAVE_MMXEXT #if HAVE_MMXEXT_INLINE
/* /*
prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
...@@ -3444,7 +3444,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3444,7 +3444,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
: "%"REG_a, "%"REG_d : "%"REG_a, "%"REG_d
); );
#elif HAVE_AMD3DNOW #elif HAVE_AMD3DNOW_INLINE
//FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
/* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
...@@ -3488,12 +3488,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3488,12 +3488,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
} }
} }
#if HAVE_MMX #if HAVE_MMX_INLINE
RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
#endif #endif
/* check if we have a previous block to deblock it with dstBlock */ /* check if we have a previous block to deblock it with dstBlock */
if(x - 8 >= 0){ if(x - 8 >= 0){
#if HAVE_MMX #if HAVE_MMX_INLINE
if(mode & H_X1_FILTER) if(mode & H_X1_FILTER)
RENAME(vertX1Filter)(tempBlock1, 16, &c); RENAME(vertX1Filter)(tempBlock1, 16, &c);
else if(mode & H_DEBLOCK){ else if(mode & H_DEBLOCK){
...@@ -3539,7 +3539,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3539,7 +3539,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
}else if(mode & H_A_DEBLOCK){ }else if(mode & H_A_DEBLOCK){
RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c); RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
} }
#endif //HAVE_MMX #endif //HAVE_MMX_INLINE
if(mode & DERING){ if(mode & DERING){
//FIXME filter first line //FIXME filter first line
if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
...@@ -3557,7 +3557,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3557,7 +3557,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
dstBlock+=8; dstBlock+=8;
srcBlock+=8; srcBlock+=8;
#if HAVE_MMX #if HAVE_MMX_INLINE
tmpXchg= tempBlock1; tmpXchg= tempBlock1;
tempBlock1= tempBlock2; tempBlock1= tempBlock2;
tempBlock2 = tmpXchg; tempBlock2 = tmpXchg;
...@@ -3597,9 +3597,9 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3597,9 +3597,9 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
+ dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride]; + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
}*/ }*/
} }
#if HAVE_AMD3DNOW #if HAVE_AMD3DNOW_INLINE
__asm__ volatile("femms"); __asm__ volatile("femms");
#elif HAVE_MMX #elif HAVE_MMX_INLINE
__asm__ volatile("emms"); __asm__ volatile("emms");
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment