Commit 37da00fc authored by Michael Niedermayer

optimizations (+2% speedup)

Originally committed as revision 2414 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
parent 534a602d
...@@ -214,54 +214,50 @@ static inline int isVertDC(uint8_t src[], int stride){ ...@@ -214,54 +214,50 @@ static inline int isVertDC(uint8_t src[], int stride){
int y; int y;
src+= stride*4; // src points to begin of the 8x8 Block src+= stride*4; // src points to begin of the 8x8 Block
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( asm volatile(
"pushl %1\n\t" "leal (%1, %2), %%eax \n\t"
"leal (%%eax, %2, 4), %%ebx \n\t"
// 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2
"movq b7E, %%mm7 \n\t" // mm7 = 0x7F "movq b7E, %%mm7 \n\t" // mm7 = 0x7F
"movq b7C, %%mm6 \n\t" // mm6 = 0x7D "movq b7C, %%mm6 \n\t" // mm6 = 0x7D
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%eax), %%mm1 \n\t"
"movq (%1), %%mm1 \n\t"
"psubb %%mm1, %%mm0 \n\t" // mm0 = differnece "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece
"paddb %%mm7, %%mm0 \n\t" "paddb %%mm7, %%mm0 \n\t"
"pcmpgtb %%mm6, %%mm0 \n\t" "pcmpgtb %%mm6, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%eax,%2), %%mm2 \n\t"
"movq (%1), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t" "psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t" "paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t" "pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%eax, %2, 2), %%mm1 \n\t"
"movq (%1), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t" "psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t" "paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t" "pcmpgtb %%mm6, %%mm2 \n\t"
"paddb %%mm2, %%mm0 \n\t" "paddb %%mm2, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%1, %2, 4), %%mm2 \n\t"
"movq (%1), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t" "psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t" "paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t" "pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%ebx), %%mm1 \n\t"
"movq (%1), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t" "psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t" "paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t" "pcmpgtb %%mm6, %%mm2 \n\t"
"paddb %%mm2, %%mm0 \n\t" "paddb %%mm2, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%ebx, %2), %%mm2 \n\t"
"movq (%1), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t" "psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t" "paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t" "pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%ebx, %2, 2), %%mm1 \n\t"
"movq (%1), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t" "psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t" "paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t" "pcmpgtb %%mm6, %%mm2 \n\t"
...@@ -277,17 +273,12 @@ static inline int isVertDC(uint8_t src[], int stride){ ...@@ -277,17 +273,12 @@ static inline int isVertDC(uint8_t src[], int stride){
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
"psrlq $32, %%mm0 \n\t" "psrlq $32, %%mm0 \n\t"
"paddb %%mm1, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t"
"popl %1\n\t"
"movd %%mm0, %0 \n\t" "movd %%mm0, %0 \n\t"
: "=r" (numEq) : "=r" (numEq)
: "r" (src), "r" (stride) : "r" (src), "r" (stride)
); );
// printf("%d\n", numEq);
numEq= (256 - (numEq & 0xFF)) &0xFF;
// int asmEq= numEq; numEq= (256 - numEq) &0xFF;
// numEq=0;
// uint8_t *temp= src;
#else #else
for(y=0; y<BLOCK_SIZE-1; y++) for(y=0; y<BLOCK_SIZE-1; y++)
...@@ -2491,8 +2482,6 @@ static inline void blockCopy(uint8_t dst[], int dstStride, uint8_t src[], int sr ...@@ -2491,8 +2482,6 @@ static inline void blockCopy(uint8_t dst[], int dstStride, uint8_t src[], int sr
{ {
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( asm volatile(
"pushl %0 \n\t"
"pushl %1 \n\t"
"leal (%2,%2), %%eax \n\t" "leal (%2,%2), %%eax \n\t"
"leal (%3,%3), %%ebx \n\t" "leal (%3,%3), %%ebx \n\t"
"movq packedYOffset, %%mm2 \n\t" "movq packedYOffset, %%mm2 \n\t"
...@@ -2534,11 +2523,9 @@ SCALED_CPY ...@@ -2534,11 +2523,9 @@ SCALED_CPY
"addl %%ebx, %1 \n\t" "addl %%ebx, %1 \n\t"
SCALED_CPY SCALED_CPY
"popl %1 \n\t" : "+r"(src),
"popl %0 \n\t" "+r"(dst)
: : "r" (src), :"r" (srcStride),
"r" (dst),
"r" (srcStride),
"r" (dstStride) "r" (dstStride)
: "%eax", "%ebx" : "%eax", "%ebx"
); );
......
...@@ -214,54 +214,50 @@ static inline int isVertDC(uint8_t src[], int stride){ ...@@ -214,54 +214,50 @@ static inline int isVertDC(uint8_t src[], int stride){
int y; int y;
src+= stride*4; // src points to begin of the 8x8 Block src+= stride*4; // src points to begin of the 8x8 Block
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( asm volatile(
"pushl %1\n\t" "leal (%1, %2), %%eax \n\t"
"leal (%%eax, %2, 4), %%ebx \n\t"
// 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2
"movq b7E, %%mm7 \n\t" // mm7 = 0x7F "movq b7E, %%mm7 \n\t" // mm7 = 0x7F
"movq b7C, %%mm6 \n\t" // mm6 = 0x7D "movq b7C, %%mm6 \n\t" // mm6 = 0x7D
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%eax), %%mm1 \n\t"
"movq (%1), %%mm1 \n\t"
"psubb %%mm1, %%mm0 \n\t" // mm0 = differnece "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece
"paddb %%mm7, %%mm0 \n\t" "paddb %%mm7, %%mm0 \n\t"
"pcmpgtb %%mm6, %%mm0 \n\t" "pcmpgtb %%mm6, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%eax,%2), %%mm2 \n\t"
"movq (%1), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t" "psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t" "paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t" "pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%eax, %2, 2), %%mm1 \n\t"
"movq (%1), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t" "psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t" "paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t" "pcmpgtb %%mm6, %%mm2 \n\t"
"paddb %%mm2, %%mm0 \n\t" "paddb %%mm2, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%1, %2, 4), %%mm2 \n\t"
"movq (%1), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t" "psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t" "paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t" "pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%ebx), %%mm1 \n\t"
"movq (%1), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t" "psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t" "paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t" "pcmpgtb %%mm6, %%mm2 \n\t"
"paddb %%mm2, %%mm0 \n\t" "paddb %%mm2, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%ebx, %2), %%mm2 \n\t"
"movq (%1), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t" "psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t" "paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t" "pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t"
"addl %2, %1 \n\t" "movq (%%ebx, %2, 2), %%mm1 \n\t"
"movq (%1), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t" "psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t" "paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t" "pcmpgtb %%mm6, %%mm2 \n\t"
...@@ -277,17 +273,12 @@ static inline int isVertDC(uint8_t src[], int stride){ ...@@ -277,17 +273,12 @@ static inline int isVertDC(uint8_t src[], int stride){
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
"psrlq $32, %%mm0 \n\t" "psrlq $32, %%mm0 \n\t"
"paddb %%mm1, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t"
"popl %1\n\t"
"movd %%mm0, %0 \n\t" "movd %%mm0, %0 \n\t"
: "=r" (numEq) : "=r" (numEq)
: "r" (src), "r" (stride) : "r" (src), "r" (stride)
); );
// printf("%d\n", numEq);
numEq= (256 - (numEq & 0xFF)) &0xFF;
// int asmEq= numEq; numEq= (256 - numEq) &0xFF;
// numEq=0;
// uint8_t *temp= src;
#else #else
for(y=0; y<BLOCK_SIZE-1; y++) for(y=0; y<BLOCK_SIZE-1; y++)
...@@ -2491,8 +2482,6 @@ static inline void blockCopy(uint8_t dst[], int dstStride, uint8_t src[], int sr ...@@ -2491,8 +2482,6 @@ static inline void blockCopy(uint8_t dst[], int dstStride, uint8_t src[], int sr
{ {
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( asm volatile(
"pushl %0 \n\t"
"pushl %1 \n\t"
"leal (%2,%2), %%eax \n\t" "leal (%2,%2), %%eax \n\t"
"leal (%3,%3), %%ebx \n\t" "leal (%3,%3), %%ebx \n\t"
"movq packedYOffset, %%mm2 \n\t" "movq packedYOffset, %%mm2 \n\t"
...@@ -2534,11 +2523,9 @@ SCALED_CPY ...@@ -2534,11 +2523,9 @@ SCALED_CPY
"addl %%ebx, %1 \n\t" "addl %%ebx, %1 \n\t"
SCALED_CPY SCALED_CPY
"popl %1 \n\t" : "+r"(src),
"popl %0 \n\t" "+r"(dst)
: : "r" (src), :"r" (srcStride),
"r" (dst),
"r" (srcStride),
"r" (dstStride) "r" (dstStride)
: "%eax", "%ebx" : "%eax", "%ebx"
); );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment