Commit 99d33fa3 authored by Michael Niedermayer

faster C linear blend & interpolate deinterlacers

Originally committed as revision 1716 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 134eb1e5
...@@ -1562,15 +1562,20 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid ...@@ -1562,15 +1562,20 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
: "%eax", "%ecx" : "%eax", "%ecx"
); );
#else #else
int x; int a, b, x;
src+= 4*stride; src+= 4*stride;
for(x=0; x<8; x++)
{ for(x=0; x<2; x++){
src[stride] = (src[0] + src[stride*2])>>1; a= *(uint32_t*)&src[stride*0];
src[stride*3] = (src[stride*2] + src[stride*4])>>1; b= *(uint32_t*)&src[stride*2];
src[stride*5] = (src[stride*4] + src[stride*6])>>1; *(uint32_t*)&src[stride*1]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
src[stride*7] = (src[stride*6] + src[stride*8])>>1; a= *(uint32_t*)&src[stride*4];
src++; *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
b= *(uint32_t*)&src[stride*6];
*(uint32_t*)&src[stride*5]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
a= *(uint32_t*)&src[stride*8];
*(uint32_t*)&src[stride*7]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
src += 4;
} }
#endif #endif
} }
...@@ -1875,19 +1880,45 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride) ...@@ -1875,19 +1880,45 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride)
: "%eax", "%edx" : "%eax", "%edx"
); );
#else #else
int x; int a, b, c, x;
src+= 4*stride; src+= 4*stride;
for(x=0; x<8; x++)
{ for(x=0; x<2; x++){
src[0 ] = (src[0 ] + 2*src[stride ] + src[stride*2])>>2; a= *(uint32_t*)&src[stride*0];
src[stride ] = (src[stride ] + 2*src[stride*2] + src[stride*3])>>2; b= *(uint32_t*)&src[stride*1];
src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2; c= *(uint32_t*)&src[stride*2];
src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2; a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2; *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2; a= *(uint32_t*)&src[stride*3];
src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2; b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
src++; *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
b= *(uint32_t*)&src[stride*4];
c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
c= *(uint32_t*)&src[stride*5];
a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
a= *(uint32_t*)&src[stride*6];
b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
b= *(uint32_t*)&src[stride*7];
c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
c= *(uint32_t*)&src[stride*8];
a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
a= *(uint32_t*)&src[stride*9];
b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
*(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
src += 4;
} }
#endif #endif
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment