Commit febdd0b9 authored by Michael Niedermayer

~15% faster h264_chroma_mc2/4_c(); these changes also prevent some possible out-of-array reads.

Originally committed as revision 11290 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent a06a18c5
...@@ -1440,6 +1440,7 @@ static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*a ...@@ -1440,6 +1440,7 @@ static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
\ \
assert(x<8 && y<8 && x>=0 && y>=0);\ assert(x<8 && y<8 && x>=0 && y>=0);\
\ \
if(D){\
for(i=0; i<h; i++)\ for(i=0; i<h; i++)\
{\ {\
OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
...@@ -1447,6 +1448,17 @@ static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*a ...@@ -1447,6 +1448,17 @@ static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
dst+= stride;\ dst+= stride;\
src+= stride;\ src+= stride;\
}\ }\
}else{\
const int E= B+C;\
const int step= C ? stride : 1;\
for(i=0; i<h; i++)\
{\
OP(dst[0], (A*src[0] + E*src[step+0]));\
OP(dst[1], (A*src[1] + E*src[step+1]));\
dst+= stride;\
src+= stride;\
}\
}\
}\ }\
\ \
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
...@@ -1458,6 +1470,7 @@ static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a ...@@ -1458,6 +1470,7 @@ static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
\ \
assert(x<8 && y<8 && x>=0 && y>=0);\ assert(x<8 && y<8 && x>=0 && y>=0);\
\ \
if(D){\
for(i=0; i<h; i++)\ for(i=0; i<h; i++)\
{\ {\
OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
...@@ -1467,6 +1480,19 @@ static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a ...@@ -1467,6 +1480,19 @@ static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
dst+= stride;\ dst+= stride;\
src+= stride;\ src+= stride;\
}\ }\
}else{\
const int E= B+C;\
const int step= C ? stride : 1;\
for(i=0; i<h; i++)\
{\
OP(dst[0], (A*src[0] + E*src[step+0]));\
OP(dst[1], (A*src[1] + E*src[step+1]));\
OP(dst[2], (A*src[2] + E*src[step+2]));\
OP(dst[3], (A*src[3] + E*src[step+3]));\
dst+= stride;\
src+= stride;\
}\
}\
}\ }\
\ \
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment