Commit e5262ec4 authored by Ronald S. Bultje

Optimize C version of ff_emulated_edge_mc().

From ~780 cycles to 551 cycles, mostly just by using libc memcpy()
instead of manually shuffling individual bytes around.
parent 4d54df8e
...@@ -355,38 +355,45 @@ void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int blo ...@@ -355,38 +355,45 @@ void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int blo
start_x= FFMAX(0, -src_x); start_x= FFMAX(0, -src_x);
end_y= FFMIN(block_h, h-src_y); end_y= FFMIN(block_h, h-src_y);
end_x= FFMIN(block_w, w-src_x); end_x= FFMIN(block_w, w-src_x);
assert(start_y < end_y && block_h);
assert(start_x < end_x && block_w);
// copy existing part w = end_x - start_x;
for(y=start_y; y<end_y; y++){ src += start_y*linesize + start_x;
for(x=start_x; x<end_x; x++){ buf += start_x;
buf[x + y*linesize]= src[x + y*linesize];
}
}
//top //top
for(y=0; y<start_y; y++){ for(y=0; y<start_y; y++){
for(x=start_x; x<end_x; x++){ memcpy(buf, src, w);
buf[x + y*linesize]= buf[x + start_y*linesize]; buf += linesize;
} }
// copy existing part
for(; y<end_y; y++){
memcpy(buf, src, w);
src += linesize;
buf += linesize;
} }
//bottom //bottom
for(y=end_y; y<block_h; y++){ src -= linesize;
for(x=start_x; x<end_x; x++){ for(; y<block_h; y++){
buf[x + y*linesize]= buf[x + (end_y-1)*linesize]; memcpy(buf, src, w);
} buf += linesize;
} }
for(y=0; y<block_h; y++){ buf -= block_h * linesize + start_x;
while (block_h--){
//left //left
for(x=0; x<start_x; x++){ for(x=0; x<start_x; x++){
buf[x + y*linesize]= buf[start_x + y*linesize]; buf[x] = buf[start_x];
} }
//right //right
for(x=end_x; x<block_w; x++){ for(x=end_x; x<block_w; x++){
buf[x + y*linesize]= buf[end_x - 1 + y*linesize]; buf[x] = buf[end_x - 1];
} }
buf += linesize;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment