Commit a68ca08e authored by Michael Niedermayer's avatar Michael Niedermayer

cleanup mc_block()

Perform the interpolation steps in an order that allows half-pel interpolation
to be done per picture.
This also makes mc_block() match H.264 for the 1/4-pel cases, so that using
the H.264 functions for some cases does not introduce a fantastic mess.

Originally committed as revision 10433 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 7ae94d52
......@@ -2144,8 +2144,57 @@ static void decode_blocks(SnowContext *s){
}
static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
int x, y;
const static uint8_t weight[64]={
8,7,6,5,4,3,2,1,
7,7,0,0,0,0,0,1,
6,0,6,0,0,0,2,0,
5,0,0,5,0,3,0,0,
4,0,0,0,4,0,0,0,
3,0,0,5,0,3,0,0,
2,0,6,0,0,0,2,0,
1,7,0,0,0,0,0,1,
};
const static uint8_t brane[256]={
0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
};
const static uint8_t needs[16]={
0,1,0,0,
2,4,2,0,
0,1,0,0,
15
};
int x, y, b, r, l;
int16_t tmpIt [64*(32+HTAPS)];
uint8_t tmp2t[3][stride*(32+HTAPS)];
int16_t *tmpI= tmpIt;
uint8_t *tmp2= tmp2t[0];
uint8_t *hpel[11];
START_TIMER
assert(dx<16 && dy<16);
r= brane[dx + 16*dy]&15;
l= brane[dx + 16*dy]>>4;
b= needs[l] | needs[r];
if(b&5){
for(y=0; y < b_h+HTAPS-1; y++){
for(x=0; x < b_w; x++){
int a_2=src[x + HTAPS/2-5];
......@@ -2170,36 +2219,33 @@ START_TIMER
// if(b_w==16) am= 8*(a1+a2);
if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
/* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
tmpI[x]= am;
am= (am+16)>>5;
if(am&(~255)) am= ~(am>>31);
tmp[x] = am;
/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
tmp2[x]= am;
}
tmp += stride;
tmpI+= 64;
tmp2+= stride;
src += stride;
}
tmp -= (b_h+HTAPS-1)*stride;
src -= stride*y;
}
src += HTAPS/2 - 1;
tmp2= tmp2t[1];
if(b&2){
for(y=0; y < b_h; y++){
for(x=0; x < b_w; x++){
int a_2=tmp[x + (HTAPS/2-5)*stride];
int a_1=tmp[x + (HTAPS/2-4)*stride];
int a0= tmp[x + (HTAPS/2-3)*stride];
int a1= tmp[x + (HTAPS/2-2)*stride];
int a2= tmp[x + (HTAPS/2-1)*stride];
int a3= tmp[x + (HTAPS/2+0)*stride];
int a4= tmp[x + (HTAPS/2+1)*stride];
int a5= tmp[x + (HTAPS/2+2)*stride];
int a6= tmp[x + (HTAPS/2+3)*stride];
int a7= tmp[x + (HTAPS/2+4)*stride];
for(x=0; x < b_w+1; x++){
int a_2=src[x + (HTAPS/2-5)*stride];
int a_1=src[x + (HTAPS/2-4)*stride];
int a0= src[x + (HTAPS/2-3)*stride];
int a1= src[x + (HTAPS/2-2)*stride];
int a2= src[x + (HTAPS/2-1)*stride];
int a3= src[x + (HTAPS/2+0)*stride];
int a4= src[x + (HTAPS/2+1)*stride];
int a5= src[x + (HTAPS/2+2)*stride];
int a6= src[x + (HTAPS/2+3)*stride];
int a7= src[x + (HTAPS/2+4)*stride];
#if HTAPS==6
int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
#else
......@@ -2211,19 +2257,88 @@ START_TIMER
// if(b_w==16) am= 8*(a1+a2);
if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
am= (am + 16)>>5;
if(am&(~255)) am= ~(am>>31);
tmp2[x]= am;
}
src += stride;
tmp2+= stride;
}
src -= stride*y;
}
src += stride*(HTAPS/2 - 1);
tmp2= tmp2t[2];
tmpI= tmpIt;
if(b&4){
for(y=0; y < b_h; y++){
for(x=0; x < b_w; x++){
int a_2=tmpI[x + (HTAPS/2-5)*64];
int a_1=tmpI[x + (HTAPS/2-4)*64];
int a0= tmpI[x + (HTAPS/2-3)*64];
int a1= tmpI[x + (HTAPS/2-2)*64];
int a2= tmpI[x + (HTAPS/2-1)*64];
int a3= tmpI[x + (HTAPS/2+0)*64];
int a4= tmpI[x + (HTAPS/2+1)*64];
int a5= tmpI[x + (HTAPS/2+2)*64];
int a6= tmpI[x + (HTAPS/2+3)*64];
int a7= tmpI[x + (HTAPS/2+4)*64];
#if HTAPS==6
int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
#else
int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
#endif
am= (am + 512)>>10;
if(am&(~255)) am= ~(am>>31);
tmp2[x]= am;
}
tmpI+= 64;
tmp2+= stride;
}
}
dst[x] = am;
/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
hpel[ 0]= src;
hpel[ 1]= tmp2t[0] + stride*(HTAPS/2-1);
hpel[ 2]= src + 1;
hpel[ 4]= tmp2t[1];
hpel[ 5]= tmp2t[2];
hpel[ 6]= tmp2t[1] + 1;
hpel[ 8]= src + stride;
hpel[ 9]= hpel[1] + stride;
hpel[10]= hpel[8] + 1;
if(b==15){
uint8_t *src1= hpel[dx/8 + dy/8*4 ];
uint8_t *src2= hpel[dx/8 + dy/8*4+1];
uint8_t *src3= hpel[dx/8 + dy/8*4+4];
uint8_t *src4= hpel[dx/8 + dy/8*4+5];
dx&=7;
dy&=7;
for(y=0; y < b_h; y++){
for(x=0; x < b_w; x++){
dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
(8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
}
src1+=stride;
src2+=stride;
src3+=stride;
src4+=stride;
dst +=stride;
}
}else{
uint8_t *src1= hpel[l];
uint8_t *src2= hpel[r];
int a= weight[((dx&7) + (8*(dy&7)))];
int b= 8-a;
for(y=0; y < b_h; y++){
for(x=0; x < b_w; x++){
dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
}
src1+=stride;
src2+=stride;
dst +=stride;
}
dst += stride;
tmp += stride;
}
STOP_TIMER("mc_block")
}
......
......@@ -141,12 +141,12 @@ f8f51fa737add17f7fecaefa118b57ed *./tests/data/a-ffv1.avi
2654678 ./tests/data/a-ffv1.avi
799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
0356b219110f391044352547360377a8 *./tests/data/a-snow.avi
156586 ./tests/data/a-snow.avi
c038bc896a435796588ca3b96f38bbb5 *./tests/data/out.yuv
3b419d4e700466fed31bdbdb7fd032ea *./tests/data/a-snow.avi
156682 ./tests/data/a-snow.avi
3de2b39f90fd8331f27e627e68e076f7 *./tests/data/out.yuv
stddev: 23.15 PSNR:20.83 bytes:7602176
ba999e86070aa971376e7f317a022c37 *./tests/data/a-snow53.avi
3519486 ./tests/data/a-snow53.avi
336d769a694cf92e21962879fea05331 *./tests/data/a-snow53.avi
3520654 ./tests/data/a-snow53.avi
799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
c299f64c3e85b928c5bfe71629cee006 *./tests/data/a-dv.dv
......
......@@ -141,12 +141,12 @@ d72b0960e162d4998b9acbabb07e99ab *./tests/data/a-ffv1.avi
3525804 ./tests/data/a-ffv1.avi
dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
ae64e5ff9b5684c46e74e48381e6a132 *./tests/data/a-snow.avi
68900 ./tests/data/a-snow.avi
5f5b97b726f97d3514b3c2b8e635175c *./tests/data/out.yuv
7e658570df22365c9dbbaa2038468f3c *./tests/data/a-snow.avi
69026 ./tests/data/a-snow.avi
63ea1d2fe1693be57d3ab7d351af7c55 *./tests/data/out.yuv
stddev: 10.87 PSNR:27.39 bytes:7602176
3d0da6aeec9b80c6ee0ff4b747bdd0f0 *./tests/data/a-snow53.avi
2721980 ./tests/data/a-snow53.avi
d33395ea1de6b801142be556d7699263 *./tests/data/a-snow53.avi
2721708 ./tests/data/a-snow53.avi
dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
54cdeaee32d1007666a1e487e739daf6 *./tests/data/a-dv.dv
......
......@@ -2050,47 +2050,47 @@ ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st:-1 ts:-1.000000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st:-1 ts:1.894167 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46978 size:3663 flags:1
ret: 0 st: 0 ts:0.800000 flags:0
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31696 size:3478 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31750 size:3478 flags:1
ret:-1 st: 0 ts:-0.320000 flags:1
ret:-1 st:-1 ts:2.576668 flags:0
ret: 0 st:-1 ts:1.470835 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46978 size:3663 flags:1
ret: 0 st: 0 ts:0.360000 flags:0
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17984 size:3229 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
ret:-1 st: 0 ts:-0.760000 flags:1
ret:-1 st:-1 ts:2.153336 flags:0
ret: 0 st:-1 ts:1.047503 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31696 size:3478 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31750 size:3478 flags:1
ret: 0 st: 0 ts:-0.040000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.840000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63506 size:3635 flags:1
ret: 0 st:-1 ts:1.730004 flags:0
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63506 size:3635 flags:1
ret: 0 st:-1 ts:0.624171 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17984 size:3229 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
ret: 0 st: 0 ts:-0.480000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.400000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63506 size:3635 flags:1
ret: 0 st:-1 ts:1.306672 flags:0
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46978 size:3663 flags:1
ret: 0 st:-1 ts:0.200839 flags:1
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:-0.920000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.000000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63506 size:3635 flags:1
ret: 0 st:-1 ts:0.883340 flags:0
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31696 size:3478 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31750 size:3478 flags:1
ret:-1 st:-1 ts:-0.222493 flags:1
ret:-1 st: 0 ts:2.680000 flags:0
ret: 0 st: 0 ts:1.560000 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46978 size:3663 flags:1
ret: 0 st:-1 ts:0.460008 flags:0
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17984 size:3229 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-snow53.avi
......@@ -2098,47 +2098,47 @@ ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:72476 flags:1
ret: 0 st:-1 ts:-1.000000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:72476 flags:1
ret: 0 st:-1 ts:1.894167 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:1902900 size:78837 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:1902764 size:78837 flags:1
ret: 0 st: 0 ts:0.800000 flags:0
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:1239314 size:74994 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:1239078 size:74994 flags:1
ret:-1 st: 0 ts:-0.320000 flags:1
ret:-1 st:-1 ts:2.576668 flags:0
ret: 0 st:-1 ts:1.470835 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:1902900 size:78837 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:1902764 size:78837 flags:1
ret: 0 st: 0 ts:0.360000 flags:0
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:605698 size:71059 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:605628 size:71059 flags:1
ret:-1 st: 0 ts:-0.760000 flags:1
ret:-1 st:-1 ts:2.153336 flags:0
ret: 0 st:-1 ts:1.047503 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:1239314 size:74994 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:1239078 size:74994 flags:1
ret: 0 st: 0 ts:-0.040000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:72476 flags:1
ret: 0 st: 0 ts:2.840000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:2585846 size:79731 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:2585580 size:79731 flags:1
ret: 0 st:-1 ts:1.730004 flags:0
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:2585846 size:79731 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:2585580 size:79731 flags:1
ret: 0 st:-1 ts:0.624171 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:605698 size:71059 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:605628 size:71059 flags:1
ret: 0 st: 0 ts:-0.480000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:72476 flags:1
ret: 0 st: 0 ts:2.400000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:2585846 size:79731 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:2585580 size:79731 flags:1
ret: 0 st:-1 ts:1.306672 flags:0
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:1902900 size:78837 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:1902764 size:78837 flags:1
ret: 0 st:-1 ts:0.200839 flags:1
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:72476 flags:1
ret: 0 st: 0 ts:-0.920000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:72476 flags:1
ret: 0 st: 0 ts:2.000000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:2585846 size:79731 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:2585580 size:79731 flags:1
ret: 0 st:-1 ts:0.883340 flags:0
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:1239314 size:74994 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:1239078 size:74994 flags:1
ret:-1 st:-1 ts:-0.222493 flags:1
ret:-1 st: 0 ts:2.680000 flags:0
ret: 0 st: 0 ts:1.560000 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:1902900 size:78837 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:1902764 size:78837 flags:1
ret: 0 st:-1 ts:0.460008 flags:0
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:605698 size:71059 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:605628 size:71059 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-svq1.mov
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment