Commit 871371a7 authored by Loren Merritt

Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.

Originally committed as revision 5427 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 383f62fd
...@@ -293,34 +293,33 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) ...@@ -293,34 +293,33 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){ static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
#ifdef CONFIG_SNOW_ENCODER //idwt is in snow.c #ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c
int s, i, j; int s, i, j;
const int dec_count= w==8 ? 3 : 4; const int dec_count= w==8 ? 3 : 4;
int tmp[16*16]; int tmp[32*32];
#if 0
int level, ori; int level, ori;
static const int scale[2][2][4][4]={ static const int scale[2][2][4][4]={
{ {
{ {
//8x8 dec=3 // 9/7 8x8 dec=3
{268, 239, 239, 213}, {268, 239, 239, 213},
{ 0, 224, 224, 152}, { 0, 224, 224, 152},
{ 0, 135, 135, 110}, { 0, 135, 135, 110},
},{ },{
//16x16 dec=4 // 9/7 16x16 or 32x32 dec=4
{344, 310, 310, 280}, {344, 310, 310, 280},
{ 0, 320, 320, 228}, { 0, 320, 320, 228},
{ 0, 175, 175, 136}, { 0, 175, 175, 136},
{ 0, 129, 129, 102}, { 0, 129, 129, 102},
} }
},{ },{
{//FIXME 5/3 {
//8x8 dec=3 // 5/3 8x8 dec=3
{275, 245, 245, 218}, {275, 245, 245, 218},
{ 0, 230, 230, 156}, { 0, 230, 230, 156},
{ 0, 138, 138, 113}, { 0, 138, 138, 113},
},{ },{
//16x16 dec=4 // 5/3 16x16 or 32x32 dec=4
{352, 317, 317, 286}, {352, 317, 317, 286},
{ 0, 328, 328, 233}, { 0, 328, 328, 233},
{ 0, 180, 180, 140}, { 0, 180, 180, 140},
...@@ -328,29 +327,28 @@ static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in ...@@ -328,29 +327,28 @@ static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
} }
} }
}; };
#endif
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
for (j = 0; j < w; j+=4) { for (j = 0; j < w; j+=4) {
tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0])<<4; tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1])<<4; tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2])<<4; tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3])<<4; tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
} }
pix1 += line_size; pix1 += line_size;
pix2 += line_size; pix2 += line_size;
} }
ff_spatial_dwt(tmp, w, h, 16, type, dec_count); ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
s=0; s=0;
#if 0 assert(w==h);
for(level=0; level<dec_count; level++){ for(level=0; level<dec_count; level++){
for(ori= level ? 1 : 0; ori<4; ori++){ for(ori= level ? 1 : 0; ori<4; ori++){
int sx= (ori&1) ? 1<<level: 0; int size= w>>(dec_count-level);
int stride= 16<<(dec_count-level); int sx= (ori&1) ? size : 0;
int stride= 32<<(dec_count-level);
int sy= (ori&2) ? stride>>1 : 0; int sy= (ori&2) ? stride>>1 : 0;
int size= 1<<level;
for(i=0; i<size; i++){ for(i=0; i<size; i++){
for(j=0; j<size; j++){ for(j=0; j<size; j++){
...@@ -360,18 +358,8 @@ static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in ...@@ -360,18 +358,8 @@ static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
} }
} }
} }
#endif
for (i = 0; i < h; i++) {
for (j = 0; j < w; j+=4) {
s+= ABS(tmp[16*i+j+0]);
s+= ABS(tmp[16*i+j+1]);
s+= ABS(tmp[16*i+j+2]);
s+= ABS(tmp[16*i+j+3]);
}
}
assert(s>=0); assert(s>=0);
return s>>9;
return s>>2;
#endif #endif
} }
...@@ -391,6 +379,14 @@ static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int ...@@ -391,6 +379,14 @@ static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
return w_c(v, pix1, pix2, line_size, 16, h, 0); return w_c(v, pix1, pix2, line_size, 16, h, 0);
} }
/**
 * 32-pixel-wide wavelet comparison entry point.
 * Forwards to w_c() with the block width fixed at 32 and the wavelet
 * type fixed at 1 (presumably the 5/3 wavelet, going by the function
 * name -- confirm against ff_spatial_dwt()).
 * v, pix1, pix2, line_size and h are passed through unchanged.
 * @return the value returned by w_c()
 */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 32;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
/**
 * 32-pixel-wide wavelet comparison entry point.
 * Forwards to w_c() with the block width fixed at 32 and the wavelet
 * type fixed at 0 (presumably the 9/7 wavelet, going by the function
 * name -- confirm against ff_spatial_dwt()).
 * v, pix1, pix2, line_size and h are passed through unchanged.
 * @return the value returned by w_c()
 */
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 32;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{ {
int i; int i;
......
...@@ -3092,12 +3092,23 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con ...@@ -3092,12 +3092,23 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con
memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
} }
//FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
if(block_w==16){ if(block_w==16){
distortion = 0; /* FIXME rearrange dsputil to fit 32x32 cmp functions */
for(i=0; i<4; i++){ /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; /* FIXME cmps overlap but don't cover the wavelet's whole support,
distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); * so improving the score of one block is not strictly guaranteed to
* improve the score of the whole frame, so iterative motion est
* doesn't always converge. */
if(s->avctx->me_cmp == FF_CMP_W97)
distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
else if(s->avctx->me_cmp == FF_CMP_W53)
distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
else{
distortion = 0;
for(i=0; i<4; i++){
int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
}
} }
}else{ }else{
assert(block_w==8); assert(block_w==8);
...@@ -3282,7 +3293,7 @@ static void iterative_me(SnowContext *s){ ...@@ -3282,7 +3293,7 @@ static void iterative_me(SnowContext *s){
memcpy(s->block_state, state, sizeof(s->block_state)); memcpy(s->block_state, state, sizeof(s->block_state));
} }
for(pass=0; pass<50; pass++){ for(pass=0; pass<25; pass++){
int change= 0; int change= 0;
for(mb_y= 0; mb_y<b_height; mb_y++){ for(mb_y= 0; mb_y<b_height; mb_y++){
......
...@@ -121,6 +121,9 @@ extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, D ...@@ -121,6 +121,9 @@ extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, D
extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width); extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width);
extern void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); extern void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
/* C bits used by mmx/sse2/altivec */ /* C bits used by mmx/sse2/altivec */
......
...@@ -133,10 +133,10 @@ a7ef4746f27be309138c188e327d3ebe *./data/a-ffv1.avi ...@@ -133,10 +133,10 @@ a7ef4746f27be309138c188e327d3ebe *./data/a-ffv1.avi
2653642 ./data/a-ffv1.avi 2653642 ./data/a-ffv1.avi
799d3db687f6cdd7a837ec156efc171f *./data/out.yuv 799d3db687f6cdd7a837ec156efc171f *./data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176 stddev: 0.00 PSNR:99.99 bytes:7602176
ee3a27ccd48345f071e1617b116c76b6 *./data/a-snow.avi 495ad3209021bb2bb98ab431965d27b9 *./data/a-snow.avi
154958 ./data/a-snow.avi 155066 ./data/a-snow.avi
5310705af239bc5a427a2fa2ffb9206d *./data/out.yuv 5aba44ff94d278a20c9cbfb19953e618 *./data/out.yuv
stddev: 23.18 PSNR:20.81 bytes:7602176 stddev: 23.19 PSNR:20.81 bytes:7602176
ef6b08e8e122c5d29aa7af9259098d3e *./data/a-snow53.avi ef6b08e8e122c5d29aa7af9259098d3e *./data/a-snow53.avi
3533712 ./data/a-snow53.avi 3533712 ./data/a-snow53.avi
799d3db687f6cdd7a837ec156efc171f *./data/out.yuv 799d3db687f6cdd7a837ec156efc171f *./data/out.yuv
......
...@@ -133,10 +133,10 @@ d0831a8339491fd680b650f05262e5d9 *./data/a-ffv1.avi ...@@ -133,10 +133,10 @@ d0831a8339491fd680b650f05262e5d9 *./data/a-ffv1.avi
3524768 ./data/a-ffv1.avi 3524768 ./data/a-ffv1.avi
dde5895817ad9d219f79a52d0bdfb001 *./data/out.yuv dde5895817ad9d219f79a52d0bdfb001 *./data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176 stddev: 0.00 PSNR:99.99 bytes:7602176
64812de848ca5eec1931bce8299ed6a1 *./data/a-snow.avi b0bfe9be7082450d1c079d89a54527fb *./data/a-snow.avi
68024 ./data/a-snow.avi 67464 ./data/a-snow.avi
146f2c37d861f542bfb635e05be160e9 *./data/out.yuv 8ab54d65f2edbc2255444a039b7bd06e *./data/out.yuv
stddev: 10.94 PSNR:27.34 bytes:7602176 stddev: 10.93 PSNR:27.34 bytes:7602176
1b4d88af4c8c1ac3fd092de0a69d5023 *./data/a-snow53.avi 1b4d88af4c8c1ac3fd092de0a69d5023 *./data/a-snow53.avi
2725570 ./data/a-snow53.avi 2725570 ./data/a-snow53.avi
dde5895817ad9d219f79a52d0bdfb001 *./data/out.yuv dde5895817ad9d219f79a52d0bdfb001 *./data/out.yuv
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment