motion_est.c 69.6 KB
Newer Older
Fabrice Bellard's avatar
Fabrice Bellard committed
1 2
/*
 * Motion estimation 
3
 * Copyright (c) 2000,2001 Fabrice Bellard.
4
 * Copyright (c) 2002-2004 Michael Niedermayer
Fabrice Bellard's avatar
Fabrice Bellard committed
5 6
 * 
 *
7 8 9 10
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
Fabrice Bellard's avatar
Fabrice Bellard committed
11
 *
12
 * This library is distributed in the hope that it will be useful,
Fabrice Bellard's avatar
Fabrice Bellard committed
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
Fabrice Bellard's avatar
Fabrice Bellard committed
16
 *
17 18 19
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20 21
 *
 * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
Fabrice Bellard's avatar
Fabrice Bellard committed
22
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
23 24 25 26 27 28
 
/**
 * @file motion_est.c
 * Motion estimation.
 */
 
Fabrice Bellard's avatar
Fabrice Bellard committed
29 30
#include <stdlib.h>
#include <stdio.h>
31
#include <limits.h>
Fabrice Bellard's avatar
Fabrice Bellard committed
32 33 34 35
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"

36 37
#undef NDEBUG
#include <assert.h>
Michael Niedermayer's avatar
Michael Niedermayer committed
38

39
#define SQ(a) ((a)*(a))
40

41 42 43 44 45 46
/* Shorthands for rows of a local predictor array that must be named P.
 * Each row is indexed with [0] and [1] by the users in this file
 * (e.g. P_LEFT[0], P_LEFT[1]). */
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]

Michael Niedermayer's avatar
Michael Niedermayer committed
47 48
static inline int sad_hpel_motion_search(MpegEncContext * s,
				  int *mx_ptr, int *my_ptr, int dmin,
49 50
                                  int src_index, int ref_index,
                                  int size, int h);
Michael Niedermayer's avatar
Michael Niedermayer committed
51 52 53 54 55 56 57 58 59 60 61

/**
 * Advance s->me.map_generation by one step of 1<<(ME_MAP_MV_BITS*2).
 *
 * If the counter becomes zero after the addition, it is restarted at one
 * step and the s->me.map array is cleared.
 *
 * @return the updated value of s->me.map_generation
 */
static inline int update_map_generation(MpegEncContext * s)
{
    MotionEstContext * const c= &s->me;

    c->map_generation+= 1<<(ME_MAP_MV_BITS*2);
    if(c->map_generation!=0)
        return c->map_generation;

    /* the counter wrapped around: restart it and wipe the map */
    c->map_generation= 1<<(ME_MAP_MV_BITS*2);
    memset(c->map, 0, sizeof(uint32_t)*ME_MAP_SIZE);

    return c->map_generation;
}

62 63 64 65 66 67
/* shape adaptive search stuff */
/* One candidate minimum: minima_cmp() orders entries by height.
 * NOTE(review): x/y/checked are not used in the visible part of this file;
 * presumably a position and a "visited" marker - confirm against the users. */
typedef struct Minima{
    int height;
    int x, y;
    int checked;
}Minima;

/**
 * qsort()-style comparator ordering Minima entries by ascending height.
 *
 * The heights are compared rather than subtracted: a subtraction overflows
 * (undefined behaviour, wrong sign in practice) when the two heights are
 * far apart, e.g. a very large score against a negative one.
 *
 * @param a pointer to the first Minima
 * @param b pointer to the second Minima
 * @return -1, 0 or 1 if a's height is lower than, equal to or higher than b's
 */
static int minima_cmp(const void *a, const void *b){
    const Minima *da = a;
    const Minima *db = b;

    return (da->height > db->height) - (da->height < db->height);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
75

76 77 78
/* Bit flags assembled by get_flags() and tested in cmp().
 * FLAG_QPEL has to stay 1 because cmp() uses (flags&FLAG_QPEL) directly
 * as part of a shift amount (1+qpel). */
#define FLAG_QPEL   1 //must be 1
#define FLAG_CHROMA 2
#define FLAG_DIRECT 4
Michael Niedermayer's avatar
Michael Niedermayer committed
79

80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
/**
 * Point the source and reference plane pointers of the motion estimation
 * context at position (x, y).
 *
 * Plane 0 is offset by y*stride + x, planes 1 and 2 by
 * (y*uvstride + x)>>1. c->src[0] and c->ref[0] are always filled from src
 * and ref; c->ref[ref_index] is filled from ref2 only when ref_index is
 * non-zero (ref2 is not touched otherwise).
 */
static inline void init_ref(MpegEncContext *s, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
    MotionEstContext * const c= &s->me;
    const int luma_off  =   y*c->  stride + x;
    const int chroma_off= ((y*c->uvstride + x)>>1);
    const int plane_off[3]= { luma_off, chroma_off, chroma_off };
    int plane;

    for(plane=0; plane<3; plane++){
        c->src[0][plane]= src[plane] + plane_off[plane];
        c->ref[0][plane]= ref[plane] + plane_off[plane];
    }

    if(!ref_index)
        return;

    for(plane=0; plane<3; plane++)
        c->ref[ref_index][plane]= ref2[plane] + plane_off[plane];
}

99 100 101 102
/**
 * Build the FLAG_* set used by the compare code.
 *
 * @param direct non-zero to set FLAG_DIRECT
 * @param chroma non-zero to set FLAG_CHROMA
 * @return FLAG_QPEL (if CODEC_FLAG_QPEL is set in s->flags) combined with
 *         the requested FLAG_DIRECT / FLAG_CHROMA bits
 */
static int get_flags(MpegEncContext *s, int direct, int chroma){
    int flags= 0;

    if(s->flags&CODEC_FLAG_QPEL) flags|= FLAG_QPEL;
    if(direct)                   flags|= FLAG_DIRECT;
    if(chroma)                   flags|= FLAG_CHROMA;

    return flags;
}

105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
/**
 * Compute the comparison score of the source block against one candidate
 * motion vector.
 *
 * @param x,y        full-pel candidate offset, added to the reference pointers
 * @param subx,suby  sub-pel fraction of the candidate (combined with x,y into
 *                   hx,hy using a shift of 1, or 2 when FLAG_QPEL is set)
 * @param size       first index into the hpel/qpel put tables (size+1 is used
 *                   for chroma)
 * @param h          row count handed to the compare and hpel put functions
 * @param ref_index  row of c->ref used as reference planes
 * @param src_index  row of c->src used as source planes
 * @param cmp_func   compare function applied to plane 0
 * @param chroma_cmp_func compare function applied to planes 1 and 2 when
 *                   FLAG_CHROMA is set (non-direct path only)
 * @param flags      combination of FLAG_QPEL, FLAG_CHROMA and FLAG_DIRECT
 * @return the accumulated score; 256*256*256*32 when FLAG_DIRECT is set and
 *         the candidate fails the c->xmin/xmax/ymin/ymax range test
 */
static always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int uvstride= c->uvstride;
    const int qpel= flags&FLAG_QPEL;
    const int chroma= flags&FLAG_CHROMA;
    const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
    const int hx= subx + (x<<(1+qpel));
    const int hy= suby + (y<<(1+qpel));
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    int d;
    //FIXME check chroma 4mv, (no crashes ...)
    if(flags&FLAG_DIRECT){
        /* Direct path: a prediction is assembled in c->temp by a "put" from
         * ref[0] followed by an "avg" from ref[8], then compared as 16 rows.
         * NOTE(review): ref[8] indexes beyond the three plane entries filled by
         * init_ref(); presumably it lands on the plane 0 pointer of a later
         * c->ref row (the backward reference) - confirm against the
         * MotionEstContext layout. */
        if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){
            const int time_pp= s->pp_time;
            const int time_pb= s->pb_time;
            const int mask= 2*qpel+1;
            if(s->mv_type==MV_TYPE_8X8){
                int i;
                /* one vector pair per 8x8 quadrant; dst selects the quadrant of c->temp */
                for(i=0; i<4; i++){
                    int fx = c->direct_basis_mv[i][0] + hx;
                    int fy = c->direct_basis_mv[i][1] + hy;
                    int bx = hx ? fx - c->co_located_mv[i][0] : c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(qpel+4));
                    int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
                    int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                    int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
        
                    uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1);
                    if(qpel){
                        c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
                        c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride);
                    }else{
                        c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
                        c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
                    }
                }
            }else{
                int fx = c->direct_basis_mv[0][0] + hx;
                int fy = c->direct_basis_mv[0][1] + hy;
                int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
                int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
                int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
                
                if(qpel){
                    /* the qpel functions with table index 1 are applied to the four 8x8 quadrants in turn */
                    c->qpel_put[1][fxy](c->temp               , ref[0] + (fx>>2) + (fy>>2)*stride               , stride);
                    c->qpel_put[1][fxy](c->temp + 8           , ref[0] + (fx>>2) + (fy>>2)*stride + 8           , stride);
                    c->qpel_put[1][fxy](c->temp     + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride     + 8*stride, stride);
                    c->qpel_put[1][fxy](c->temp + 8 + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride + 8 + 8*stride, stride);
                    c->qpel_avg[1][bxy](c->temp               , ref[8] + (bx>>2) + (by>>2)*stride               , stride);
                    c->qpel_avg[1][bxy](c->temp + 8           , ref[8] + (bx>>2) + (by>>2)*stride + 8           , stride);
                    c->qpel_avg[1][bxy](c->temp     + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride     + 8*stride, stride);
                    c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
                }else{            
                    /* sanity checks: both vectors must stay within 16 pixels of the picture */
                    assert((fx>>1) + 16*s->mb_x >= -16);
                    assert((fy>>1) + 16*s->mb_y >= -16);
                    assert((fx>>1) + 16*s->mb_x <= s->width);
                    assert((fy>>1) + 16*s->mb_y <= s->height);
                    assert((bx>>1) + 16*s->mb_x >= -16);
                    assert((by>>1) + 16*s->mb_y >= -16);
                    assert((bx>>1) + 16*s->mb_x <= s->width);
                    assert((by>>1) + 16*s->mb_y <= s->height);

                    c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
                    c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
                }
            }
            d = cmp_func(s, c->temp, src[0], stride, 16);
        }else
            /* candidate failed the range test: return a very large score */
            d= 256*256*256*32;
    }else{
        /* uvdxy is assigned on every path that has chroma set, and only read when chroma is set */
        int uvdxy;
        if(dxy){
            /* sub-pel candidate: interpolate plane 0 into c->temp before comparing */
            if(qpel){
                c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
                if(chroma){
                    int cx= hx/2;
                    int cy= hy/2;
                    cx= (cx>>1)|(cx&1);
                    cy= (cy>>1)|(cy&1);
                    uvdxy= (cx&1) + 2*(cy&1);
                    //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
                }
            }else{
                c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
                if(chroma)
                    uvdxy= dxy | (x&1) | (2*(y&1));
            }
            d = cmp_func(s, c->temp, src[0], stride, h); 
        }else{
            /* full-pel candidate: compare directly against the reference */
            d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h); 
            if(chroma)
                uvdxy= (x&1) + 2*(y&1);
        }
        if(chroma){
            /* planes 1 and 2 are always interpolated with the hpel tables (also
             * when qpel is set) into the area 16 rows below the start of c->temp */
            uint8_t * const uvtemp= c->temp + 16*stride;
            c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
            c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
            d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1); 
            d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1); 
        }
    }
    /* disabled: score map update and motion vector penalty */
#if 0
    if(full_pel){
        const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
        score_map[index]= d;
    }

    d += (c->mv_penalty[hx - c->pred_x] + c->mv_penalty[hy - c->pred_y])*c->penalty_factor;
#endif
    return d;
}
Michael Niedermayer's avatar
Michael Niedermayer committed
220 221 222 223

#include "motion_est_template.c"

/**
 * Return the penalty factor matching a compare function.
 *
 * Only the low 8 bits of type select the compare function. Every value not
 * listed below is treated like FF_CMP_SAD.
 *
 * @param type compare function id (FF_CMP_*), possibly with extra flag bits
 * @return the factor derived from s->qscale (or 1 for FF_CMP_BIT)
 */
static inline int get_penalty_factor(MpegEncContext *s, int type){
    const int cmp_id= type&0xFF;

    if(cmp_id == FF_CMP_BIT)
        return 1;
    if(cmp_id == FF_CMP_DCT)
        return s->qscale*3;
    if(cmp_id == FF_CMP_SATD)
        return s->qscale*6;
    if(cmp_id == FF_CMP_SSE)
        return s->qscale*s->qscale*2;
    if(cmp_id == FF_CMP_RD || cmp_id == FF_CMP_PSNR)
        return (s->qscale*s->qscale*185 + 64)>>7;

    /* FF_CMP_SAD and everything else */
    return s->qscale*2;
}

/**
 * Initialise the motion estimation context of s.
 *
 * Selects the compare functions requested in s->avctx, derives the FLAG_*
 * sets for the three compare stages, chooses the sub-pel search routine and
 * the interpolation tables (honouring s->no_rounding), sets the strides and
 * points c->temp at the scratchpad.
 */
void ff_init_me(MpegEncContext *s){
    MotionEstContext * const c= &s->me;

    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
    ff_set_cmp(&s->dsp, s->dsp.mb_cmp, s->avctx->mb_cmp);

    c->flags    = get_flags(s, 0, s->avctx->me_cmp    &FF_CMP_CHROMA);
    c->sub_flags= get_flags(s, 0, s->avctx->me_sub_cmp&FF_CMP_CHROMA);
    c->mb_flags = get_flags(s, 0, s->avctx->mb_cmp    &FF_CMP_CHROMA);

/*FIXME s->no_rounding b_type*/
    if(s->flags&CODEC_FLAG_QPEL){
        c->sub_motion_search= qpel_motion_search;
        c->qpel_avg= s->dsp.avg_qpel_pixels_tab;
        if(s->no_rounding) c->qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab;
        else               c->qpel_put= s->dsp.put_qpel_pixels_tab;
    }else{
        /* the SAD-only half-pel search is used only when no chroma compare is
           requested for the sub-pel stage and all three stages use plain SAD */
        if(   !(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
           && s->avctx->me_sub_cmp == FF_CMP_SAD
           && s->avctx->    me_cmp == FF_CMP_SAD
           && s->avctx->    mb_cmp == FF_CMP_SAD)
            c->sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
        else
            c->sub_motion_search= hpel_motion_search;
    }

    /* The half-pel tables are needed in quarter-pel mode as well: cmp()
       interpolates the chroma planes through c->hpel_put whenever FLAG_CHROMA
       is set, so they must not be left unset when CODEC_FLAG_QPEL is on. */
    c->hpel_avg= s->dsp.avg_pixels_tab;
    if(s->no_rounding) c->hpel_put= s->dsp.put_no_rnd_pixels_tab;
    else               c->hpel_put= s->dsp.put_pixels_tab;

    if(s->linesize){
        c->stride  = s->linesize;
        c->uvstride= s->uvlinesize;
    }else{
        /* no line size known yet: derive the strides from the width in macroblocks */
        c->stride  = 16*s->mb_width + 32;
        c->uvstride=  8*s->mb_width + 16;
    }

    c->temp= c->scratchpad;
}
      
284
#if 0
285
/**
 * Sum of absolute differences between every pixel of a 16x16 block and mean.
 *
 * @param pix       top-left pixel of the block
 * @param line_size distance in bytes between the starts of two rows
 * @param mean      value every pixel is compared against
 * @return the accumulated absolute deviation
 */
static int pix_dev(uint8_t * pix, int line_size, int mean)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix[col] - mean);
        pix += line_size;
    }
    return sum;
}
306
#endif
307

308 309
/**
 * "Search" that does not search: stores the pixel position of the current
 * macroblock (16*mb_x, 16*mb_y) in *mx_ptr / *my_ptr.
 */
static inline void no_motion_search(MpegEncContext * s,
                                    int *mx_ptr, int *my_ptr)
{
    const int mb_pix_x = 16 * s->mb_x;
    const int mb_pix_y = 16 * s->mb_y;

    *mx_ptr = mb_pix_x;
    *my_ptr = mb_pix_y;
}

/**
 * Exhaustive full-pel search.
 *
 * Evaluates s->dsp.pix_abs[0][0] over 16 rows at every position of the window
 * [xx-range+1, xx+range-1] x [yy-range+1, yy+range-1] (xx,yy being the pixel
 * position of the current macroblock), clipped to xmin..xmax / ymin..ymax.
 * On equal scores the position with the smaller |dx|+|dy| distance to
 * (xx,yy) is preferred.
 *
 * @param mx_ptr,my_ptr receive the best position (absolute pixel coordinates;
 *                      0,0 if no position was evaluated)
 * @param ref_picture   plane the candidates are read from
 * @return the best score, 0x7fffffff if no position was evaluated
 */
static int full_motion_search(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int range,
                              int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    const int xx = 16 * s->mb_x;
    const int yy = 16 * s->mb_y;
    /* we lose one pixel on each side to avoid boundary problems with half pixel pred */
    const int x1 = (xx - range + 1 < xmin) ? xmin : xx - range + 1;
    const int x2 = (xx + range - 1 > xmax) ? xmax : xx + range - 1;
    const int y1 = (yy - range + 1 < ymin) ? ymin : yy - range + 1;
    const int y2 = (yy + range - 1 > ymax) ? ymax : yy + range - 1;
    uint8_t * const pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    int best_x = 0, best_y = 0;
    int best = 0x7fffffff;
    int x, y;

    for (y = y1; y <= y2; y++) {
        for (x = x1; x <= x2; x++) {
            const int d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
                                               s->linesize, 16);

            if (d > best)
                continue;
            /* a tie only wins when the candidate is closer to the block position */
            if (d == best &&
                (abs(x - xx) + abs(y - yy)) >=
                (abs(best_x - xx) + abs(best_y - yy)))
                continue;

            best   = d;
            best_x = x;
            best_y = y;
        }
    }

    *mx_ptr = best_x;
    *my_ptr = best_y;

#if 0
    if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
	*my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
	fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
    }
#endif
    return best;
}


/**
 * Logarithmic full-pel search.
 *
 * Starting with a window of +-range around the current macroblock position,
 * the window is sampled on a grid with a spacing of range, then range is
 * halved and the window re-centred on the best position found so far, until
 * range drops below 1. Windows are clipped to xmin..xmax / ymin..ymax. On
 * equal scores the position with the smaller |dx|+|dy| distance to the
 * macroblock position is preferred.
 *
 * @param mx_ptr,my_ptr receive the best position (absolute pixel coordinates)
 * @param ref_picture   plane the candidates are read from
 * @return the best score found
 */
static int log_motion_search(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int range,
                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    const int xx = s->mb_x << 4;
    const int yy = s->mb_y << 4;
    uint8_t * const pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    int center_x = xx, center_y = yy;   /* centre of the current window */
    int mx = 0, my = 0;
    int dmin = 0x7fffffff;

    do {
        /* window around the centre, clipped to the allowed area */
        const int x1 = (center_x - range < xmin) ? xmin : center_x - range;
        const int x2 = (center_x + range > xmax) ? xmax : center_x + range;
        const int y1 = (center_y - range < ymin) ? ymin : center_y - range;
        const int y2 = (center_y + range > ymax) ? ymax : center_y + range;
        int x, y;

        for (y = y1; y <= y2; y += range) {
            for (x = x1; x <= x2; x += range) {
                const int d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
                const int closer = (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy));

                if (d < dmin || (d == dmin && closer)) {
                    dmin = d;
                    mx = x;
                    my = y;
                }
            }
        }

        /* next pass: half the step, centred on the best position so far */
        range = range >> 1;
        center_x = mx;
        center_y = my;
    } while (range >= 1);

#ifdef DEBUG
    fprintf(stderr, "log       - MX: %d\tMY: %d\n", mx, my);
#endif
    *mx_ptr = mx;
    *my_ptr = my;
    return dmin;
}

/**
 * Full-pel search that scans one row and one column per pass.
 *
 * Each pass samples, with a spacing of range, the row through the current
 * centre (updating mx) and then the column through the same centre
 * (updating my). Afterwards range is halved and the centre moves to
 * (mx, my); the search ends once range drops below 1. The scanned intervals
 * are clipped to xmin..xmax / ymin..ymax.
 *
 * @param mx_ptr,my_ptr receive the resulting position (absolute pixel
 *                      coordinates)
 * @param ref_picture   plane the candidates are read from
 * @return the best score of the column scan of the last pass
 */
static int phods_motion_search(MpegEncContext * s,
                               int *mx_ptr, int *my_ptr, int range,
                               int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    const int xx = s->mb_x << 4;
    const int yy = s->mb_y << 4;
    uint8_t * const pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    int center_x = xx, center_y = yy;   /* centre of the current pass */
    int mx = 0, my = 0;
    int dminx, dminy;

    do {
        /* scanned intervals around the centre, clipped to the allowed area */
        const int x1 = (center_x - range < xmin) ? xmin : center_x - range;
        const int x2 = (center_x + range > xmax) ? xmax : center_x + range;
        const int y1 = (center_y - range < ymin) ? ymin : center_y - range;
        const int y2 = (center_y + range > ymax) ? ymax : center_y + range;
        int px, py, d;

        dminx = 0x7fffffff;
        dminy = 0x7fffffff;

        /* horizontal scan along the row of the centre */
        for (px = x1; px <= x2; px += range) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (center_y * s->linesize) + px, s->linesize, 16);
            if (d < dminx || (d == dminx && (abs(px - xx) + abs(center_y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminx = d;
                mx = px;
            }
        }

        /* vertical scan along the column of the centre (mx is already updated here) */
        for (py = y1; py <= y2; py += range) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (py * s->linesize) + center_x, s->linesize, 16);
            if (d < dminy || (d == dminy && (abs(center_x - xx) + abs(py - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminy = d;
                my = py;
            }
        }

        range = range >> 1;
        center_x = mx;
        center_y = my;
    } while (range >= 1);

#ifdef DEBUG
    fprintf(stderr, "phods     - MX: %d\tMY: %d\n", mx, my);
#endif

    *mx_ptr = mx;
    *my_ptr = my;
    return dminy;
}

536 537 538

#define Z_THRESHOLD 256

Michael Niedermayer's avatar
Michael Niedermayer committed
539
/* Evaluate one half-pel neighbour (x, y) of the current full-pel position and
 * keep it in dminh/dx/dy if it scores lower (via COPY3_IF_LT).
 * Relies on these names in the expanding scope: s, size, h, pix, ptr, stride,
 * mv_penalty, pen_x, pen_y, penalty_factor, d, dminh, dx, dy.
 * The suffix argument is not used. x and y are parenthesised wherever they
 * appear inside an expression so that any argument expression is safe.
 * The expansion is a plain { } block because callers do not write a trailing
 * semicolon. */
#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
    d= s->dsp.pix_abs[size][((x)?1:0)+((y)?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
    d += (mv_penalty[pen_x + (x)] + mv_penalty[pen_y + (y)])*penalty_factor;\
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
545

Michael Niedermayer's avatar
Michael Niedermayer committed
546
/**
 * Half-pel refinement of a full-pel vector using s->dsp.pix_abs.
 *
 * On entry *mx_ptr / *my_ptr hold the full-pel vector, on return they hold the
 * vector in half-pel units. If s->me.skip is set the vector is zeroed and dmin
 * is returned unchanged. If the full-pel vector lies on the border of the
 * xmin..xmax / ymin..ymax area it is only converted to half-pel units.
 * Otherwise the scores of the four full-pel neighbours stored in score_map
 * decide which half-pel neighbours are evaluated and in which order.
 *
 * @param dmin      score of the full-pel vector
 * @param src_index row of s->me.src to use
 * @param ref_index row of s->me.ref to use
 * @param size,h    block size index and row count passed to pix_abs
 * @return the best score found (dmin if nothing better was found)
 */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    const int penalty_factor= s->me.sub_penalty_factor;
    int mx, my, dminh;
    uint8_t *pix, *ptr;
    int stride= s->me.stride;
    const int flags= s->me.sub_flags;
    LOAD_COMMON

    assert(flags == 0);

    if(s->me.skip){
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    mx = *mx_ptr;
    my = *my_ptr;
    pix = s->me.src[src_index][0];
    ptr = s->me.ref[ref_index][0] + (my * stride) + mx;
    dminh = dmin;

    if (mx <= xmin || mx >= xmax ||
        my <= ymin || my >= ymax) {
        /* on the border: no refinement, just switch to half-pel units */
        mx<<=1;
        my<<=1;
    }else{
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        const int center= (my<<ME_MAP_SHIFT) + mx;
        /* scores of the full-pel neighbours above, left, right and below */
        const int s_top  = score_map[(center-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int s_left = score_map[(center- 1               )&(ME_MAP_SIZE-1)];
        const int s_right= score_map[(center+ 1               )&(ME_MAP_SIZE-1)];
        const int s_bot  = score_map[(center+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];

        mx<<=1;
        my<<=1;

        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /* ptr starts one row up for the y=-1 candidates and is moved back
           down (ptr+= stride) before any y>=0 candidate is checked */
        ptr-= stride;
        if(s_top<=s_bot){
            CHECK_SAD_HALF_MV(y2 , 0, -1)
            if(s_left<=s_right){
                CHECK_SAD_HALF_MV(xy2, -1, -1)
                if(s_top+s_right<=s_bot+s_left){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
            }else{
                CHECK_SAD_HALF_MV(xy2, +1, -1)
                if(s_top+s_left<=s_bot+s_right){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
            }
        }else{
            if(s_left<=s_right){
                if(s_top+s_left<=s_bot+s_right){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
                CHECK_SAD_HALF_MV(xy2, -1, +1)
            }else{
                if(s_top+s_right<=s_bot+s_left){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
                CHECK_SAD_HALF_MV(xy2, +1, +1)
            }
            CHECK_SAD_HALF_MV(y2 ,  0, +1)
        }

        /* apply the best half-pel offset found */
        mx+=dx;
        my+=dy;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}

653
/**
 * Store the vector (mx, my) of the current macroblock in s->p_mv_table and,
 * when mv4 is non-zero, in the 2x2 entries of
 * s->current_picture.motion_val[0] that start at s->block_index[0].
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int row, col;

    s->p_mv_table[mb_xy][0] = mx;
    s->p_mv_table[mb_xy][1] = my;

    /* has already been set to the 4 MV if 4MV is done */
    if(!mv4)
        return;

    for(row=0; row<2; row++){
        const int row_xy= s->block_index[0] + row*s->b8_stride;

        for(col=0; col<2; col++){
            s->current_picture.motion_val[0][row_xy + col][0]= mx;
            s->current_picture.motion_val[0][row_xy + col][1]= my;
        }
    }
}

677 678 679
/**
 * get fullpel ME search limits.
 */
680
static inline void get_limits(MpegEncContext *s, int x, int y)
Fabrice Bellard's avatar
Fabrice Bellard committed
681
{
682 683 684 685
/*
    if(s->avctx->me_range) s->me.range= s->avctx->me_range >> 1;
    else                   s->me.range= 16;
*/
Fabrice Bellard's avatar
Fabrice Bellard committed
686
    if (s->unrestricted_mv) {
687 688 689 690
        s->me.xmin = - x - 16;
        s->me.ymin = - y - 16;
        s->me.xmax = - x + s->mb_width *16;
        s->me.ymax = - y + s->mb_height*16;
Fabrice Bellard's avatar
Fabrice Bellard committed
691
    } else {
692 693 694 695
        s->me.xmin = - x;
        s->me.ymin = - y;
        s->me.xmax = - x + s->mb_width *16 - 16;
        s->me.ymax = - y + s->mb_height*16 - 16;
Fabrice Bellard's avatar
Fabrice Bellard committed
696
    }
697 698
}

699 700 701 702 703 704 705 706 707 708 709 710
/**
 * Derive the plane 0 source and reference pointers of the four 8x8 blocks.
 *
 * Rows 1..3 of c->ref and c->src are set relative to row 0: +8 for the right
 * blocks, +8*s->linesize for the bottom blocks.
 */
static inline void init_mv4_ref(MpegEncContext *s){
    MotionEstContext * const c= &s->me;
    const int stride= s->linesize;
    int blk;

    for(blk=1; blk<4; blk++){
        const int off= 8*(blk&1) + 8*stride*(blk>>1);

        c->ref[blk][0] = c->ref[0][0] + off;
        c->src[blk][0] = c->src[0][0] + off;
    }
}

711
/**
 * Search one motion vector for each of the four 8x8 blocks of the current
 * macroblock.
 *
 * For every block a predictor is built from the neighbouring entries of
 * s->current_picture.motion_val[0], the vector is searched with
 * epzs_motion_search4() and refined with c->sub_motion_search(), and the
 * result is written back to motion_val[0]. When the sub-pel and macroblock
 * compare functions differ, the four predictions are rendered into the
 * scratchpad and scored with s->dsp.mb_cmp (plus chroma if FF_CMP_CHROMA is
 * set in s->avctx->mb_cmp).
 *
 * @param mx,my the 16x16 vector; used as extra predictor and stored in
 *              c->pred_x / c->pred_y before returning a score
 * @param shift shift applied to the full-pel limits when clipping predictors
 * @return INT_MAX if all four vectors equal (mx, my), otherwise the summed
 *         score (plus 11*c->mb_penalty_factor unless mb_cmp is FF_CMP_RD)
 */
static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
{
    MotionEstContext * const c= &s->me;
    const int size= 1;
    const int h= 8;
    const int stride= s->linesize;
    uint8_t *mv_penalty= c->current_mv_penalty;
    int P[10][2];
    int block;
    int dmin_sum= 0, mx4_sum= 0, my4_sum= 0;
    int same= 1;

    init_mv4_ref(s);

    for(block=0; block<4; block++){
        /* offset from the entry above to the third predictor, per block */
        static const int off[4]= {2, 1, 1, -1};
        const int mot_stride = s->b8_stride;
        const int mot_xy = s->block_index[block];
        int mx4, my4;
        int pred_x4, pred_y4;
        int dmin4;

        P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
        P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];

        if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);

        if (s->first_slice_line && block<2) {
            /* special case for first line: only the left neighbour is used */
            pred_x4= P_LEFT[0];
            pred_y4= P_LEFT[1];
        } else {
            P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][0];
            P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][1];
            P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
            P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
            if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
            if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
            if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
            if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            pred_x4= P_MEDIAN[0];
            pred_y4= P_MEDIAN[1];
        }
        c->pred_x= pred_x4;
        c->pred_y= pred_y4;

        P_MV1[0]= mx;
        P_MV1[1]= my;

        dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);

        dmin4= c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);

        if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
            /* render this block's prediction into its quadrant of the scratchpad
               and account only for the vector cost here; the pixels are scored
               with mb_cmp after the loop */
            const int offset= ((block&1) + (block>>1)*stride)*8;
            uint8_t *dest_y = c->scratchpad + offset;
            int dxy;

            if(s->quarter_sample){
                uint8_t *ref= c->ref[block][0] + (mx4>>2) + (my4>>2)*stride;
                dxy = ((my4 & 3) << 2) | (mx4 & 3);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y   , ref    , stride);
                else
                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y   , ref    , stride);
            }else{
                uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
                dxy = ((my4 & 1) << 1) | (mx4 & 1);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y    , ref    , stride, h);
                else
                    s->dsp.put_pixels_tab       [1][dxy](dest_y    , ref    , stride, h);
            }
            dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*c->mb_penalty_factor;
        }else
            dmin_sum+= dmin4;

        /* the vector sums are kept in half-pel units */
        if(s->quarter_sample){
            mx4_sum+= mx4/2;
            my4_sum+= my4/2;
        }else{
            mx4_sum+= mx4;
            my4_sum+= my4;
        }

        s->current_picture.motion_val[0][ s->block_index[block] ][0]= mx4;
        s->current_picture.motion_val[0][ s->block_index[block] ][1]= my4;

        if(mx4 != mx || my4 != my) same=0;
    }

    /* four vectors identical to the 16x16 one: report the worst possible score */
    if(same)
        return INT_MAX;

    if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
        dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, c->scratchpad, stride, 16);
    }

    if(s->avctx->mb_cmp&FF_CMP_CHROMA){
        /* one chroma vector derived from the sum of the four block vectors */
        const int chroma_mx= ff_h263_round_chroma(mx4_sum);
        const int chroma_my= ff_h263_round_chroma(my4_sum);
        const int dxy = ((chroma_my & 1) << 1) | (chroma_mx & 1);
        const int offset= (s->mb_x*8 + (chroma_mx>>1)) + (s->mb_y*8 + (chroma_my>>1))*s->uvlinesize;

        if(s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }else{
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }

        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad  , s->uvlinesize, 8);
        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad+8, s->uvlinesize, 8);
    }

    /* restore the 16x16 vector as predictor */
    c->pred_x= mx;
    c->pred_y= my;

    if((s->avctx->mb_cmp&0xFF) == FF_CMP_RD)
        return dmin_sum;

    return dmin_sum+ 11*c->mb_penalty_factor;
}

848 849 850 851 852 853 854 855 856 857 858 859 860
/**
 * Sets up the second row of source/reference plane pointers.
 * c->src[1] and c->ref[ref_index+1] are made to point one picture line
 * below c->src[0] and c->ref[ref_index]; the two chroma planes are only
 * updated when FLAG_CHROMA is set in c->flags.
 */
static inline void init_interlaced_ref(MpegEncContext *s, int ref_index){
    MotionEstContext * const c= &s->me;
    uint8_t ** const base= c->ref[ref_index];
    uint8_t ** const next= c->ref[ref_index + 1];
    int plane;

    /* luma */
    next[0]      = base[0]      + s->linesize;
    c->src[1][0] = c->src[0][0] + s->linesize;

    if(!(c->flags & FLAG_CHROMA))
        return;

    /* both chroma planes use the chroma line size */
    for(plane=1; plane<=2; plane++){
        next[plane]      = base[plane]      + s->uvlinesize;
        c->src[1][plane] = c->src[0][plane] + s->uvlinesize;
    }
}

861
/**
 * Searches one motion vector per block (block 0 and block 1, each 8 lines
 * high with doubled stride) of the current macroblock (s->mb_x, s->mb_y).
 *
 * For every block both field_select values are tried (or only the stored
 * one when user_field_select is set); the vector found for each tried
 * combination is written to mv_tables[block][field_select] and the cheaper
 * field_select is written to field_select_tables[block].
 *
 * While the search runs, the vertical limits of s->me are halved and its
 * strides doubled; all four are restored before returning.
 *
 * @param s                   encoder context
 * @param ref_index           index into s->me.ref of the first reference to
 *                            use; field_select is added to it
 * @param mv_tables           [block][field_select] motion vector tables,
 *                            indexed by macroblock position
 * @param field_select_tables [block] tables receiving the chosen field;
 *                            read as input when user_field_select is set
 * @param mx                  x component of the frame motion vector
 * @param my                  y component of the frame motion vector
 * @param user_field_select   if non-zero, only the field already stored in
 *                            field_select_tables is searched
 * @return INT_MAX if both blocks ended up equivalent to the frame vector
 *         (mx,my) on their own field, otherwise the sum of the best
 *         per-block scores, plus 11*mb_penalty_factor unless the mb_cmp
 *         function is FF_CMP_RD
 */
static int interlaced_search(MpegEncContext *s, int ref_index,
                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
{
    MotionEstContext * const c= &s->me;
    const int size=0;
    const int h=8;
    int block;
    int P[10][2];
    uint8_t * const mv_penalty= c->current_mv_penalty;
    int same=1;
    const int stride= 2*s->linesize;
    int dmin_sum= 0;
    const int mot_stride= s->mb_stride;
    const int xy= s->mb_x + s->mb_y*mot_stride;
    /* Saved so the limits can be restored exactly: shifting right and then
       left again would drop the low bit of odd limits and would left-shift
       a negative ymin, which is undefined behaviour. */
    const int frame_ymin= c->ymin;
    const int frame_ymax= c->ymax;

    c->ymin>>=1;
    c->ymax>>=1;
    c->stride<<=1;
    c->uvstride<<=1;
    init_interlaced_ref(s, ref_index);

    for(block=0; block<2; block++){
        int field_select;
        int best_dmin= INT_MAX;
        int best_field= -1;

        for(field_select=0; field_select<2; field_select++){
            int dmin, mx_i, my_i;
            int16_t (*mv_table)[2]= mv_tables[block][field_select];

            if(user_field_select){
                if(field_select_tables[block][xy] != field_select)
                    continue;
            }

            /* spatial predictors, clipped to the search range */
            P_LEFT[0] = mv_table[xy - 1][0];
            P_LEFT[1] = mv_table[xy - 1][1];
            if(P_LEFT[0]       > (c->xmax<<1)) P_LEFT[0]       = (c->xmax<<1);

            s->me.pred_x= P_LEFT[0];
            s->me.pred_y= P_LEFT[1];

            if(!s->first_slice_line){
                P_TOP[0]      = mv_table[xy - mot_stride][0];
                P_TOP[1]      = mv_table[xy - mot_stride][1];
                P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
                P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
                if(P_TOP[1]      > (c->ymax<<1)) P_TOP[1]     = (c->ymax<<1);
                /* xmin may be negative, so multiply instead of shifting */
                if(P_TOPRIGHT[0] < (c->xmin*2))  P_TOPRIGHT[0]= (c->xmin*2);
                if(P_TOPRIGHT[0] > (c->xmax<<1)) P_TOPRIGHT[0]= (c->xmax<<1);
                if(P_TOPRIGHT[1] > (c->ymax<<1)) P_TOPRIGHT[1]= (c->ymax<<1);

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            P_MV1[0]= mx; //FIXME not correct if block != field_select
            P_MV1[1]= my / 2;

            dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select+ref_index, mv_table, (1<<16)>>1);

            dmin= c->sub_motion_search(s, &mx_i, &my_i, dmin, block, field_select+ref_index, size, h);

            mv_table[xy][0]= mx_i;
            mv_table[xy][1]= my_i;

            if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
                /* the sub-pel search used a different compare function than
                   the macroblock decision, so rescore with mb_cmp */
                int dxy;

                //FIXME chroma ME
                uint8_t *ref= c->ref[field_select+ref_index][0] + (mx_i>>1) + (my_i>>1)*stride;
                dxy = ((my_i & 1) << 1) | (mx_i & 1);

                if(s->no_rounding){
                    s->dsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref    , stride, h);
                }else{
                    s->dsp.put_pixels_tab       [size][dxy](c->scratchpad, ref    , stride, h);
                }
                dmin= s->dsp.mb_cmp[size](s, c->src[block][0], c->scratchpad, stride, h);
                dmin+= (mv_penalty[mx_i-s->me.pred_x] + mv_penalty[my_i-s->me.pred_y] + 1)*c->mb_penalty_factor;
            }else
                dmin+= c->mb_penalty_factor; //field_select bits

            dmin += field_select != block; //slightly prefer same field

            if(dmin < best_dmin){
                best_dmin= dmin;
                best_field= field_select;
            }
        }

        /* With user_field_select set, a stored field value other than 0 or 1
           would skip both candidates and leave best_field at -1, which must
           not be used as a table index. */
        assert(best_field >= 0);

        {
            int16_t (*mv_table)[2]= mv_tables[block][best_field];

            if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all
            if(mv_table[xy][1]&1) same=0;
            if(mv_table[xy][1]*2 != my) same=0;
            if(best_field != block) same=0;
        }

        field_select_tables[block][xy]= best_field;
        dmin_sum += best_dmin;
    }

    c->ymin= frame_ymin;
    c->ymax= frame_ymax;
    c->stride>>=1;
    c->uvstride>>=1;

    if(same)
        return INT_MAX;

    switch(s->avctx->mb_cmp&0xFF){
    /*case FF_CMP_SSE:
        return dmin_sum+ 32*s->qscale*s->qscale;*/
    case FF_CMP_RD:
        return dmin_sum;
    default:
        return dmin_sum+ 11*c->mb_penalty_factor;
    }
}

982 983 984 985 986 987 988 989 990
/**
 * Takes over the macroblock type and motion vectors already stored in the
 * current picture for macroblock (mb_x, mb_y) and scores them.
 *
 * The stored vectors are copied into the matching candidate tables of the
 * encoder context, s->mb_type[] is set to the corresponding
 * CANDIDATE_MB_TYPE_* value, and the prediction error of the stored vectors
 * is measured with the s->dsp.sse compare functions.
 *
 * @param s      encoder context
 * @param mb_x   macroblock column
 * @param mb_y   macroblock row
 * @param p_type non-zero when the stored data is stored into the P-frame
 *               tables (list 1 must then be unused), zero for the B-frame
 *               tables
 * @return the accumulated compare score (0 when the macroblock ends up as
 *         intra), or -1 if the stored type needs interlaced ME or 4MV and
 *         the respective codec flag is not set
 */
static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int p_type){
    MotionEstContext * const c= &s->me;
    Picture *p= s->current_picture_ptr;
    int mb_xy= mb_x + mb_y*s->mb_stride;
    int xy= 2*mb_x + 2*mb_y*s->b8_stride;
    int mb_type= s->current_picture.mb_type[mb_xy];
    int flags= c->flags;
    int shift= (flags&FLAG_QPEL) + 1;
    int mask= (1<<shift)-1;
    int x, y, i;
    int d=0;
    me_cmp_func cmpf= s->dsp.sse[0];
    me_cmp_func chroma_cmpf= s->dsp.sse[1];

    assert(p_type==0 || !USES_LIST(mb_type, 1));
    assert(IS_INTRA(mb_type) || USES_LIST(mb_type,0) || USES_LIST(mb_type,1));

    if(IS_INTERLACED(mb_type)){
        int xy2= xy  + s->b8_stride;
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;

        /* Bail out before touching the strides: returning with them still
           doubled would corrupt the context for every later macroblock. */
        if(!(s->flags & CODEC_FLAG_INTERLACED_ME)){
            av_log(s->avctx, AV_LOG_ERROR, "Interlaced macroblock selected but interlaced motion estimation disabled\n");
            return -1;
        }

        c->stride<<=1;
        c->uvstride<<=1;

        if(USES_LIST(mb_type, 0)){
            int field_select0= p->ref_index[0][xy ];
            int field_select1= p->ref_index[0][xy2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 0);

            if(p_type){
                s->p_field_select_table[0][mb_xy]= field_select0;
                s->p_field_select_table[1][mb_xy]= field_select1;
                *(uint32_t*)s->p_field_mv_table[0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->p_field_mv_table[1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER_I;
            }else{
                s->b_field_select_table[0][0][mb_xy]= field_select0;
                s->b_field_select_table[0][1][mb_xy]= field_select1;
                *(uint32_t*)s->b_field_mv_table[0][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->b_field_mv_table[0][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_FORWARD_I;
            }

            x= p->motion_val[0][xy ][0];
            y= p->motion_val[0][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[0][xy2][0];
            y= p->motion_val[0][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1, 1, cmpf, chroma_cmpf, flags);
        }
        if(USES_LIST(mb_type, 1)){
            int field_select0= p->ref_index[1][xy ];
            int field_select1= p->ref_index[1][xy2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 2);

            s->b_field_select_table[1][0][mb_xy]= field_select0;
            s->b_field_select_table[1][1][mb_xy]= field_select1;
            *(uint32_t*)s->b_field_mv_table[1][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[1][xy ];
            *(uint32_t*)s->b_field_mv_table[1][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[1][xy2];
            if(USES_LIST(mb_type, 0)){
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BIDIR_I;
            }else{
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BACKWARD_I;
            }

            /* note: this overwrites any list 0 score computed above */
            x= p->motion_val[1][xy ][0];
            y= p->motion_val[1][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0+2, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[1][xy2][0];
            y= p->motion_val[1][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1+2, 1, cmpf, chroma_cmpf, flags);
            //FIXME bidir scores
        }
        c->stride>>=1;
        c->uvstride>>=1;
    }else if(IS_8X8(mb_type)){
        if(!(s->flags & CODEC_FLAG_4MV)){
            av_log(s->avctx, AV_LOG_ERROR, "4MV macroblock selected but 4MV encoding disabled\n");
            return -1;
        }
        cmpf= s->dsp.sse[1];
        chroma_cmpf= s->dsp.sse[1];
        init_mv4_ref(s);
        for(i=0; i<4; i++){
            xy= s->block_index[i];
            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 1, 8, i, i, cmpf, chroma_cmpf, flags);
        }
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER4V;
    }else{
        if(USES_LIST(mb_type, 0)){
            if(p_type){
                *(uint32_t*)s->p_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER;
            }else if(USES_LIST(mb_type, 1)){
                *(uint32_t*)s->b_bidir_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                *(uint32_t*)s->b_bidir_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BIDIR;
            }else{
                *(uint32_t*)s->b_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_FORWARD;
            }
            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 0, 0, cmpf, chroma_cmpf, flags);
        }else if(USES_LIST(mb_type, 1)){
            *(uint32_t*)s->b_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BACKWARD;

            x= p->motion_val[1][xy][0];
            y= p->motion_val[1][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 2, 0, cmpf, chroma_cmpf, flags);
        }else
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;
    }
    return d;
}

1109 1110 1111
/**
 * Motion estimation and candidate macroblock type selection for one
 * macroblock of a P-frame.
 *
 * Computes the source variance (varc) and the motion compensated error
 * (vard), stores them in the current picture, updates the scene change
 * score, runs the configured motion search and writes the set of candidate
 * types for the macroblock to s->mb_type[].
 *
 * When avctx->me_threshold is set, the motion already stored in the current
 * picture is checked first with check_input_motion(); if its error is below
 * the threshold it is used as is and no search is done.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_p_frame_motion(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    uint8_t *pix, *ppix;
    int sum, varc, vard, mx, my, dmin;
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    int mb_type=0;
    Picture * const pic= &s->current_picture;

    init_ref(s, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);
    assert(s->linesize == s->me.stride);
    assert(s->uvlinesize == s->me.uvstride);

    s->me.penalty_factor    = get_penalty_factor(s, s->avctx->me_cmp);
    s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
    s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
    s->me.current_mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    s->me.skip=0;

    /* intra / predictive decision */
    pix = c->src[0][0];
    sum = s->dsp.pix_sum(pix, s->linesize);
    /* square in unsigned arithmetic: sum*sum does not fit into a signed int
       for bright blocks */
    varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*(unsigned)sum)>>8) + 500 + 128)>>8;

    pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
    pic->mb_var [s->mb_stride * mb_y + mb_x] = varc;
    s->mb_var_sum_temp += varc;

    if(s->avctx->me_threshold){
        vard= (check_input_motion(s, mb_x, mb_y, 1)+128)>>8;

        if(vard<s->avctx->me_threshold){
            /* stored motion is good enough, keep it without searching */
            pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = vard;
            s->mc_mb_var_sum_temp += vard;
            if (vard <= 64 || vard < varc) { //FIXME
                s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
            }else{
                s->scene_change_score+= s->qscale;
            }
            return;
        }
        if(vard<s->avctx->mb_threshold)
            mb_type= s->mb_type[mb_x + mb_y*s->mb_stride];
    }

    switch(s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        mx-= mb_x*16;
        my-= mb_y*16;
        dmin = 0;
        break;
#if 0
    case ME_FULL:
        dmin = full_motion_search(s, &mx, &my, range, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_LOG:
        dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_PHODS:
        dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
#endif
    case ME_X1:
    case ME_EPZS:
       {
            const int mot_stride = s->b8_stride;
            const int mot_xy = s->block_index[0];

            /* spatial predictors, clipped to the search range; the lower
               limits may be negative, so they are scaled by multiplication
               rather than by a left shift */
            P_LEFT[0]       = s->current_picture.motion_val[0][mot_xy - 1][0];
            P_LEFT[1]       = s->current_picture.motion_val[0][mot_xy - 1][1];

            if(P_LEFT[0]       > (s->me.xmax<<shift)) P_LEFT[0]       = (s->me.xmax<<shift);

            if(!s->first_slice_line) {
                P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
                P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
                P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
                P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
                if(P_TOP[1]      > (s->me.ymax<<shift))     P_TOP[1]     = (s->me.ymax<<shift);
                if(P_TOPRIGHT[0] < (s->me.xmin*(1<<shift))) P_TOPRIGHT[0]= (s->me.xmin*(1<<shift));
                if(P_TOPRIGHT[1] > (s->me.ymax<<shift))     P_TOPRIGHT[1]= (s->me.ymax<<shift);

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

                if(s->out_format == FMT_H263){
                    c->pred_x = P_MEDIAN[0];
                    c->pred_y = P_MEDIAN[1];
                }else { /* mpeg1 at least */
                    c->pred_x= P_LEFT[0];
                    c->pred_y= P_LEFT[1];
                }
            }else{
                c->pred_x= P_LEFT[0];
                c->pred_y= P_LEFT[1];
            }

        }
        dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift);

        break;
    }

    /* At this point (mx,my) are full-pel and the relative displacement */
    ppix = c->ref[0][0] + (my * s->linesize) + mx;

    vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16)+128)>>8;

    pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = vard;
//    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
    s->mc_mb_var_sum_temp += vard;

#if 0
    printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
           varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
#endif
    if(mb_type){
        /* the type was fixed by check_input_motion(), only refine it */
        if (vard <= 64 || vard < varc)
            s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        else
            s->scene_change_score+= s->qscale;

        if(mb_type == CANDIDATE_MB_TYPE_INTER){
            s->me.sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            set_p_mv_tables(s, mx, my, 1);
        }else{
            /* scale to sub-pel units; mx/my may be negative */
            mx *= 1<<shift;
            my *= 1<<shift;
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER4V){
            h263_mv4_search(s, mx, my, shift);

            set_p_mv_tables(s, mx, my, 0);
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER_I){
            interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 1);
        }
    }else if(s->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* collect every plausible candidate, the final choice is made later */
        if (vard <= 64 || vard < varc)
            s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        else
            s->scene_change_score+= s->qscale;

        if (vard*2 + 200 > varc)
            mb_type|= CANDIDATE_MB_TYPE_INTRA;
        if (varc*2 + 200 > vard){
            mb_type|= CANDIDATE_MB_TYPE_INTER;
            s->me.sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            if(s->flags&CODEC_FLAG_MV0)
                if(mx || my)
                    mb_type |= CANDIDATE_MB_TYPE_SKIPED; //FIXME check difference
        }else{
            /* scale to sub-pel units; mx/my may be negative */
            mx *= 1<<shift;
            my *= 1<<shift;
        }
        if((s->flags&CODEC_FLAG_4MV)
           && !s->me.skip && varc>50 && vard>10){
            if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
                mb_type|=CANDIDATE_MB_TYPE_INTER4V;

            set_p_mv_tables(s, mx, my, 0);
        }else
            set_p_mv_tables(s, mx, my, 1);
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !s->me.skip){ //FIXME varc/d checks
            if(interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER_I;
        }
    }else{
        /* simple decision: pick the single cheapest type right here */
        int intra_score, i;
        mb_type= CANDIDATE_MB_TYPE_INTER;

        dmin= s->me.sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
        if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
            dmin= get_mb_score(s, mx, my, 0, 0);

        if((s->flags&CODEC_FLAG_4MV)
           && !s->me.skip && varc>50 && vard>10){
            int dmin4= h263_mv4_search(s, mx, my, shift);
            if(dmin4 < dmin){
                mb_type= CANDIDATE_MB_TYPE_INTER4V;
                dmin=dmin4;
            }
        }
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !s->me.skip){ //FIXME varc/d checks
            int dmin_i= interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);
            if(dmin_i < dmin){
                mb_type = CANDIDATE_MB_TYPE_INTER_I;
                dmin= dmin_i;
            }
        }

//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
        set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);

        /* get intra luma score */
        if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
            intra_score= (varc<<8) - 500; //FIXME dont scale it down so we dont have to fix it
        }else{
            /* block mean replicated into all four bytes; kept unsigned since
               the pattern exceeds INT_MAX for means of 128 and above */
            const uint32_t mean= (uint32_t)((sum+128)>>8) * 0x01010101U;

            /* fill a flat 16x16 block with the mean and compare against it */
            for(i=0; i<16; i++){
                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
            }

            intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize, 16);
        }
#if 0 //FIXME
        /* get chroma score */
        if(s->avctx->mb_cmp&FF_CMP_CHROMA){
            for(i=1; i<3; i++){
                uint8_t *dest_c;
                int mean;

                if(s->out_format == FMT_H263){
                    mean= (s->dc_val[i][mb_x + mb_y*s->b8_stride] + 4)>>3; //FIXME not exact but simple ;)
                }else{
                    mean= (s->last_dc[i] + 4)>>3;
                }
                dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;

                mean*= 0x01010101;
                for(i=0; i<8; i++){
                    *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
                    *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
                }

                intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
            }
        }
#endif
        intra_score += s->me.mb_penalty_factor*16;

        if(intra_score < dmin){
            mb_type= CANDIDATE_MB_TYPE_INTRA;
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
        }else
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;

        if (vard <= 64 || vard < varc) { //FIXME
            s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        }else{
            s->scene_change_score+= s->qscale;
        }
    }

    s->mb_type[mb_y*s->mb_stride + mb_x]= mb_type;
}

1377 1378 1379
/**
 * Pre-pass motion search for one macroblock of a P-frame.
 *
 * Unlike the main pass, the predictors are read from the neighbours at
 * xy+1 and xy+mb_stride (-1) in s->p_mv_table, and the clipping limits are
 * mirrored accordingly. The vector found is stored in s->p_mv_table[xy],
 * scaled to sub-pel units.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the score returned by epzs_motion_search()
 */
int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
                                    int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int mx, my, dmin;
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    /* factor from full-pel to sub-pel units; used instead of a left shift
       wherever the scaled value may be negative */
    const int subpel= 1<<shift;
    const int xy= mb_x + mb_y*s->mb_stride;
    init_ref(s, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);

    s->me.pre_penalty_factor    = get_penalty_factor(s, s->avctx->me_pre_cmp);
    s->me.current_mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    s->me.skip=0;

    P_LEFT[0]       = s->p_mv_table[xy + 1][0];
    P_LEFT[1]       = s->p_mv_table[xy + 1][1];

    if(P_LEFT[0]       < (s->me.xmin*subpel)) P_LEFT[0]       = (s->me.xmin*subpel);

    /* special case for first line */
    if (s->first_slice_line) {
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];
        P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]=
        P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME
    } else {
        P_TOP[0]      = s->p_mv_table[xy + s->mb_stride    ][0];
        P_TOP[1]      = s->p_mv_table[xy + s->mb_stride    ][1];
        P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0];
        P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1];
        if(P_TOP[1]      < (s->me.ymin*subpel)) P_TOP[1]     = (s->me.ymin*subpel);
        if(P_TOPRIGHT[0] > (s->me.xmax<<shift)) P_TOPRIGHT[0]= (s->me.xmax<<shift);
        if(P_TOPRIGHT[1] < (s->me.ymin*subpel)) P_TOPRIGHT[1]= (s->me.ymin*subpel);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

        c->pred_x = P_MEDIAN[0];
        c->pred_y = P_MEDIAN[1];
    }

    dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift);

    /* mx/my may be negative, so scale by multiplication */
    s->p_mv_table[xy][0] = mx*subpel;
    s->p_mv_table[xy][1] = my*subpel;

    return dmin;
}

1430
/**
 * Motion search in one direction for one macroblock of a B-frame.
 *
 * Runs the configured search followed by the sub-pel refinement against the
 * reference selected by ref_index and stores the resulting vector in
 * mv_table at the macroblock position.
 *
 * @param s         encoder context
 * @param mb_x      macroblock column
 * @param mb_y      macroblock row
 * @param mv_table  table to read the spatial predictors from and to write
 *                  the result to; comparing it with s->b_forw_mv_table
 *                  selects which scale is applied to the s->p_mv_table
 *                  vectors handed to the search
 * @param ref_index index of the reference in s->me.ref
 * @param f_code    selects the motion vector penalty table
 * @return the score of the chosen vector
 */
static int ff_estimate_motion_b(MpegEncContext * s,
                       int mb_x, int mb_y, int16_t (*mv_table)[2], int ref_index, int f_code)
{
    int mx, my, dmin;
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    const int mot_stride = s->mb_stride;
    const int mot_xy = mb_y*mot_stride + mb_x;
    uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
    int mv_scale; /* only set and used by the ME_X1/ME_EPZS case */

    s->me.penalty_factor    = get_penalty_factor(s, s->avctx->me_cmp);
    s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
    s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
    s->me.current_mv_penalty= mv_penalty;

    get_limits(s, 16*mb_x, 16*mb_y);

    switch(s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        dmin = 0;
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
#if 0
    case ME_FULL:
        dmin = full_motion_search(s, &mx, &my, range, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_LOG:
        dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_PHODS:
        dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
#endif
    case ME_X1:
    case ME_EPZS:
       {
            P_LEFT[0]        = mv_table[mot_xy - 1][0];
            P_LEFT[1]        = mv_table[mot_xy - 1][1];

            if(P_LEFT[0]       > (s->me.xmax<<shift)) P_LEFT[0]       = (s->me.xmax<<shift);

            /* special case for first line */
            if (!s->first_slice_line) {
                P_TOP[0] = mv_table[mot_xy - mot_stride             ][0];
                P_TOP[1] = mv_table[mot_xy - mot_stride             ][1];
                P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1         ][0];
                P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1         ][1];
                if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1]= (s->me.ymax<<shift);
                /* xmin may be negative, so multiply instead of shifting */
                if(P_TOPRIGHT[0] < (s->me.xmin*(1<<shift))) P_TOPRIGHT[0]= (s->me.xmin*(1<<shift));
                if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            s->me.pred_x= P_LEFT[0];
            s->me.pred_y= P_LEFT[1];
        }

        /* 16.16 fixed point factor applied to the s->p_mv_table vectors */
        if(mv_table == s->b_forw_mv_table){
            mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
        }else{
            /* pb_time - pp_time can be negative here; multiplying avoids the
               undefined left shift of a negative value */
            mv_scale= ((s->pb_time - s->pp_time) * (1<<16)) / (s->pp_time<<shift);
        }

        dmin = epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale);

        break;
    }

    dmin= s->me.sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);

    /* rescore with the macroblock compare function if it differs from the
       one used during sub-pel refinement */
    if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
        dmin= get_mb_score(s, mx, my, 0, ref_index);

//printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
//    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;

    return dmin;
}

1522
/**
 * Score one bidirectional candidate (forward and backward prediction averaged).
 *
 * The luma prediction is assembled in s->me.scratchpad: the block addressed by
 * the forward vector inside c->ref[0] is copied there, then the block addressed
 * by the backward vector inside c->ref[2] is averaged onto it. The result is
 * compared against the source block c->src[0] with s->dsp.mb_cmp[size].
 *
 * @param motion_fx,motion_fy forward vector; quarter-pel units when
 *                            s->quarter_sample is set, half-pel units otherwise
 * @param motion_bx,motion_by backward vector, same units
 * @param pred_fx,pred_fy     predictor of the forward vector (for the mv cost)
 * @param pred_bx,pred_by     predictor of the backward vector (for the mv cost)
 * @param size                block size selector used to index the dsp tables
 * @param h                   row count handed to the half-pel put/avg functions
 *                            and to the compare function
 * @return mv cost of both vectors (mv_penalty scaled by mb_penalty_factor)
 *         plus the luma distortion; chroma is not taken into account
 */
static inline int check_bidir_mv(MpegEncContext * s,
                   int motion_fx, int motion_fy,
                   int motion_bx, int motion_by,
                   int pred_fx, int pred_fy,
                   int pred_bx, int pred_by,
                   int size, int h)
{
    //FIXME optimize?
    //FIXME better f_code prediction (max mv & distance)
    //FIXME pointers
    MotionEstContext * const c= &s->me;
    uint8_t * const mv_penalty= c->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
    const int stride= c->stride;
    uint8_t * const dest_y= c->scratchpad;
    uint8_t **src_data= c->src[0];
    uint8_t **ref_data= c->ref[0];
    uint8_t **ref2_data= c->ref[2];
    uint8_t *ptr;
    int dxy;
    int src_x, src_y;
    int fbmin;

    if(s->quarter_sample){
        /* low 2 bits of each component select the sub-pel interpolator,
           the remaining bits are the full-pel offset */
        dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
        src_x = motion_fx >> 2;
        src_y = motion_fy >> 2;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        /* use the same block size selector as the avg call below
           (was hard-coded to 0; identical for size == 0) */
        s->dsp.put_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);

        dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
        src_x = motion_bx >> 2;
        src_y = motion_by >> 2;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);
    }else{
        /* low bit of each component selects the half-pel interpolator */
        dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
        src_x = motion_fx >> 1;
        src_y = motion_fy >> 1;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        s->dsp.put_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);

        dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
        src_x = motion_bx >> 1;
        src_y = motion_by >> 1;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
    }

    fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*c->mb_penalty_factor
           +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*c->mb_penalty_factor
           + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic

    //FIXME CHROMA !!! (nothing is added to the score even if FF_CMP_CHROMA is set in mb_cmp)

    return fbmin;
}
1585

1586
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1587
static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
1588
{
1589 1590
    const int mot_stride = s->mb_stride;
    const int xy = mb_y *mot_stride + mb_x;
1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602
    int fbmin;
    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];

    //FIXME do refinement and add flag
    
1603
    fbmin= check_bidir_mv(s, motion_fx, motion_fy,
1604 1605
                          motion_bx, motion_by,
                          pred_fx, pred_fy,
1606 1607
                          pred_bx, pred_by,
                          0, 16);
1608 1609 1610 1611

   return fbmin;
}

1612
/**
 * Search the vector stored in s->b_direct_mv_table for macroblock (mb_x, mb_y).
 *
 * The co-located vectors of s->next_picture are scaled by pb_time/pp_time into
 * s->me.direct_basis_mv, the search window is narrowed so that neither the
 * basis vector nor (basis - co-located) leaves the picture by more than 16
 * pixels, and an EPZS search followed by a sub-pel search is run with
 * FLAG_DIRECT set.
 *
 * @return the best score found, or 256*256*256*64 (with a zero vector stored)
 *         when the allowed window does not contain the zero vector
 */
static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int P[10][2];
    const int mot_stride= s->mb_stride;
    const int mot_xy= mb_y*mot_stride + mb_x;
    const int shift= 1+s->quarter_sample;
    const int time_pp= s->pp_time;
    const int time_pb= s->pb_time;
    int16_t (*mv_table)[2]= s->b_direct_mv_table;
    int xmin, xmax, ymin, ymax;
    int mx, my, dmin;
    int blk, nb_blocks;

    c->current_mv_penalty= c->mv_penalty[1] + MAX_MV;

    xmin= ymin= (-32)>>shift;
    xmax= ymax=    31>>shift;

    /* follow the partitioning of the co-located macroblock:
       four 8x8 blocks, or a single vector for the whole macroblock */
    if(IS_8X8(s->next_picture.mb_type[mot_xy])){
        s->mv_type= MV_TYPE_8X8;
        nb_blocks= 4;
    }else{
        s->mv_type= MV_TYPE_16X16;
        nb_blocks= 1;
    }

    for(blk=0; blk<nb_blocks; blk++){
        const int index= s->block_index[blk];
        int lo, hi;

        c->co_located_mv[blk][0]= s->next_picture.motion_val[0][index][0];
        c->co_located_mv[blk][1]= s->next_picture.motion_val[0][index][1];
        /* temporally scaled vector plus the position of the block inside the
           macroblock (8 full pixels expressed in sub-pel units) */
        c->direct_basis_mv[blk][0]= c->co_located_mv[blk][0]*time_pb/time_pp + ((blk& 1)<<(shift+3));
        c->direct_basis_mv[blk][1]= c->co_located_mv[blk][1]*time_pb/time_pp + ((blk>>1)<<(shift+3));

        /* horizontal limits; +-1 is for the simpler rounding */
        hi= (FFMAX(c->direct_basis_mv[blk][0], c->direct_basis_mv[blk][0] - c->co_located_mv[blk][0])>>shift) + 16*mb_x + 1;
        lo= (FFMIN(c->direct_basis_mv[blk][0], c->direct_basis_mv[blk][0] - c->co_located_mv[blk][0])>>shift) + 16*mb_x - 1;
        xmax= FFMIN(xmax, s->width - hi);
        xmin= FFMAX(xmin, - 16     - lo);

        /* vertical limits; +-1 is for the simpler rounding */
        hi= (FFMAX(c->direct_basis_mv[blk][1], c->direct_basis_mv[blk][1] - c->co_located_mv[blk][1])>>shift) + 16*mb_y + 1;
        lo= (FFMIN(c->direct_basis_mv[blk][1], c->direct_basis_mv[blk][1] - c->co_located_mv[blk][1])>>shift) + 16*mb_y - 1;
        ymax= FFMIN(ymax, s->height - hi);
        ymin= FFMAX(ymin, - 16      - lo);
    }

    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);

    /* the zero vector is outside the window: direct mode is unusable here */
    if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
        mv_table[mot_xy][0]= 0;
        mv_table[mot_xy][1]= 0;

        return 256*256*256*64;
    }

    c->xmin= xmin;
    c->ymin= ymin;
    c->xmax= xmax;
    c->ymax= ymax;
    c->flags     |= FLAG_DIRECT;
    c->sub_flags |= FLAG_DIRECT;
    c->pred_x= 0;
    c->pred_y= 0;

    P_LEFT[0]= clip(mv_table[mot_xy - 1][0], xmin<<shift, xmax<<shift);
    P_LEFT[1]= clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift);

    /* special case for first line */
    if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as its cliped
        P_TOP[0]     = clip(mv_table[mot_xy - mot_stride    ][0], xmin<<shift, xmax<<shift);
        P_TOP[1]     = clip(mv_table[mot_xy - mot_stride    ][1], ymin<<shift, ymax<<shift);
        P_TOPRIGHT[0]= clip(mv_table[mot_xy - mot_stride + 1][0], xmin<<shift, xmax<<shift);
        P_TOPRIGHT[1]= clip(mv_table[mot_xy - mot_stride + 1][1], ymin<<shift, ymax<<shift);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    }

    dmin= epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift));
    if(c->sub_flags&FLAG_QPEL)
        dmin= qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    else
        dmin= hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);

    if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !c->skip)
        dmin= get_mb_score(s, mx, my, 0, 0);

    get_limits(s, 16*mb_x, 16*mb_y); //restore s->me.?min/max, maybe not needed

    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;
    c->flags     &= ~FLAG_DIRECT;
    c->sub_flags &= ~FLAG_DIRECT;

    return dmin;
}

/**
 * Motion estimation for one macroblock of a B frame.
 *
 * Runs the direct (MPEG-4 only), forward, backward, bidirectional and, when
 * CODEC_FLAG_INTERLACED_ME is set, the interlaced forward/backward searches,
 * then stores the candidate macroblock type(s) in s->mb_type[] and a scaled
 * squared score in s->current_picture.mc_mb_var[] / s->mc_mb_var_sum_temp.
 *
 * When s->avctx->me_threshold is non-zero the motion supplied on input is
 * checked first: below me_threshold nothing else is searched, below
 * mb_threshold only the searches matching the already stored mb_type are run.
 *
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_b_frame_motion(MpegEncContext * s,
                             int mb_x, int mb_y)
{
    const int penalty_factor= s->me.mb_penalty_factor;
    int fmin, bmin, dmin, fbmin, bimin, fimin;
    int type=0;
    const int xy = mb_y*s->mb_stride + mb_x;
    init_ref(s, s->new_picture.data, s->last_picture.data, s->next_picture.data, 16*mb_x, 16*mb_y, 2);

    s->me.skip=0;
    if(s->avctx->me_threshold){
        int vard= (check_input_motion(s, mb_x, mb_y, 0)+128)>>8;

        if(vard<s->avctx->me_threshold){
            /* input motion is good enough: keep it and only record its score */
            s->current_picture.mc_mb_var[xy] = vard;
            s->mc_mb_var_sum_temp += vard;
            return;
        }
        if(vard<s->avctx->mb_threshold){
            /* keep the input macroblock type, redo only the matching searches */
            type= s->mb_type[xy];
            if(type == CANDIDATE_MB_TYPE_DIRECT){
                direct_search(s, mb_x, mb_y);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                s->me.skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                s->me.skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                s->me.skip=0;
                s->me.current_mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
                interlaced_search(s, 0,
                                        s->b_field_mv_table[0], s->b_field_select_table[0],
                                        s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 1);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                s->me.skip=0;
                s->me.current_mv_penalty= s->me.mv_penalty[s->b_code] + MAX_MV;
                interlaced_search(s, 2,
                                        s->b_field_mv_table[1], s->b_field_select_table[1],
                                        s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 1);
            }
            return;
        }
    }

    if (s->codec_id == CODEC_ID_MPEG4)
        dmin= direct_search(s, mb_x, mb_y);
    else
        dmin= INT_MAX;
//FIXME penalty stuff for non mpeg4
    s->me.skip=0;
    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) + 3*penalty_factor;

    s->me.skip=0;
    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) + 2*penalty_factor;

    s->me.skip=0;
    fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;

    if(s->flags & CODEC_FLAG_INTERLACED_ME){
//FIXME mb type penalty
        s->me.skip=0;
        s->me.current_mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
        fimin= interlaced_search(s, 0,
                                 s->b_field_mv_table[0], s->b_field_select_table[0],
                                 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
        s->me.current_mv_penalty= s->me.mv_penalty[s->b_code] + MAX_MV;
        bimin= interlaced_search(s, 2,
                                 s->b_field_mv_table[1], s->b_field_select_table[1],
                                 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
    }else
        fimin= bimin= INT_MAX;

    {
        /* pick the cheapest mode; direct wins a tie against forward,
           every later candidate has to be strictly better */
        int score= fmin;
        type = CANDIDATE_MB_TYPE_FORWARD;

        if (dmin <= score){
            score = dmin;
            type = CANDIDATE_MB_TYPE_DIRECT;
        }
        if(bmin<score){
            score=bmin;
            type= CANDIDATE_MB_TYPE_BACKWARD;
        }
        if(fbmin<score){
            score=fbmin;
            type= CANDIDATE_MB_TYPE_BIDIR;
        }
        if(fimin<score){
            score=fimin;
            type= CANDIDATE_MB_TYPE_FORWARD_I;
        }
        if(bimin<score){
            score=bimin;
            type= CANDIDATE_MB_TYPE_BACKWARD_I;
        }

        /* square in unsigned arithmetic: score*score in int overflows (undefined
           behaviour) as soon as score exceeds 46340 */
        score= ((unsigned)score*(unsigned)score + 128*256)>>16;
        s->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
    }

    if(s->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* the final decision is taken later: offer every usable mode as candidate */
        type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
        if(fimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_FORWARD_I;
        if(bimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_BACKWARD_I;
        if(fimin < INT_MAX && bimin < INT_MAX){
            type |= CANDIDATE_MB_TYPE_BIDIR_I;
        }
         //FIXME something smarter
        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
#if 0
        if(s->out_format == FMT_MPEG1)
            type |= CANDIDATE_MB_TYPE_INTRA;
#endif
    }

    s->mb_type[xy]= type;
}

/**
 * Find the best f_code for motion estimation methods which do unlimited searches.
 *
 * For every macroblock whose s->mb_type entry has a bit of type set, the
 * f_code needed by its vector is looked up in s->fcode_tab and every smaller
 * f_code is penalized; smaller f_codes start with a higher base score.
 *
 * @param mv_table vectors to inspect, indexed like s->mb_type
 * @param type     mask selecting the macroblocks to take into account
 * @return the f_code in 1..7 with the highest score,
 *         or 1 if s->me_method is below ME_EPZS
 */
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
{
    int score[8];
    int i, x, y;
    uint8_t * fcode_tab;
    int best_fcode=-1;
    int best_score=-10000000;

    if(s->me_method < ME_EPZS)
        return 1;

    fcode_tab= s->fcode_tab;

    for(i=0; i<8; i++)
        score[i]= s->mb_num*(8-i);

    for(y=0; y<s->mb_height; y++){
        int xy= y*s->mb_stride;

        for(x=0; x<s->mb_width; x++, xy++){
            int fcode, j;

            if(!(s->mb_type[xy] & type))
                continue;

            fcode= FFMAX(fcode_tab[mv_table[xy][0] + MAX_MV],
                         fcode_tab[mv_table[xy][1] + MAX_MV]);

            /* the test does not depend on j, so decide once per macroblock */
            if(fcode > 0 && (s->pict_type==B_TYPE || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy])){
                for(j=0; j<fcode && j<8; j++)
                    score[j]-= 170;
            }
        }
    }

    /* f_code 0 is never a candidate */
    for(i=1; i<8; i++){
        if(score[i] > best_score){
            best_score= score[i];
            best_fcode= i;
        }
    }

    return best_fcode;
}

1904 1905 1906
/**
 * Convert 4MV macroblocks of a P frame whose vectors do not fit the range
 * allowed by s->f_code into intra candidates.
 *
 * Does nothing unless CODEC_FLAG_4MV is set in s->flags.
 */
void ff_fix_long_p_mvs(MpegEncContext * s)
{
    const int f_code= s->f_code;
    int range, wrap;
    int x, y;

    assert(s->pict_type==P_TYPE);

    range= ((s->out_format == FMT_MPEG1) ? 8 : 16) << f_code;

    if(s->msmpeg4_version)
        range= 16;

    if(s->avctx->me_range && range > s->avctx->me_range)
        range= s->avctx->me_range;

    if(!(s->flags&CODEC_FLAG_4MV))
        return;

    wrap= s->b8_stride;

    /* clip / convert to intra 8x8 type MVs */
    for(y=0; y<s->mb_height; y++){
        int xy= y*2*wrap;       /* index of the top-left 8x8 block of the macroblock */
        int i = y*s->mb_stride; /* index of the macroblock */

        for(x=0; x<s->mb_width; x++, xy+=2, i++){
            int block;

            if(!(s->mb_type[i]&CANDIDATE_MB_TYPE_INTER4V))
                continue;

            for(block=0; block<4; block++){
                const int off= (block& 1) + (block>>1)*wrap;
                const int mx= s->current_picture.motion_val[0][ xy + off ][0];
                const int my= s->current_picture.motion_val[0][ xy + off ][1];

                if(   mx < -range || mx >= range
                   || my < -range || my >= range){
                    s->mb_type[i] &= ~CANDIDATE_MB_TYPE_INTER4V;
                    s->mb_type[i] |= CANDIDATE_MB_TYPE_INTRA;
                    s->current_picture.mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
                }
            }
        }
    }
}

1949 1950 1951 1952 1953 1954
/**
 * Bring the 16x16 vectors of mv_table into the range allowed by f_code.
 *
 * The allowed range is (8 for FMT_MPEG1, 16 otherwise) << f_code, forced to 16
 * when s->msmpeg4_version is set and limited by s->avctx->me_range if that is
 * non-zero. Valid components lie in [-range, range-1].
 *
 * @param field_select_table if not NULL, only macroblocks whose entry equals
 *                           field_select are touched and the vertical range
 *                           is halved
 * @param field_select       value compared against field_select_table[]
 * @param mv_table           vectors to check, indexed like s->mb_type
 * @param f_code             f_code the vectors have to fit
 * @param type               only macroblocks with one of these bits set in
 *                           s->mb_type are considered
 * @param truncate 1 for truncation, 0 for using intra
 */
void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select, 
                     int16_t (*mv_table)[2], int f_code, int type, int truncate)
{
    int x, y, h_range, v_range;

    // RAL: 8 in MPEG-1, 16 in MPEG-4
    int range= ((s->out_format == FMT_MPEG1) ? 8 : 16) << f_code;

    if(s->msmpeg4_version)
        range= 16;
    if(s->avctx->me_range && range > s->avctx->me_range)
        range= s->avctx->me_range;

    h_range= range;
    v_range= field_select_table ? range>>1 : range;

    /* clip / convert to intra 16x16 type MVs */
    for(y=0; y<s->mb_height; y++){
        int xy= y*s->mb_stride;

        for(x=0; x<s->mb_width; x++, xy++){
            int16_t * const mv= mv_table[xy];

            if(!(s->mb_type[xy] & type))    // RAL: "type" test added...
                continue;
            if(field_select_table!=NULL && field_select_table[xy] != field_select)
                continue;
            /* vector already fits, nothing to do */
            if(   mv[0] < h_range && mv[0] >= -h_range
               && mv[1] < v_range && mv[1] >= -v_range)
                continue;

            if(truncate){
                if     (mv[0] > h_range-1) mv[0]=  h_range-1;
                else if(mv[0] < -h_range ) mv[0]= -h_range;
                if     (mv[1] > v_range-1) mv[1]=  v_range-1;
                else if(mv[1] < -v_range ) mv[1]= -v_range;
            }else{
                s->mb_type[xy] &= ~type;
                s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
                mv[0]=
                mv[1]= 0;
            }
        }
    }
}