/*
 * Motion estimation
 * Copyright (c) 2000,2001 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
 */
23

/**
 * @file motion_est.c
 * Motion estimation.
 */
28

Fabrice Bellard's avatar
Fabrice Bellard committed
29 30
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"

#undef NDEBUG
#include <assert.h>
Michael Niedermayer's avatar
Michael Niedermayer committed
38

39
/* Square of a. The argument is expanded twice, so it must not have side effects. */
#define SQ(a) ((a)*(a))
40

41 42 43 44 45 46
/* Named slots of a local predictor array P[][2] ([0] = x, [1] = y component). */
#define P_LEFT P[1]     // vector of the left neighbour
#define P_TOP P[2]      // vector of the neighbour above
#define P_TOPRIGHT P[3] // vector of the above-right (or otherwise offset) neighbour
#define P_MEDIAN P[4]   // component-wise median of the three neighbours above
#define P_MV1 P[9]      // NOTE(review): not used in the visible part of this file -- confirm meaning at its use site

Michael Niedermayer's avatar
Michael Niedermayer committed
47
/* Forward declaration: the definition comes later in this file, but
 * ff_init_me() refers to the function before that point. */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h);
Michael Niedermayer's avatar
Michael Niedermayer committed
51

52
/**
 * Advance the generation counter of the search map.
 *
 * The counter is increased by 1<<(2*ME_MAP_MV_BITS). If that addition makes
 * it wrap around to 0, the counter is restarted at one step and c->map is
 * cleared (presumably so that entries tagged with an older generation cannot
 * be mistaken for current ones).
 *
 * @param c motion estimation context whose map/map_generation are updated
 * @return the new value of c->map_generation
 */
static inline int update_map_generation(MotionEstContext *c)
{
    const int generation_step = 1 << (ME_MAP_MV_BITS * 2);

    c->map_generation += generation_step;
    if (c->map_generation == 0) {
        /* wrapped: restart counting and drop every cached entry */
        c->map_generation = generation_step;
        memset(c->map, 0, sizeof(uint32_t) * ME_MAP_SIZE);
    }
    return c->map_generation;
}

62 63 64 65 66 67
/* shape adaptive search stuff */
/**
 * One candidate minimum found during the search.
 */
typedef struct Minima{
    int height;  ///< sort key, see minima_cmp()
    int x, y;    ///< presumably the position of the candidate -- confirm at use site
    int checked; ///< presumably marks candidates that were already examined -- confirm at use site
}Minima;

/**
 * qsort()-style comparator that orders Minima by ascending height.
 *
 * Only the sign of the result is meaningful. The comparison is done without
 * subtracting the two heights, so it cannot overflow for any pair of ints.
 *
 * @param a pointer to the first  Minima
 * @param b pointer to the second Minima
 * @return negative, zero or positive if a sorts before, equal to or after b
 */
static int minima_cmp(const void *a, const void *b){
    const Minima *da = a;
    const Minima *db = b;

    return (da->height > db->height) - (da->height < db->height);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
75

76 77 78
/* Bit flags understood by cmp() through its `flags` argument. */
#define FLAG_QPEL   1 //must be 1: cmp() uses (flags&FLAG_QPEL) directly as a number in shift counts
#define FLAG_CHROMA 2 //also compare the two chroma planes
#define FLAG_DIRECT 4 //compare against a prediction built from two references (direct mode path of cmp())
Michael Niedermayer's avatar
Michael Niedermayer committed
79

80
/**
 * Point the context's source/reference plane pointers at position (x, y).
 *
 * For each of the three planes, c->src[0][i] and c->ref[0][i] are set to the
 * given plane base plus an offset; the first plane uses y*stride + x, the
 * other two use (y*uvstride + x)>>1. If ref_index is non-zero,
 * c->ref[ref_index][i] is set the same way from ref2.
 *
 * @param c         motion estimation context to fill in
 * @param src       base pointers of the three source planes
 * @param ref       base pointers of the three planes of the first reference
 * @param ref2      base pointers of a second reference; only read when ref_index != 0
 * @param x         horizontal position in luma samples
 * @param y         vertical position in luma samples
 * @param ref_index slot of c->ref that receives ref2, or 0 for none
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
    const int luma_offset  =   y*c->  stride + x;
    const int chroma_offset= ((y*c->uvstride + x)>>1);
    const int offset[3]= { luma_offset, chroma_offset, chroma_offset };
    int plane;

    for(plane=0; plane<3; plane++){
        c->src[0][plane]= src[plane] + offset[plane];
        c->ref[0][plane]= ref[plane] + offset[plane];
        if(ref_index)
            c->ref[ref_index][plane]= ref2[plane] + offset[plane];
    }
}

98 99
/**
 * Build the FLAG_* bit set that is later handed to cmp().
 *
 * @param c      context; FLAG_QPEL is set when c->avctx->flags has CODEC_FLAG_QPEL
 * @param direct non-zero to set FLAG_DIRECT
 * @param chroma non-zero to set FLAG_CHROMA
 * @return combination of FLAG_QPEL, FLAG_DIRECT and FLAG_CHROMA
 */
static int get_flags(MotionEstContext *c, int direct, int chroma){
    int flags= 0;

    if(c->avctx->flags&CODEC_FLAG_QPEL) flags|= FLAG_QPEL;
    if(direct)                          flags|= FLAG_DIRECT;
    if(chroma)                          flags|= FLAG_CHROMA;

    return flags;
}

104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
/**
 * Compare the current source block against a (possibly sub-pel) reference
 * position and return the score.
 *
 * Without FLAG_DIRECT: if the sub-pel part is non-zero, the reference is
 * interpolated into c->temp (qpel_put or hpel_put, selected by FLAG_QPEL)
 * and compared with cmp_func; otherwise the reference is compared in place.
 * With FLAG_CHROMA both chroma planes are interpolated with hpel_put into
 * the area starting at c->temp + 16*stride and chroma_cmp_func is added for
 * each of them.
 *
 * With FLAG_DIRECT: a prediction is built in c->temp by a "put" from ref[0]
 * followed by an "avg" from ref[8], using vectors derived from
 * c->direct_basis_mv / c->co_located_mv and s->pp_time / s->pb_time, either
 * as four 8x8 blocks (s->mv_type == MV_TYPE_8X8) or as one 16x16 block.
 * The result is compared with cmp_func over 16 lines. Positions outside
 * [xmin,xmax] x [ymin,ymax] get the fixed score 256*256*256*32.
 *
 * @param s               encoder context (s->me is the motion estimation context)
 * @param x,y             full-pel part of the candidate vector
 * @param subx,suby       sub-pel part, in units of 1/2 sample, or 1/4 sample with FLAG_QPEL
 * @param size            first index into the put tables (chroma uses size+1); unused with FLAG_DIRECT
 * @param h               number of lines passed to the compare functions; unused with FLAG_DIRECT
 * @param ref_index       row of c->ref to read the reference from
 * @param src_index       row of c->src to read the source from
 * @param cmp_func        luma compare function
 * @param chroma_cmp_func chroma compare function, only called with FLAG_CHROMA
 * @param flags           combination of FLAG_QPEL, FLAG_CHROMA, FLAG_DIRECT
 * @return the comparison score (no vector cost is added here)
 */
static always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int uvstride= c->uvstride;
    const int qpel= flags&FLAG_QPEL;
    const int chroma= flags&FLAG_CHROMA;
    const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
    const int hx= subx + (x<<(1+qpel));
    const int hy= suby + (y<<(1+qpel));
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    int d;
    //FIXME check chroma 4mv, (no crashes ...)
    if(flags&FLAG_DIRECT){
        if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){
            const int time_pp= s->pp_time;
            const int time_pb= s->pb_time;
            const int mask= 2*qpel+1; // keeps the sub-pel bits of a vector component
            // NOTE(review): ref[8] below indexes past ref[0..2]; it presumably lands on the
            // other direction's reference through the layout of c->ref -- confirm against MotionEstContext
            if(s->mv_type==MV_TYPE_8X8){
                int i;
                for(i=0; i<4; i++){
                    int fx = c->direct_basis_mv[i][0] + hx;
                    int fy = c->direct_basis_mv[i][1] + hy;
                    int bx = hx ? fx - c->co_located_mv[i][0] : c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(qpel+4));
                    int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
                    int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                    int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

                    uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1);
                    if(qpel){
                        c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
                        c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride);
                    }else{
                        c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
                        c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
                    }
                }
            }else{
                int fx = c->direct_basis_mv[0][0] + hx;
                int fy = c->direct_basis_mv[0][1] + hy;
                int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
                int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
                int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

                if(qpel){
                    c->qpel_put[1][fxy](c->temp               , ref[0] + (fx>>2) + (fy>>2)*stride               , stride);
                    c->qpel_put[1][fxy](c->temp + 8           , ref[0] + (fx>>2) + (fy>>2)*stride + 8           , stride);
                    c->qpel_put[1][fxy](c->temp     + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride     + 8*stride, stride);
                    c->qpel_put[1][fxy](c->temp + 8 + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride + 8 + 8*stride, stride);
                    c->qpel_avg[1][bxy](c->temp               , ref[8] + (bx>>2) + (by>>2)*stride               , stride);
                    c->qpel_avg[1][bxy](c->temp + 8           , ref[8] + (bx>>2) + (by>>2)*stride + 8           , stride);
                    c->qpel_avg[1][bxy](c->temp     + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride     + 8*stride, stride);
                    c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
                }else{
                    assert((fx>>1) + 16*s->mb_x >= -16);
                    assert((fy>>1) + 16*s->mb_y >= -16);
                    assert((fx>>1) + 16*s->mb_x <= s->width);
                    assert((fy>>1) + 16*s->mb_y <= s->height);
                    assert((bx>>1) + 16*s->mb_x >= -16);
                    assert((by>>1) + 16*s->mb_y >= -16);
                    assert((bx>>1) + 16*s->mb_x <= s->width);
                    assert((by>>1) + 16*s->mb_y <= s->height);

                    c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
                    c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
                }
            }
            d = cmp_func(s, c->temp, src[0], stride, 16);
        }else
            d= 256*256*256*32;
    }else{
        int uvdxy;              /* no, it might not be used uninitialized */
        /* every path below assigns uvdxy when chroma is set, which is the only case that reads it */
        if(dxy){
            if(qpel){
                c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
                if(chroma){
                    int cx= hx/2;
                    int cy= hy/2;
                    cx= (cx>>1)|(cx&1);
                    cy= (cy>>1)|(cy&1);
                    uvdxy= (cx&1) + 2*(cy&1);
                    //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
                }
            }else{
                c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
                if(chroma)
                    uvdxy= dxy | (x&1) | (2*(y&1));
            }
            d = cmp_func(s, c->temp, src[0], stride, h);
        }else{
            d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h);
            if(chroma)
                uvdxy= (x&1) + 2*(y&1);
        }
        if(chroma){
            uint8_t * const uvtemp= c->temp + 16*stride;
            c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
            c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
            d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
            d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
        }
    }
#if 0
    if(full_pel){
        const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
        score_map[index]= d;
    }

    d += (c->mv_penalty[hx - c->pred_x] + c->mv_penalty[hy - c->pred_y])*c->penalty_factor;
#endif
    return d;
}
Michael Niedermayer's avatar
Michael Niedermayer committed
219 220 221

#include "motion_est_template.c"

Michael Niedermayer's avatar
Michael Niedermayer committed
222 223 224 225 226 227 228
/**
 * Compare function that ignores its input and always reports a score of 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    /* every argument is intentionally unused */
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;

    return 0;
}

/**
 * Pixel "put" function that does nothing: neither buffer is read or written.
 */
static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){
    /* every argument is intentionally unused */
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
}

Michael Niedermayer's avatar
Michael Niedermayer committed
229
/**
 * Set up the motion estimation context s->me from the encoder settings.
 *
 * - installs the compare functions selected by avctx->me_pre_cmp, me_cmp,
 *   me_sub_cmp and mb_cmp via ff_set_cmp()
 * - derives c->flags, c->sub_flags and c->mb_flags with get_flags()
 * - selects the sub-pel refinement function and the put/avg pixel tables
 *   (honouring s->no_rounding)
 * - sets c->stride / c->uvstride from s->linesize / s->uvlinesize, or from
 *   the macroblock width when s->linesize is 0
 * - for every codec except CODEC_ID_SNOW, installs the do-nothing
 *   zero_cmp / zero_hpel in slot [2] of the compare and hpel_put tables
 * - forces no_sub_motion_search for CODEC_ID_H261
 * - points c->temp at c->scratchpad
 *
 * @param s encoder context to initialise; s->avctx and s->dsp are read
 */
void ff_init_me(MpegEncContext *s){
    MotionEstContext * const c= &s->me;
    c->avctx= s->avctx;

    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, c->avctx->me_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
    ff_set_cmp(&s->dsp, s->dsp.mb_cmp, c->avctx->mb_cmp);

    c->flags    = get_flags(c, 0, c->avctx->me_cmp    &FF_CMP_CHROMA);
    c->sub_flags= get_flags(c, 0, c->avctx->me_sub_cmp&FF_CMP_CHROMA);
    c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);

/*FIXME s->no_rounding b_type*/
    if(s->flags&CODEC_FLAG_QPEL){
        c->sub_motion_search= qpel_motion_search;
        c->qpel_avg= s->dsp.avg_qpel_pixels_tab;
        if(s->no_rounding) c->qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab;
        else               c->qpel_put= s->dsp.put_qpel_pixels_tab;
    }else{
        if(c->avctx->me_sub_cmp&FF_CMP_CHROMA)
            c->sub_motion_search= hpel_motion_search;
        else if(   c->avctx->me_sub_cmp == FF_CMP_SAD
                && c->avctx->    me_cmp == FF_CMP_SAD
                && c->avctx->    mb_cmp == FF_CMP_SAD)
            c->sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
        else
            c->sub_motion_search= hpel_motion_search;
    }
    c->hpel_avg= s->dsp.avg_pixels_tab;
    if(s->no_rounding) c->hpel_put= s->dsp.put_no_rnd_pixels_tab;
    else               c->hpel_put= s->dsp.put_pixels_tab;

    if(s->linesize){
        c->stride  = s->linesize;
        c->uvstride= s->uvlinesize;
    }else{
        /* no line size known yet: derive one from the picture width in macroblocks */
        c->stride  = 16*s->mb_width + 32;
        c->uvstride=  8*s->mb_width + 16;
    }

    // 8x8 fullpel search would need a 4x4 chroma compare, which we don't have yet,
    // and even if we had, the motion estimation code doesn't expect it
    if(s->codec_id != CODEC_ID_SNOW){
        if((c->avctx->me_cmp&FF_CMP_CHROMA)/* && !s->dsp.me_cmp[2]*/){
            s->dsp.me_cmp[2]= zero_cmp;
        }
        if((c->avctx->me_sub_cmp&FF_CMP_CHROMA) && !s->dsp.me_sub_cmp[2]){
            s->dsp.me_sub_cmp[2]= zero_cmp;
        }
        // NOTE(review): c->hpel_put was just pointed at a table in s->dsp, so these
        // stores presumably modify that table as well -- confirm this is intended
        c->hpel_put[2][0]= c->hpel_put[2][1]=
        c->hpel_put[2][2]= c->hpel_put[2][3]= zero_hpel;
    }

    if(s->codec_id == CODEC_ID_H261){
        c->sub_motion_search= no_sub_motion_search;
    }

    c->temp= c->scratchpad;
}
288

289
#if 0
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param pix       top-left sample of the block
 * @param line_size distance in bytes between two lines
 * @param mean      value every sample is compared against
 * @return sum over the 16x16 samples of ABS(sample - mean)
 */
static int pix_dev(uint8_t * pix, int line_size, int mean)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < 16; i++) {
        /* one line is handled as two unrolled groups of 8 samples */
        for (j = 0; j < 16; j += 8) {
            s += ABS(pix[0]-mean);
            s += ABS(pix[1]-mean);
            s += ABS(pix[2]-mean);
            s += ABS(pix[3]-mean);
            s += ABS(pix[4]-mean);
            s += ABS(pix[5]-mean);
            s += ABS(pix[6]-mean);
            s += ABS(pix[7]-mean);
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
#endif
312

313
/**
 * "Search" that performs no search at all: it reports the position of the
 * current macroblock itself.
 *
 * @param s      encoder context; s->mb_x and s->mb_y are read
 * @param mx_ptr receives 16 * s->mb_x
 * @param my_ptr receives 16 * s->mb_y
 */
static inline void no_motion_search(MpegEncContext * s,
                                    int *mx_ptr, int *my_ptr)
{
    const int mb_left = 16 * s->mb_x;
    const int mb_top  = 16 * s->mb_y;

    *mx_ptr = mb_left;
    *my_ptr = mb_top;
}

320
#if 0  /* the use of these functions is inside #if 0 */
/**
 * Exhaustive full-pel search over a square window around the current
 * macroblock.
 *
 * The window is [xx-range+1, xx+range-1] x [yy-range+1, yy+range-1], where
 * (xx, yy) is the macroblock position, clipped to [xmin,xmax] x [ymin,ymax].
 * Ties are resolved in favour of the position closest to (xx, yy).
 *
 * @param s           encoder context
 * @param mx_ptr      receives the x position of the best match (picture coordinates)
 * @param my_ptr      receives the y position of the best match (picture coordinates)
 * @param range       half size of the search window
 * @param xmin,ymin,xmax,ymax limits for the tested positions
 * @param ref_picture luma plane of the reference picture
 * @return score of the best match as returned by s->dsp.pix_abs[0][0]
 */
static int full_motion_search(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int range,
                              int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y;
    int mx, my, dmin, d;
    uint8_t *pix;

    xx = 16 * s->mb_x;
    yy = 16 * s->mb_y;
    x1 = xx - range + 1;        /* we lose one pixel to avoid boundary problems with half pixel pred */
    if (x1 < xmin)
        x1 = xmin;
    x2 = xx + range - 1;
    if (x2 > xmax)
        x2 = xmax;
    y1 = yy - range + 1;
    if (y1 < ymin)
        y1 = ymin;
    y2 = yy + range - 1;
    if (y2 > ymax)
        y2 = ymax;
    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    dmin = 0x7fffffff;
    mx = 0;
    my = 0;
    for (y = y1; y <= y2; y++) {
        for (x = x1; x <= x2; x++) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
                             s->linesize, 16);
            if (d < dmin ||
                (d == dmin &&
                 (abs(x - xx) + abs(y - yy)) <
                 (abs(mx - xx) + abs(my - yy)))) {
                dmin = d;
                mx = x;
                my = y;
            }
        }
    }

    *mx_ptr = mx;
    *my_ptr = my;

#if 0
    if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
        *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
        av_log(NULL, AV_LOG_ERROR, "error %d %d\n", *mx_ptr, *my_ptr);
    }
#endif
    return dmin;
}


/**
 * Logarithmic full-pel search: test a coarse grid with step `range`, then
 * repeatedly halve the step and re-centre the window on the best position
 * found so far, until the step reaches 0.
 *
 * NOTE(review): the grid loops advance by `range`, so a value of 0 on entry
 * would never terminate; callers are presumably expected to pass range >= 1.
 *
 * @param s           encoder context
 * @param mx_ptr      receives the x position of the best match (picture coordinates)
 * @param my_ptr      receives the y position of the best match (picture coordinates)
 * @param range       initial step and half size of the search window
 * @param xmin,ymin,xmax,ymax limits for the tested positions
 * @param ref_picture luma plane of the reference picture
 * @return score of the best match as returned by s->dsp.pix_abs[0][0]
 */
static int log_motion_search(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int range,
                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y;
    int mx, my, dmin, d;
    uint8_t *pix;

    xx = s->mb_x << 4;
    yy = s->mb_y << 4;

    /* Left limit */
    x1 = xx - range;
    if (x1 < xmin)
        x1 = xmin;

    /* Right limit */
    x2 = xx + range;
    if (x2 > xmax)
        x2 = xmax;

    /* Upper limit */
    y1 = yy - range;
    if (y1 < ymin)
        y1 = ymin;

    /* Lower limit */
    y2 = yy + range;
    if (y2 > ymax)
        y2 = ymax;

    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    dmin = 0x7fffffff;
    mx = 0;
    my = 0;

    do {
        for (y = y1; y <= y2; y += range) {
            for (x = x1; x <= x2; x += range) {
                d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
                if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                    dmin = d;
                    mx = x;
                    my = y;
                }
            }
        }

        range = range >> 1;

        /* next pass: window of the halved size centred on the best position so far */
        x1 = mx - range;
        if (x1 < xmin)
            x1 = xmin;

        x2 = mx + range;
        if (x2 > xmax)
            x2 = xmax;

        y1 = my - range;
        if (y1 < ymin)
            y1 = ymin;

        y2 = my + range;
        if (y2 > ymax)
            y2 = ymax;

    } while (range >= 1);

#ifdef DEBUG
    av_log(s->avctx, AV_LOG_DEBUG, "log       - MX: %d\tMY: %d\n", mx, my);
#endif
    *mx_ptr = mx;
    *my_ptr = my;
    return dmin;
}

/**
 * Full-pel search that optimises the two coordinates separately: in each
 * pass the best x is searched along the current row and the best y along the
 * current column, both with step `range`; then the step is halved and the
 * window re-centred, until the step reaches 0.
 *
 * NOTE(review): the loops advance by `range`, so a value of 0 on entry would
 * never terminate; callers are presumably expected to pass range >= 1.
 *
 * @param s           encoder context
 * @param mx_ptr      receives the x position of the result (picture coordinates)
 * @param my_ptr      receives the y position of the result (picture coordinates)
 * @param range       initial step and half size of the search window
 * @param xmin,ymin,xmax,ymax limits for the tested positions
 * @param ref_picture luma plane of the reference picture
 * @return the best score found by the vertical scan of the last pass (dminy)
 */
static int phods_motion_search(MpegEncContext * s,
                               int *mx_ptr, int *my_ptr, int range,
                               int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y, lastx, d;
    int mx, my, dminx, dminy;
    uint8_t *pix;

    xx = s->mb_x << 4;
    yy = s->mb_y << 4;

    /* Left limit */
    x1 = xx - range;
    if (x1 < xmin)
        x1 = xmin;

    /* Right limit */
    x2 = xx + range;
    if (x2 > xmax)
        x2 = xmax;

    /* Upper limit */
    y1 = yy - range;
    if (y1 < ymin)
        y1 = ymin;

    /* Lower limit */
    y2 = yy + range;
    if (y2 > ymax)
        y2 = ymax;

    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    mx = 0;
    my = 0;

    x = xx;
    y = yy;
    do {
        dminx = 0x7fffffff;
        dminy = 0x7fffffff;

        /* horizontal scan at the current y; x is restored afterwards for the vertical scan */
        lastx = x;
        for (x = x1; x <= x2; x += range) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
            if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminx = d;
                mx = x;
            }
        }

        x = lastx;
        for (y = y1; y <= y2; y += range) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
            if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminy = d;
                my = y;
            }
        }

        range = range >> 1;

        x = mx;
        y = my;
        x1 = mx - range;
        if (x1 < xmin)
            x1 = xmin;

        x2 = mx + range;
        if (x2 > xmax)
            x2 = xmax;

        y1 = my - range;
        if (y1 < ymin)
            y1 = ymin;

        y2 = my + range;
        if (y2 > ymax)
            y2 = ymax;

    } while (range >= 1);

#ifdef DEBUG
    av_log(s->avctx, AV_LOG_DEBUG, "phods     - MX: %d\tMY: %d\n", mx, my);
#endif

    /* no half pixel search is done here; the full-pel result is returned as is */
    *mx_ptr = mx;
    *my_ptr = my;
    return dminy;
}
#endif /* 0 */
542 543 544

/* NOTE(review): not referenced in the visible part of this file; presumably a
 * score threshold used by the search code -- confirm at its use site. */
#define Z_THRESHOLD 256

Michael Niedermayer's avatar
Michael Niedermayer committed
545
/*
 * Helper of sad_hpel_motion_search(): score the half-pel candidate that is
 * (x, y) half samples away from the current full-pel position and remember
 * it in dminh/dx/dy if it beats the best score so far.
 *
 * x and y are each -1, 0 or +1. The pix_abs function is selected by whether
 * x and/or y are non-zero; a negative x starts one sample to the left
 * (ptr + (x>>1)). The vertical start is handled by the caller through ptr.
 * `suffix` is not used by the expansion.
 *
 * Relies on these names being in scope at the expansion site: s, size, pix,
 * ptr, stride, h, d, mv_penalty, pen_x, pen_y, penalty_factor, dminh, dx, dy.
 */
#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
    d= s->dsp.pix_abs[size][((x)?1:0)+((y)?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
    d += (mv_penalty[pen_x + (x)] + mv_penalty[pen_y + (y)])*penalty_factor;\
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
551

Michael Niedermayer's avatar
Michael Niedermayer committed
552
/**
 * Half-pel refinement of a full-pel vector using the pix_abs functions.
 *
 * On entry *mx_ptr / *my_ptr hold a full-pel vector; on return they hold the
 * vector in half-pel units. If c->skip is set, the vector is set to (0, 0)
 * and dmin is returned unchanged. If the vector lies strictly inside the
 * search limits, four of the eight surrounding half-pel positions are
 * tested; which ones is decided from the scores of the top/left/right/bottom
 * full-pel neighbours stored in score_map. Otherwise the vector is only
 * doubled.
 *
 * NOTE(review): xmin/xmax/ymin/ymax, pred_x/pred_y, score_map and mv_penalty
 * are not declared here; they presumably come from LOAD_COMMON -- confirm in
 * motion_est_template.c.
 *
 * @param s         encoder context
 * @param mx_ptr    in: full-pel x; out: half-pel x
 * @param my_ptr    in: full-pel y; out: half-pel y
 * @param dmin      score of the full-pel vector
 * @param src_index row of c->src to read the source from
 * @param ref_index row of c->ref to read the reference from
 * @param size      first index into s->dsp.pix_abs
 * @param h         number of lines to compare
 * @return the best score found (dmin if no half-pel candidate was better)
 */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->sub_penalty_factor;
    int mx, my, dminh;
    uint8_t *pix, *ptr;
    int stride= c->stride;
    const int flags= c->sub_flags;
    LOAD_COMMON

    assert(flags == 0);

    if(c->skip){
//    printf("S");
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }
//    printf("N");

    pix = c->src[src_index][0];

    mx = *mx_ptr;
    my = *my_ptr;
    ptr = c->ref[ref_index][0] + (my * stride) + mx;

    dminh = dmin;

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        /* scores of the four full-pel neighbours: top, left, right, bottom */
        const int index= (my<<ME_MAP_SHIFT) + mx;
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        mx<<=1;
        my<<=1;


        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /* ptr is on the line above while candidates with y == -1 are tested and is
           moved back (ptr+= stride) before any candidate with y >= 0 */
        ptr-= stride;
        if(t<=b){
            CHECK_SAD_HALF_MV(y2 , 0, -1)
            if(l<=r){
                CHECK_SAD_HALF_MV(xy2, -1, -1)
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
            }else{
                CHECK_SAD_HALF_MV(xy2, +1, -1)
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
            }
        }else{
            if(l<=r){
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
                CHECK_SAD_HALF_MV(xy2, -1, +1)
            }else{
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
                CHECK_SAD_HALF_MV(xy2, +1, +1)
            }
            CHECK_SAD_HALF_MV(y2 ,  0, +1)
        }
        mx+=dx;
        my+=dy;

    }else{
        /* on the border of the search area: no refinement, only convert to half-pel units */
        mx<<=1;
        my<<=1;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}

660
/**
 * Store the vector chosen for the current macroblock.
 *
 * s->p_mv_table is always updated. When mv4 is non-zero, the same vector is
 * also written to the 2x2 group of entries of
 * s->current_picture.motion_val[0] that starts at s->block_index[0].
 *
 * NOTE(review): going by the original comment, callers presumably pass
 * mv4 == 0 when those four entries already hold the 4MV result -- confirm at
 * the call sites.
 *
 * @param s   encoder context
 * @param mx  x component of the vector
 * @param my  y component of the vector
 * @param mv4 non-zero to also fill the four per-8x8-block entries
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    const int xy= s->mb_x + s->mb_y*s->mb_stride;
    int mot_xy, row, col;

    s->p_mv_table[xy][0] = mx;
    s->p_mv_table[xy][1] = my;

    /* has already been set to the 4 MV if 4MV is done */
    if(!mv4)
        return;

    mot_xy= s->block_index[0];
    for(row=0; row<2; row++){
        for(col=0; col<2; col++){
            s->current_picture.motion_val[0][mot_xy+col][0]= mx;
            s->current_picture.motion_val[0][mot_xy+col][1]= my;
        }
        mot_xy += s->b8_stride;
    }
}

684 685 686
/**
 * get fullpel ME search limits.
 */
687
static inline void get_limits(MpegEncContext *s, int x, int y)
Fabrice Bellard's avatar
Fabrice Bellard committed
688
{
689
    MotionEstContext * const c= &s->me;
690
/*
691
    if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
692
    else                   c->range= 16;
693
*/
Fabrice Bellard's avatar
Fabrice Bellard committed
694
    if (s->unrestricted_mv) {
695 696 697 698
        c->xmin = - x - 16;
        c->ymin = - y - 16;
        c->xmax = - x + s->mb_width *16;
        c->ymax = - y + s->mb_height*16;
699 700 701 702
    } else if (s->out_format == FMT_H261){
        // Search range of H261 is different from other codec standards
        c->xmin = (x > 15) ? - 15 : 0;
        c->ymin = (y > 15) ? - 15 : 0;
703
        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
704
        c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
Fabrice Bellard's avatar
Fabrice Bellard committed
705
    } else {
706 707 708 709
        c->xmin = - x;
        c->ymin = - y;
        c->xmax = - x + s->mb_width *16 - 16;
        c->ymax = - y + s->mb_height*16 - 16;
Fabrice Bellard's avatar
Fabrice Bellard committed
710
    }
711 712
}

713 714
/**
 * Derive the first-plane pointers for the four 8x8 sub-blocks.
 *
 * Slots 1..3 of c->ref[][0] and c->src[][0] are set to slot 0 plus the
 * offset of the top-right (+8), bottom-left (+8*stride) and bottom-right
 * (+8*stride+8) 8x8 block. Slot 0 must already be set.
 *
 * @param c motion estimation context; c->stride, c->ref[0][0] and c->src[0][0] are read
 */
static inline void init_mv4_ref(MotionEstContext *c){
    const int stride= c->stride;
    const int right = 8;
    const int below = 8*stride;

    c->ref[1][0] = c->ref[0][0] + right;
    c->ref[2][0] = c->ref[0][0] + below;
    c->ref[3][0] = c->ref[2][0] + right;

    c->src[1][0] = c->src[0][0] + right;
    c->src[2][0] = c->src[0][0] + below;
    c->src[3][0] = c->src[2][0] + right;
}

724
/**
 * Search one motion vector for each of the four 8x8 luma blocks of the
 * current macroblock and store them in s->current_picture.motion_val[0].
 *
 * @param s     encoder context
 * @param mx    x component of the 16x16 vector, used as search candidate P_MV1
 * @param my    y component of the 16x16 vector, used as search candidate P_MV1
 * @param shift sub-pel shift applied to the full-pel limits in s->me
 * @return INT_MAX if all four vectors equal (mx, my); otherwise the summed
 *         score, plus 11*mb_penalty_factor unless mb_cmp is FF_CMP_RD
 */
static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
{
    MotionEstContext * const c = &s->me;
    const int size   = 1;
    const int h      = 8;
    const int stride = c->stride;
    uint8_t *mv_penalty = c->current_mv_penalty;
    int P[10][2];
    int score_sum = 0;
    int mx4_sum = 0, my4_sum = 0;
    int same = 1;
    int blk;

    init_mv4_ref(c);

    for (blk = 0; blk < 4; blk++) {
        /* per-block offset from the "top" entry to the "top right" entry */
        static const int off[4] = {2, 1, 1, -1};
        const int mot_stride = s->b8_stride;
        const int mot_xy     = s->block_index[blk];
        int mx4, my4;
        int pred_x4, pred_y4;
        int score4;

        P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
        P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];

        if (P_LEFT[0] > (c->xmax << shift))
            P_LEFT[0] = (c->xmax << shift);

        if (s->first_slice_line && blk < 2) {
            /* special case for first line: only the left neighbour is used */
            pred_x4 = P_LEFT[0];
            pred_y4 = P_LEFT[1];
        } else {
            P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride           ][0];
            P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride           ][1];
            P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[blk]][0];
            P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[blk]][1];

            if (P_TOP[1] > (c->ymax << shift))
                P_TOP[1] = (c->ymax << shift);
            if (P_TOPRIGHT[0] < (c->xmin << shift))
                P_TOPRIGHT[0] = (c->xmin << shift);
            if (P_TOPRIGHT[0] > (c->xmax << shift))
                P_TOPRIGHT[0] = (c->xmax << shift);
            if (P_TOPRIGHT[1] > (c->ymax << shift))
                P_TOPRIGHT[1] = (c->ymax << shift);

            P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            pred_x4 = P_MEDIAN[0];
            pred_y4 = P_MEDIAN[1];
        }
        c->pred_x = pred_x4;
        c->pred_y = pred_y4;

        P_MV1[0] = mx;
        P_MV1[1] = my;

        score4 = epzs_motion_search4(s, &mx4, &my4, P, blk, blk, s->p_mv_table, (1 << 16) >> shift);
        score4 = c->sub_motion_search(s, &mx4, &my4, score4, blk, blk, size, h);

        if (s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]) {
            /* The search metric differs from mb_cmp: build the prediction in
             * the scratchpad now, it is scored as a whole after the loop.
             * Only the vector cost is accumulated here. */
            const int offset = ((blk & 1) + (blk >> 1) * stride) * 8;
            uint8_t *dest_y  = c->scratchpad + offset;
            int dxy;

            if (s->quarter_sample) {
                uint8_t *ref = c->ref[blk][0] + (mx4 >> 2) + (my4 >> 2) * stride;
                dxy = ((my4 & 3) << 2) | (mx4 & 3);

                if (s->no_rounding)
                    s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y, ref, stride);
                else
                    s->dsp.put_qpel_pixels_tab[1][dxy](dest_y, ref, stride);
            } else {
                uint8_t *ref = c->ref[blk][0] + (mx4 >> 1) + (my4 >> 1) * stride;
                dxy = ((my4 & 1) << 1) | (mx4 & 1);

                if (s->no_rounding)
                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y, ref, stride, h);
                else
                    s->dsp.put_pixels_tab[1][dxy](dest_y, ref, stride, h);
            }
            score_sum += (mv_penalty[mx4 - pred_x4] + mv_penalty[my4 - pred_y4]) * c->mb_penalty_factor;
        } else {
            score_sum += score4;
        }

        /* accumulate the vectors for the chroma vector derived below */
        if (s->quarter_sample) {
            mx4_sum += mx4 / 2;
            my4_sum += my4 / 2;
        } else {
            mx4_sum += mx4;
            my4_sum += my4;
        }

        s->current_picture.motion_val[0][s->block_index[blk]][0] = mx4;
        s->current_picture.motion_val[0][s->block_index[blk]][1] = my4;

        if (mx4 != mx || my4 != my)
            same = 0;
    }

    /* four identical vectors bring nothing over the single 16x16 vector */
    if (same)
        return INT_MAX;

    if (s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]) {
        score_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x * 16 + s->mb_y * 16 * stride,
                                      c->scratchpad, stride, 16);
    }

    if (c->avctx->mb_cmp & FF_CMP_CHROMA) {
        const int cmx = ff_h263_round_chroma(mx4_sum);
        const int cmy = ff_h263_round_chroma(my4_sum);
        const int dxy = ((cmy & 1) << 1) | (cmx & 1);
        const int offset = (s->mb_x * 8 + (cmx >> 1)) + (s->mb_y * 8 + (cmy >> 1)) * s->uvlinesize;

        /* both chroma predictions sit side by side in the scratchpad */
        if (s->no_rounding) {
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad,     s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad + 8, s->last_picture.data[2] + offset, s->uvlinesize, 8);
        } else {
            s->dsp.put_pixels_tab[1][dxy](c->scratchpad,     s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_pixels_tab[1][dxy](c->scratchpad + 8, s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }

        score_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x * 8 + s->mb_y * 8 * s->uvlinesize,
                                      c->scratchpad,     s->uvlinesize, 8);
        score_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x * 8 + s->mb_y * 8 * s->uvlinesize,
                                      c->scratchpad + 8, s->uvlinesize, 8);
    }

    c->pred_x = mx;
    c->pred_y = my;

    if ((c->avctx->mb_cmp & 0xFF) == FF_CMP_RD)
        return score_sum;

    return score_sum + 11 * c->mb_penalty_factor;
}

860 861 862 863 864 865 866 867 868 869 870 871 872
/**
 * Set up the second source/reference pointer set one picture line below the
 * first one, for use with doubled strides.
 *
 * @param s         encoder context
 * @param ref_index index of the reference entry to derive from; the result
 *                  is stored at ref_index+1
 */
static inline void init_interlaced_ref(MpegEncContext *s, int ref_index)
{
    MotionEstContext * const c = &s->me;
    const int base = ref_index;
    const int next = ref_index + 1;

    c->ref[next][0] = c->ref[base][0] + s->linesize;
    c->src[1][0]    = c->src[0][0]    + s->linesize;

    if (!(c->flags & FLAG_CHROMA))
        return;

    c->ref[next][1] = c->ref[base][1] + s->uvlinesize;
    c->ref[next][2] = c->ref[base][2] + s->uvlinesize;
    c->src[1][1]    = c->src[0][1]    + s->uvlinesize;
    c->src[1][2]    = c->src[0][2]    + s->uvlinesize;
}

873
/**
 * Search field motion vectors for the two fields of the current macroblock.
 *
 * For each of the two blocks, both reference fields are tried (or only the
 * one in field_select_tables when user_field_select is set) and the cheaper
 * one is kept. The vertical limits and the strides in s->me are temporarily
 * adjusted for field lines and restored before returning.
 *
 * @param s                   encoder context
 * @param ref_index           base index into s->me.ref[] for the reference used
 * @param mv_tables           [block][field_select] vector tables, updated at
 *                            the current macroblock
 * @param field_select_tables [block] tables receiving the chosen field
 * @param mx                  x component of the frame vector (candidate P_MV1)
 * @param my                  y component of the frame vector (halved for P_MV1)
 * @param user_field_select   if non-zero only the preselected field is searched
 * @return INT_MAX if the result matches the frame vector; otherwise the
 *         summed score, plus 11*mb_penalty_factor unless mb_cmp is FF_CMP_RD
 */
static int interlaced_search(MpegEncContext *s, int ref_index,
                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
{
    MotionEstContext * const c = &s->me;
    const int size       = 0;
    const int h          = 8;
    const int stride     = 2 * s->linesize;
    const int mot_stride = s->mb_stride;
    const int xy         = s->mb_x + s->mb_y * mot_stride;
    uint8_t * const mv_penalty = c->current_mv_penalty;
    int P[10][2];
    int same      = 1;
    int score_sum = 0;
    int block;

    /* switch the context to field geometry */
    c->ymin     >>= 1;
    c->ymax     >>= 1;
    c->stride   <<= 1;
    c->uvstride <<= 1;
    init_interlaced_ref(s, ref_index);

    for (block = 0; block < 2; block++) {
        int best_score = INT_MAX;
        int best_field = -1;
        int field_select;

        for (field_select = 0; field_select < 2; field_select++) {
            int16_t (*mv_table)[2] = mv_tables[block][field_select];
            int score, mx_i, my_i;

            if (user_field_select) {
                assert(field_select==0 || field_select==1);
                assert(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);
                if (field_select_tables[block][xy] != field_select)
                    continue;
            }

            P_LEFT[0] = mv_table[xy - 1][0];
            P_LEFT[1] = mv_table[xy - 1][1];
            if (P_LEFT[0] > (c->xmax << 1))
                P_LEFT[0] = (c->xmax << 1);

            c->pred_x = P_LEFT[0];
            c->pred_y = P_LEFT[1];

            if (!s->first_slice_line) {
                P_TOP[0]      = mv_table[xy - mot_stride    ][0];
                P_TOP[1]      = mv_table[xy - mot_stride    ][1];
                P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
                P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];

                if (P_TOP[1] > (c->ymax << 1))
                    P_TOP[1] = (c->ymax << 1);
                if (P_TOPRIGHT[0] < (c->xmin << 1))
                    P_TOPRIGHT[0] = (c->xmin << 1);
                if (P_TOPRIGHT[0] > (c->xmax << 1))
                    P_TOPRIGHT[0] = (c->xmax << 1);
                if (P_TOPRIGHT[1] > (c->ymax << 1))
                    P_TOPRIGHT[1] = (c->ymax << 1);

                P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            P_MV1[0] = mx; //FIXME not correct if block != field_select
            P_MV1[1] = my / 2;

            score = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select + ref_index, mv_table, (1 << 16) >> 1);
            score = c->sub_motion_search(s, &mx_i, &my_i, score, block, field_select + ref_index, size, h);

            mv_table[xy][0] = mx_i;
            mv_table[xy][1] = my_i;

            if (s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]) {
                /* the search metric differs from mb_cmp: re-score with mb_cmp */
                //FIXME chroma ME
                uint8_t *ref  = c->ref[field_select + ref_index][0] + (mx_i >> 1) + (my_i >> 1) * stride;
                const int dxy = ((my_i & 1) << 1) | (mx_i & 1);

                if (s->no_rounding)
                    s->dsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref, stride, h);
                else
                    s->dsp.put_pixels_tab[size][dxy](c->scratchpad, ref, stride, h);

                score  = s->dsp.mb_cmp[size](s, c->src[block][0], c->scratchpad, stride, h);
                score += (mv_penalty[mx_i - c->pred_x] + mv_penalty[my_i - c->pred_y] + 1) * c->mb_penalty_factor;
            } else {
                score += c->mb_penalty_factor; //field_select bits
            }

            score += field_select != block; //slightly prefer same field

            if (score < best_score) {
                best_score = score;
                best_field = field_select;
            }
        }

        {
            int16_t (*best_mv)[2] = mv_tables[block][best_field];

            //FIXME check if these checks work and are any good at all
            if (best_mv[xy][0] != mx ||
                (best_mv[xy][1] & 1) ||
                best_mv[xy][1] * 2 != my ||
                best_field != block)
                same = 0;
        }

        field_select_tables[block][xy] = best_field;
        score_sum += best_score;
    }

    /* back to frame geometry */
    c->ymin     <<= 1;
    c->ymax     <<= 1;
    c->stride   >>= 1;
    c->uvstride >>= 1;

    if (same)
        return INT_MAX;

    if ((c->avctx->mb_cmp & 0xFF) == FF_CMP_RD)
        return score_sum;

    return score_sum + 11 * c->mb_penalty_factor;
}

995 996 997
/**
 * Clamp a motion vector in place to the limits stored in s->me.
 *
 * @param s          encoder context holding xmin/xmax/ymin/ymax
 * @param mv         vector to clamp, mv[0] is x and mv[1] is y
 * @param interlaced 0 or 1; the vertical limits are shifted right by this
 */
static void clip_input_mv(MpegEncContext * s, int16_t *mv, int interlaced)
{
    const int lo_y = s->me.ymin >> interlaced;
    const int hi_y = s->me.ymax >> interlaced;

    if (mv[0] < s->me.xmin) {
        mv[0] = s->me.xmin;
    }
    if (mv[0] > s->me.xmax) {
        mv[0] = s->me.xmax;
    }
    if (mv[1] < lo_y) {
        mv[1] = lo_y;
    }
    if (mv[1] > hi_y) {
        mv[1] = hi_y;
    }
}

1005 1006 1007 1008 1009 1010 1011 1012 1013
/**
 * Validate and score the motion information already present in the current
 * picture for one macroblock, and copy it into the encoder's candidate
 * tables.
 *
 * The vectors in p->motion_val are clipped to the current search limits,
 * copied into the matching p_/b_ mv and field-select tables, and
 * s->mb_type[] is set to the corresponding CANDIDATE_MB_TYPE_*.
 *
 * @param s      encoder context
 * @param mb_x   macroblock column
 * @param mb_y   macroblock row
 * @param p_type non-zero when called for a P frame
 * @return the score returned by cmp() with the sse comparison functions, or
 *         INT_MAX/2 when the input uses a mode that is not allowed here
 */
static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int p_type){
    MotionEstContext * const c= &s->me;
    Picture *p= s->current_picture_ptr;
    int mb_xy= mb_x + mb_y*s->mb_stride;
    int xy= 2*mb_x + 2*mb_y*s->b8_stride;
    int mb_type= s->current_picture.mb_type[mb_xy];
    int flags= c->flags;
    int shift= (flags&FLAG_QPEL) + 1;
    int mask= (1<<shift)-1;
    int x, y, i;
    int d=0;
    me_cmp_func cmpf= s->dsp.sse[0];
    me_cmp_func chroma_cmpf= s->dsp.sse[1];

    if(p_type && USES_LIST(mb_type, 1)){
        av_log(c->avctx, AV_LOG_ERROR, "backward motion vector in P frame\n");
        return INT_MAX/2;
    }
    assert(IS_INTRA(mb_type) || USES_LIST(mb_type,0) || USES_LIST(mb_type,1));

    /* clip the vectors of all four 8x8 blocks, for both lists */
    for(i=0; i<4; i++){
        const int b_xy= s->block_index[i];
        clip_input_mv(s, p->motion_val[0][b_xy], !!IS_INTERLACED(mb_type));
        clip_input_mv(s, p->motion_val[1][b_xy], !!IS_INTERLACED(mb_type));
    }

    if(IS_INTERLACED(mb_type)){
        int xy2= xy  + s->b8_stride;
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;

        /* Reject before touching the strides: returning with them doubled
         * would leave the context in field geometry for later callers. */
        if(!(s->flags & CODEC_FLAG_INTERLACED_ME)){
            av_log(c->avctx, AV_LOG_ERROR, "Interlaced macroblock selected but interlaced motion estimation disabled\n");
            return INT_MAX/2;
        }

        /* field geometry; undone at the end of this branch */
        c->stride<<=1;
        c->uvstride<<=1;

        if(USES_LIST(mb_type, 0)){
            int field_select0= p->ref_index[0][xy ];
            int field_select1= p->ref_index[0][xy2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 0);

            if(p_type){
                s->p_field_select_table[0][mb_xy]= field_select0;
                s->p_field_select_table[1][mb_xy]= field_select1;
                *(uint32_t*)s->p_field_mv_table[0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->p_field_mv_table[1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER_I;
            }else{
                s->b_field_select_table[0][0][mb_xy]= field_select0;
                s->b_field_select_table[0][1][mb_xy]= field_select1;
                *(uint32_t*)s->b_field_mv_table[0][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->b_field_mv_table[0][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_FORWARD_I;
            }

            x= p->motion_val[0][xy ][0];
            y= p->motion_val[0][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[0][xy2][0];
            y= p->motion_val[0][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1, 1, cmpf, chroma_cmpf, flags);
        }
        if(USES_LIST(mb_type, 1)){
            int field_select0= p->ref_index[1][xy ];
            int field_select1= p->ref_index[1][xy2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 2);

            s->b_field_select_table[1][0][mb_xy]= field_select0;
            s->b_field_select_table[1][1][mb_xy]= field_select1;
            *(uint32_t*)s->b_field_mv_table[1][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[1][xy ];
            *(uint32_t*)s->b_field_mv_table[1][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[1][xy2];
            if(USES_LIST(mb_type, 0)){
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BIDIR_I;
            }else{
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BACKWARD_I;
            }

            /* note: this overwrites the list-0 score computed above */
            x= p->motion_val[1][xy ][0];
            y= p->motion_val[1][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0+2, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[1][xy2][0];
            y= p->motion_val[1][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1+2, 1, cmpf, chroma_cmpf, flags);
            //FIXME bidir scores
        }
        c->stride>>=1;
        c->uvstride>>=1;
    }else if(IS_8X8(mb_type)){
        if(!(s->flags & CODEC_FLAG_4MV)){
            av_log(c->avctx, AV_LOG_ERROR, "4MV macroblock selected but 4MV encoding disabled\n");
            return INT_MAX/2;
        }
        cmpf= s->dsp.sse[1];
        chroma_cmpf= s->dsp.sse[1];
        init_mv4_ref(c);
        /* one score per 8x8 block, summed */
        for(i=0; i<4; i++){
            xy= s->block_index[i];
            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 1, 8, i, i, cmpf, chroma_cmpf, flags);
        }
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER4V;
    }else{
        if(USES_LIST(mb_type, 0)){
            if(p_type){
                *(uint32_t*)s->p_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER;
            }else if(USES_LIST(mb_type, 1)){
                *(uint32_t*)s->b_bidir_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                *(uint32_t*)s->b_bidir_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BIDIR;
            }else{
                *(uint32_t*)s->b_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_FORWARD;
            }
            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 0, 0, cmpf, chroma_cmpf, flags);
        }else if(USES_LIST(mb_type, 1)){
            *(uint32_t*)s->b_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BACKWARD;

            x= p->motion_val[1][xy][0];
            y= p->motion_val[1][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 2, 0, cmpf, chroma_cmpf, flags);
        }else
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;
    }
    return d;
}

1141 1142 1143
/**
 * Estimate motion for one macroblock of a P frame and choose its candidate
 * macroblock type(s).
 *
 * Writes the per-macroblock mean/variance statistics into the current
 * picture, updates the scene change score, stores the resulting vectors via
 * set_p_mv_tables() (and the field / 4MV tables where searched) and finally
 * stores the candidate type mask in s->mb_type[].
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_p_frame_motion(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    uint8_t *pix, *ppix;
    int sum, varc, vard, mx, my, dmin;
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    int mb_type=0;
    Picture * const pic= &s->current_picture;

    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);
    assert(s->linesize == c->stride);
    assert(s->uvlinesize == c->uvstride);

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip=0;

    /* intra / predictive decision */
    pix = c->src[0][0];
    sum = s->dsp.pix_sum(pix, s->linesize);
    /* Square in unsigned arithmetic: sum*sum as int would overflow once sum
     * exceeds 46340 (presumably reachable, assuming pix_sum adds 256 8-bit
     * samples). */
    varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500;

    pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
    pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
    c->mb_var_sum_temp += (varc+128)>>8;

    if(c->avctx->me_threshold){
        /* try the motion information already present in the picture first */
        vard= check_input_motion(s, mb_x, mb_y, 1);

        if((vard+128)>>8 < c->avctx->me_threshold){
            /* good enough: keep what check_input_motion() stored, no search */
            pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            if (vard <= 64<<8 || vard < varc) { //FIXME
                c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
            }else{
                c->scene_change_score+= s->qscale * s->avctx->scenechange_factor;
            }
            return;
        }
        /* keep the input macroblock type but refine its vectors below */
        if((vard+128)>>8 < c->avctx->mb_threshold)
            mb_type= s->mb_type[mb_x + mb_y*s->mb_stride];
    }

    /* ME_FULL, ME_LOG and ME_PHODS have no implementation here and fall
     * through to the default case. */
    switch(s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        mx-= mb_x*16;
        my-= mb_y*16;
        dmin = 0;
        break;
    case ME_X1:
    case ME_EPZS:
       {
            const int mot_stride = s->b8_stride;
            const int mot_xy = s->block_index[0];

            P_LEFT[0]       = s->current_picture.motion_val[0][mot_xy - 1][0];
            P_LEFT[1]       = s->current_picture.motion_val[0][mot_xy - 1][1];

            if(P_LEFT[0]       > (c->xmax<<shift)) P_LEFT[0]       = (c->xmax<<shift);

            if(!s->first_slice_line) {
                P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
                P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
                P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
                P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
                if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
                if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
                if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

                if(s->out_format == FMT_H263){
                    c->pred_x = P_MEDIAN[0];
                    c->pred_y = P_MEDIAN[1];
                }else { /* mpeg1 at least */
                    c->pred_x= P_LEFT[0];
                    c->pred_y= P_LEFT[1];
                }
            }else{
                c->pred_x= P_LEFT[0];
                c->pred_y= P_LEFT[1];
            }

        }
        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

        break;
    }

    /* At this point (mx,my) are full-pell and the relative displacement */
    ppix = c->ref[0][0] + (my * s->linesize) + mx;

    vard = s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16);

    pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
//    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
    c->mc_mb_var_sum_temp += (vard+128)>>8;

    if(mb_type){
        /* macroblock type preselected from the input motion: refine only */
        if (vard <= 64<<8 || vard < varc)
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        else
            c->scene_change_score+= s->qscale * s->avctx->scenechange_factor;

        if(mb_type == CANDIDATE_MB_TYPE_INTER){
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            set_p_mv_tables(s, mx, my, 1);
        }else{
            mx <<=shift;
            my <<=shift;
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER4V){
            h263_mv4_search(s, mx, my, shift);

            set_p_mv_tables(s, mx, my, 0);
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER_I){
            interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 1);
        }
    }else if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* collect every plausible candidate type in the mask */
        if (vard <= 64<<8 || vard < varc)
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        else
            c->scene_change_score+= s->qscale * s->avctx->scenechange_factor;

        if (vard*2 + 200*256 > varc)
            mb_type|= CANDIDATE_MB_TYPE_INTRA;
        if (varc*2 + 200*256 > vard){
            mb_type|= CANDIDATE_MB_TYPE_INTER;
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            if(s->flags&CODEC_FLAG_MV0)
                if(mx || my)
                    mb_type |= CANDIDATE_MB_TYPE_SKIPPED; //FIXME check difference
        }else{
            mx <<=shift;
            my <<=shift;
        }
        if((s->flags&CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8){
            if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
                mb_type|=CANDIDATE_MB_TYPE_INTER4V;

            set_p_mv_tables(s, mx, my, 0);
        }else
            set_p_mv_tables(s, mx, my, 1);
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !c->skip){ //FIXME varc/d checks
            if(interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER_I;
        }
    }else{
        /* simple decision: pick the single type with the lowest score */
        int intra_score, i;
        mb_type= CANDIDATE_MB_TYPE_INTER;

        dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
        if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
            dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

        if((s->flags&CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8){
            int dmin4= h263_mv4_search(s, mx, my, shift);
            if(dmin4 < dmin){
                mb_type= CANDIDATE_MB_TYPE_INTER4V;
                dmin=dmin4;
            }
        }
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !c->skip){ //FIXME varc/d checks
            int dmin_i= interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);
            if(dmin_i < dmin){
                mb_type = CANDIDATE_MB_TYPE_INTER_I;
                dmin= dmin_i;
            }
        }

//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
        set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);

        /* get intra luma score */
        if((c->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
            intra_score= varc - 500;
        }else{
            /* Replicate the mean into all four bytes using unsigned
             * arithmetic; with int the product overflows for mean >= 128. */
            const uint32_t mean= (uint32_t)((sum+128)>>8) * 0x01010101U;

            /* fill a flat 16x16 block and compare the source against it */
            for(i=0; i<16; i++){
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 0]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 4]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 8]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+12]) = mean;
            }

            intra_score= s->dsp.mb_cmp[0](s, c->scratchpad, pix, s->linesize, 16);
        }
        //FIXME the chroma planes are not included in the intra score
        intra_score += c->mb_penalty_factor*16;

        if(intra_score < dmin){
            mb_type= CANDIDATE_MB_TYPE_INTRA;
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
        }else
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;

        if (vard <= 64<<8 || vard < varc) { //FIXME
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        }else{
            c->scene_change_score+= s->qscale * s->avctx->scenechange_factor;
        }
    }

    s->mb_type[mb_y*s->mb_stride + mb_x]= mb_type;
}

1409 1410 1411
/**
 * Pre-pass motion search for one macroblock of a P frame.
 *
 * The predictors are read from the entries after the current one in
 * s->p_mv_table (xy+1, xy+mb_stride, xy+mb_stride-1). The result is written
 * to s->p_mv_table[xy], scaled up by the sub-pel shift.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the score returned by ff_epzs_motion_search()
 */
int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
                                    int mb_x, int mb_y)
{
    MotionEstContext * const c = &s->me;
    const int shift = 1 + s->quarter_sample;
    const int xy    = mb_x + mb_y * s->mb_stride;
    int P[10][2];
    int mx, my;
    int score;

    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);

    c->pre_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
    c->current_mv_penalty = c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip = 0;

    P_LEFT[0] = s->p_mv_table[xy + 1][0];
    P_LEFT[1] = s->p_mv_table[xy + 1][1];

    if (P_LEFT[0] < (c->xmin << shift))
        P_LEFT[0] = (c->xmin << shift);

    if (s->first_slice_line) {
        /* special case for first line */
        c->pred_x = P_LEFT[0];
        c->pred_y = P_LEFT[1];

        //FIXME
        P_MEDIAN[1]   = 0;
        P_TOPRIGHT[1] = 0;
        P_TOP[1]      = 0;
        P_MEDIAN[0]   = 0;
        P_TOPRIGHT[0] = 0;
        P_TOP[0]      = 0;
    } else {
        P_TOP[0]      = s->p_mv_table[xy + s->mb_stride    ][0];
        P_TOP[1]      = s->p_mv_table[xy + s->mb_stride    ][1];
        P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0];
        P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1];

        if (P_TOP[1] < (c->ymin << shift))
            P_TOP[1] = (c->ymin << shift);
        if (P_TOPRIGHT[0] > (c->xmax << shift))
            P_TOPRIGHT[0] = (c->xmax << shift);
        if (P_TOPRIGHT[1] < (c->ymin << shift))
            P_TOPRIGHT[1] = (c->ymin << shift);

        P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

        c->pred_x = P_MEDIAN[0];
        c->pred_y = P_MEDIAN[1];
    }

    score = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

    /* store the vector scaled up by the sub-pel shift */
    s->p_mv_table[xy][0] = mx << shift;
    s->p_mv_table[xy][1] = my << shift;

    return score;
}

1462
/**
 * Search the 16x16 motion vector of one B-frame macroblock against a single
 * reference and store it in mv_table[mb_y*s->mb_stride + mb_x].
 *
 * @param s         encoder context; s->me is updated (penalty factors,
 *                  current mv penalty table, search limits, predictor)
 * @param mb_x      macroblock column
 * @param mb_y      macroblock row
 * @param mv_table  table the left/top/top-right predictors are read from and
 *                  the resulting vector is written to
 * @param ref_index reference index forwarded to the search and score helpers
 * @param f_code    selects the row of c->mv_penalty used as vector cost table
 * @return score of the selected vector: the value returned by
 *         c->sub_motion_search(), replaced by ff_get_mb_score() when
 *         me_sub_cmp and mb_cmp differ and c->skip is not set
 */
static int ff_estimate_motion_b(MpegEncContext * s,
                       int mb_x, int mb_y, int16_t (*mv_table)[2], int ref_index, int f_code)
{
    MotionEstContext * const c= &s->me;
    int mx, my, dmin;
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    const int mot_stride = s->mb_stride;
    const int mot_xy = mb_y*mot_stride + mb_x;
    uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_MV;
    int mv_scale;

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= mv_penalty;

    get_limits(s, 16*mb_x, 16*mb_y);

    switch(s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        dmin = 0;
        /* make the vector relative to the macroblock position */
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_X1:
    case ME_EPZS:
       {
            /* search limits in vector units; multiplied rather than shifted
               because left-shifting a negative value is undefined */
            const int xmin= c->xmin * (1<<shift);
            const int xmax= c->xmax * (1<<shift);
            const int ymax= c->ymax * (1<<shift);

            P_LEFT[0]        = mv_table[mot_xy - 1][0];
            P_LEFT[1]        = mv_table[mot_xy - 1][1];

            if(P_LEFT[0]       > xmax) P_LEFT[0]       = xmax;

            /* special case for first line: no top neighbours are read there,
               P_TOP/P_TOPRIGHT/P_MEDIAN stay unset */
            if (!s->first_slice_line) {
                P_TOP[0]      = mv_table[mot_xy - mot_stride    ][0];
                P_TOP[1]      = mv_table[mot_xy - mot_stride    ][1];
                P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1][0];
                P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1][1];
                if(P_TOP[1]      > ymax) P_TOP[1]     = ymax;
                if(P_TOPRIGHT[0] < xmin) P_TOPRIGHT[0]= xmin;
                if(P_TOPRIGHT[1] > ymax) P_TOPRIGHT[1]= ymax;

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            /* the left neighbour is the predictor the vector cost is measured against */
            c->pred_x= P_LEFT[0];
            c->pred_y= P_LEFT[1];
        }

        /* 16.16 fixed point factor built from the pb_time/pp_time distances;
           the forward table uses pb_time, any other table pb_time - pp_time */
        if(mv_table == s->b_forw_mv_table){
            mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
        }else{
            mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);
        }

        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);

        break;
    }

    dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);

    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= ff_get_mb_score(s, mx, my, 0, ref_index, 0, 16, 1);

    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;

    return dmin;
}

1555
/**
 * Score one candidate pair of forward/backward vectors for bidirectional
 * prediction.
 *
 * The forward prediction from c->ref[0] is written to c->scratchpad, the
 * backward prediction from c->ref[2] is averaged into it, and the result is
 * compared with the source luma through s->dsp.mb_cmp[size]. The cost of both
 * vectors relative to their predictors is added, weighted by
 * c->mb_penalty_factor. Chroma is not evaluated.
 *
 * @param motion_fx,motion_fy forward vector (quarter-pel units if
 *                            s->quarter_sample is set, half-pel otherwise)
 * @param motion_bx,motion_by backward vector, same units
 * @param pred_fx,pred_fy     predictor of the forward vector
 * @param pred_bx,pred_by     predictor of the backward vector
 * @param size                index into the dsp function tables
 * @param h                   block height passed to the half-pel and compare functions
 * @return vector cost plus comparison score
 */
static inline int check_bidir_mv(MpegEncContext * s,
                   int motion_fx, int motion_fy,
                   int motion_bx, int motion_by,
                   int pred_fx, int pred_fy,
                   int pred_bx, int pred_by,
                   int size, int h)
{
    //FIXME optimize?
    //FIXME better f_code prediction (max mv & distance)
    //FIXME pointers
    MotionEstContext * const c= &s->me;
    uint8_t * const mv_penalty_f= c->mv_penalty[s->f_code] + MAX_MV; // penalty table selected by f_code
    uint8_t * const mv_penalty_b= c->mv_penalty[s->b_code] + MAX_MV; // penalty table selected by b_code
    int stride= c->stride;
    uint8_t *dest_y = c->scratchpad;
    uint8_t *ptr;
    int dxy;
    int src_x, src_y;
    int fbmin;
    uint8_t **src_data= c->src[0];
    uint8_t **ref_data= c->ref[0];
    uint8_t **ref2_data= c->ref[2];

    if(s->quarter_sample){
        /* low 2 bits of each component select the sub-pel function,
           the remaining bits the integer position */
        dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
        src_x = motion_fx >> 2;
        src_y = motion_fy >> 2;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        /* indexed by size like every other prediction call here (was a
           hard-coded 0, identical for size == 0) */
        s->dsp.put_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);

        dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
        src_x = motion_bx >> 2;
        src_y = motion_by >> 2;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);
    }else{
        dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
        src_x = motion_fx >> 1;
        src_y = motion_fy >> 1;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        s->dsp.put_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);

        dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
        src_x = motion_bx >> 1;
        src_y = motion_by >> 1;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
    }

    fbmin = (mv_penalty_f[motion_fx-pred_fx] + mv_penalty_f[motion_fy-pred_fy])*c->mb_penalty_factor
           +(mv_penalty_b[motion_bx-pred_bx] + mv_penalty_b[motion_by-pred_by])*c->mb_penalty_factor
           + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic

    //FIXME CHROMA !!! (nothing is added when c->avctx->mb_cmp has FF_CMP_CHROMA set)

    return fbmin;
}
1618

1619
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1620
static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
1621
{
1622
    MotionEstContext * const c= &s->me;
1623 1624
    const int mot_stride = s->mb_stride;
    const int xy = mb_y *mot_stride + mb_x;
1625 1626 1627 1628 1629 1630 1631 1632 1633
    int fbmin;
    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1634 1635 1636 1637 1638 1639 1640
    const int flags= c->sub_flags;
    const int qpel= flags&FLAG_QPEL;
    const int shift= 1+qpel;
    const int xmin= c->xmin<<shift;
    const int ymin= c->ymin<<shift;
    const int xmax= c->xmax<<shift;
    const int ymax= c->ymax<<shift;
1641 1642 1643 1644 1645 1646
    uint8_t map[8][8][8][8];

    memset(map,0,sizeof(map));
#define BIDIR_MAP(fx,fy,bx,by) \
    map[(motion_fx+fx)&7][(motion_fy+fy)&7][(motion_bx+bx)&7][(motion_by+by)&7]
    BIDIR_MAP(0,0,0,0) = 1;
1647

1648
    fbmin= check_bidir_mv(s, motion_fx, motion_fy,
1649 1650
                          motion_bx, motion_by,
                          pred_fx, pred_fy,
1651 1652
                          pred_bx, pred_by,
                          0, 16);
1653

1654 1655 1656
    if(s->avctx->bidir_refine){
        int score, end;
#define CHECK_BIDIR(fx,fy,bx,by)\
1657 1658
    if( !BIDIR_MAP(fx,fy,bx,by)\
       &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
Michael Niedermayer's avatar
Michael Niedermayer committed
1659
       &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
1660
        BIDIR_MAP(fx,fy,bx,by) = 1;\
Michael Niedermayer's avatar
Michael Niedermayer committed
1661 1662 1663 1664 1665 1666 1667 1668 1669
        score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
        if(score < fbmin){\
            fbmin= score;\
            motion_fx+=fx;\
            motion_fy+=fy;\
            motion_bx+=bx;\
            motion_by+=by;\
            end=0;\
        }\
1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708
    }
#define CHECK_BIDIR2(a,b,c,d)\
CHECK_BIDIR(a,b,c,d)\
CHECK_BIDIR(-a,-b,-c,-d)

#define CHECK_BIDIRR(a,b,c,d)\
CHECK_BIDIR2(a,b,c,d)\
CHECK_BIDIR2(b,c,d,a)\
CHECK_BIDIR2(c,d,a,b)\
CHECK_BIDIR2(d,a,b,c)

        do{
            end=1;

            CHECK_BIDIRR( 0, 0, 0, 1)
            if(s->avctx->bidir_refine > 1){
                CHECK_BIDIRR( 0, 0, 1, 1)
                CHECK_BIDIR2( 0, 1, 0, 1)
                CHECK_BIDIR2( 1, 0, 1, 0)
                CHECK_BIDIRR( 0, 0,-1, 1)
                CHECK_BIDIR2( 0,-1, 0, 1)
                CHECK_BIDIR2(-1, 0, 1, 0)
                if(s->avctx->bidir_refine > 2){
                    CHECK_BIDIRR( 0, 1, 1, 1)
                    CHECK_BIDIRR( 0,-1, 1, 1)
                    CHECK_BIDIRR( 0, 1,-1, 1)
                    CHECK_BIDIRR( 0, 1, 1,-1)
                    if(s->avctx->bidir_refine > 3){
                        CHECK_BIDIR2( 1, 1, 1, 1)
                        CHECK_BIDIRR( 1, 1, 1,-1)
                        CHECK_BIDIR2( 1, 1,-1,-1)
                        CHECK_BIDIR2( 1,-1,-1, 1)
                        CHECK_BIDIR2( 1,-1, 1,-1)
                    }
                }
            }
        }while(!end);
    }

1709 1710 1711 1712 1713
    s->b_bidir_forw_mv_table[xy][0]= motion_fx;
    s->b_bidir_forw_mv_table[xy][1]= motion_fy;
    s->b_bidir_back_mv_table[xy][0]= motion_bx;
    s->b_bidir_back_mv_table[xy][1]= motion_by;

1714
    return fbmin;
1715 1716
}

1717
/**
 * Search the delta vector for direct mode prediction of one B-frame
 * macroblock and store it in s->b_direct_mv_table.
 *
 * The co-located vectors of s->next_picture are scaled by pb_time/pp_time to
 * form the basis vectors. The delta range starts at [-32,31]>>shift and is
 * narrowed so that the derived vectors stay within -16 .. width/height of
 * the picture. If the narrowed range no longer contains 0 the stored vector
 * is set to 0 and the large constant 256*256*256*64 is returned.
 *
 * FLAG_DIRECT is set in c->flags and c->sub_flags for the duration of the
 * search, and c->xmin/xmax/ymin/ymax are restored with get_limits() afterwards.
 *
 * @return score of the best delta vector, or 256*256*256*64 if direct mode
 *         cannot be used for this macroblock
 */
static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int P[10][2];
    const int mot_stride = s->mb_stride;
    const int mot_xy = mb_y*mot_stride + mb_x;
    const int shift= 1+s->quarter_sample;
    /* scale from full-pel limits to vector units; used as a multiplier
       because xmin/ymin are <= 0 and left-shifting a negative value is
       undefined */
    const int mv_unit= 1<<shift;
    int dmin, i;
    const int time_pp= s->pp_time;
    const int time_pb= s->pb_time;
    int mx, my, xmin, xmax, ymin, ymax;
    int16_t (*mv_table)[2]= s->b_direct_mv_table;

    c->current_mv_penalty= c->mv_penalty[1] + MAX_MV;
    ymin= xmin=(-32)>>shift;
    ymax= xmax=   31>>shift;

    if(IS_8X8(s->next_picture.mb_type[mot_xy])){
        s->mv_type= MV_TYPE_8X8;
    }else{
        s->mv_type= MV_TYPE_16X16;
    }

    for(i=0; i<4; i++){
        int index= s->block_index[i];
        int min, max;

        c->co_located_mv[i][0]= s->next_picture.motion_val[0][index][0];
        c->co_located_mv[i][1]= s->next_picture.motion_val[0][index][1];
        /* scaled co-located vector plus the offset of 8x8 block i inside the macroblock */
        c->direct_basis_mv[i][0]= c->co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
        c->direct_basis_mv[i][1]= c->co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));

        /* horizontal extent covered by the basis vector and by basis - co-located */
        max= FFMAX(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        min= FFMIN(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        max+= 16*mb_x + 1; // +-1 is for the simpler rounding
        min+= 16*mb_x - 1;
        xmax= FFMIN(xmax, s->width - max);
        xmin= FFMAX(xmin, - 16     - min);

        /* same for the vertical extent */
        max= FFMAX(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        min= FFMIN(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        max+= 16*mb_y + 1; // +-1 is for the simpler rounding
        min+= 16*mb_y - 1;
        ymax= FFMIN(ymax, s->height - max);
        ymin= FFMAX(ymin, - 16      - min);

        /* a 16x16 co-located macroblock has a single vector */
        if(s->mv_type == MV_TYPE_16X16) break;
    }

    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);

    /* the zero delta is outside the allowed range: direct mode is unusable here */
    if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
        s->b_direct_mv_table[mot_xy][0]= 0;
        s->b_direct_mv_table[mot_xy][1]= 0;

        return 256*256*256*64;
    }

    c->xmin= xmin;
    c->ymin= ymin;
    c->xmax= xmax;
    c->ymax= ymax;
    c->flags     |= FLAG_DIRECT;
    c->sub_flags |= FLAG_DIRECT;
    c->pred_x=0;
    c->pred_y=0;

    P_LEFT[0]        = clip(mv_table[mot_xy - 1][0], xmin*mv_unit, xmax*mv_unit);
    P_LEFT[1]        = clip(mv_table[mot_xy - 1][1], ymin*mv_unit, ymax*mv_unit);

    /* special case for first line */
    if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as its cliped
        P_TOP[0]      = clip(mv_table[mot_xy - mot_stride    ][0], xmin*mv_unit, xmax*mv_unit);
        P_TOP[1]      = clip(mv_table[mot_xy - mot_stride    ][1], ymin*mv_unit, ymax*mv_unit);
        P_TOPRIGHT[0] = clip(mv_table[mot_xy - mot_stride + 1][0], xmin*mv_unit, xmax*mv_unit);
        P_TOPRIGHT[1] = clip(mv_table[mot_xy - mot_stride + 1][1], ymin*mv_unit, ymax*mv_unit);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    }

    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);
    if(c->sub_flags&FLAG_QPEL)
        dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    else
        dmin = hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);

    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

    get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed

    s->b_direct_mv_table[mot_xy][0]= mx;
    s->b_direct_mv_table[mot_xy][1]= my;
    c->flags     &= ~FLAG_DIRECT;
    c->sub_flags &= ~FLAG_DIRECT;

    return dmin;
}

/**
 * Run motion estimation for one macroblock of a B-frame and record the
 * candidate macroblock type(s) in s->mb_type.
 *
 * With c->avctx->me_threshold set, the input motion is checked first:
 *  - below me_threshold only the variance statistics are updated,
 *  - below mb_threshold the searches matching the already stored mb_type are
 *    rerun and the type is left unchanged.
 * Otherwise direct (MPEG-4 only), forward, backward, bidirectional and, with
 * CODEC_FLAG_INTERLACED_ME, interlaced forward/backward searches are run and
 * the cheapest one selects the type. With mb_decision above
 * FF_MB_DECISION_SIMPLE every usable type is flagged as a candidate instead.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_b_frame_motion(MpegEncContext * s,
                             int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    /* NOTE(review): read before ff_estimate_motion_b() recomputes
       c->mb_penalty_factor below, so this is whatever value was left there
       earlier - confirm this is intended */
    const int penalty_factor= c->mb_penalty_factor;
    int fmin, bmin, dmin, fbmin, bimin, fimin;
    int type=0;
    const int xy = mb_y*s->mb_stride + mb_x;
    init_ref(c, s->new_picture.data, s->last_picture.data, s->next_picture.data, 16*mb_x, 16*mb_y, 2);

    get_limits(s, 16*mb_x, 16*mb_y);

    c->skip=0;
    if(c->avctx->me_threshold){
        int vard= check_input_motion(s, mb_x, mb_y, 0);

        if((vard+128)>>8 < c->avctx->me_threshold){
            /* input motion is good enough: only update the statistics */
            s->current_picture.mc_mb_var[xy] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            return;
        }
        if((vard+128)>>8 < c->avctx->mb_threshold){
            /* keep the stored type, redo only the searches it needs */
            type= s->mb_type[xy];
            if(type == CANDIDATE_MB_TYPE_DIRECT){
                direct_search(s, mb_x, mb_y);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
                interlaced_search(s, 0,
                                        s->b_field_mv_table[0], s->b_field_select_table[0],
                                        s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 1);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
                interlaced_search(s, 2,
                                        s->b_field_mv_table[1], s->b_field_select_table[1],
                                        s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 1);
            }
            return;
        }
    }

    if (s->codec_id == CODEC_ID_MPEG4)
        dmin= direct_search(s, mb_x, mb_y);
    else
        dmin= INT_MAX;
//FIXME penalty stuff for non mpeg4
    c->skip=0;
    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) + 3*penalty_factor;

    c->skip=0;
    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) + 2*penalty_factor;

    c->skip=0;
    fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;

    if(s->flags & CODEC_FLAG_INTERLACED_ME){
//FIXME mb type penalty
        c->skip=0;
        c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
        fimin= interlaced_search(s, 0,
                                 s->b_field_mv_table[0], s->b_field_select_table[0],
                                 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
        c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
        bimin= interlaced_search(s, 2,
                                 s->b_field_mv_table[1], s->b_field_select_table[1],
                                 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
    }else
        fimin= bimin= INT_MAX;

    {
        /* pick the cheapest mode; direct wins a tie against forward,
           every later mode has to be strictly cheaper */
        int score= fmin;
        type = CANDIDATE_MB_TYPE_FORWARD;

        if (dmin <= score){
            score = dmin;
            type = CANDIDATE_MB_TYPE_DIRECT;
        }
        if(bmin<score){
            score=bmin;
            type= CANDIDATE_MB_TYPE_BACKWARD;
        }
        if(fbmin<score){
            score=fbmin;
            type= CANDIDATE_MB_TYPE_BIDIR;
        }
        if(fimin<score){
            score=fimin;
            type= CANDIDATE_MB_TYPE_FORWARD_I;
        }
        if(bimin<score){
            score=bimin;
            type= CANDIDATE_MB_TYPE_BACKWARD_I;
        }

        /* square in unsigned arithmetic: score*score overflows a signed int
           as soon as score exceeds 46340 */
        score= ((unsigned)score*(unsigned)score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
    }

    if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
        if(fimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_FORWARD_I;
        if(bimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_BACKWARD_I;
        if(fimin < INT_MAX && bimin < INT_MAX){
            type |= CANDIDATE_MB_TYPE_BIDIR_I;
        }
         //FIXME something smarter
        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
    }

    s->mb_type[xy]= type;
}

/**
 * Find the best f_code for motion estimation methods which do unlimited
 * searches.
 *
 * Every f_code i starts with a score of s->mb_num*(8-i). For each macroblock
 * whose type matches and whose vector lies within the allowed range, 170 is
 * subtracted from every f_code below the one s->fcode_tab requires for that
 * vector (for non B-frames only if mc_mb_var < mb_var). The f_code in 1..7
 * with the highest score is returned.
 *
 * @param s        encoder context
 * @param mv_table vectors to examine, indexed by macroblock
 * @param type     mask of candidate macroblock types to consider
 * @return selected f_code, or 1 when s->me_method is below ME_EPZS
 */
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
{
    if(s->me_method>=ME_EPZS){
        int score[8];
        int i, y, range= s->avctx->me_range ? s->avctx->me_range : (INT_MAX/2);
        uint8_t * fcode_tab= s->fcode_tab;
        int best_fcode=-1;
        int best_score=-10000000;

        if(s->msmpeg4_version)
            range= FFMIN(range, 16);
        else if(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL)
            range= FFMIN(range, 256);

        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);

        for(y=0; y<s->mb_height; y++){
            int x;
            int xy= y*s->mb_stride;
            /* xy advances in the loop header so that the "continue"
               statements below cannot leave it behind x */
            for(x=0; x<s->mb_width; x++, xy++){
                int mx, my, fcode, j;

                if(!(s->mb_type[xy] & type))
                    continue;

                mx= mv_table[xy][0];
                my= mv_table[xy][1];

                /* vectors outside the allowed range do not vote */
                if(mx >= range || mx < -range ||
                   my >= range || my < -range)
                    continue;

                fcode= FFMAX(fcode_tab[mx + MAX_MV],
                             fcode_tab[my + MAX_MV]);

                for(j=0; j<fcode && j<8; j++){
                    if(s->pict_type==B_TYPE || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy])
                        score[j]-= 170;
                }
            }
        }

        for(i=1; i<8; i++){
            if(score[i] > best_score){
                best_score= score[i];
                best_fcode= i;
            }
        }

        return best_fcode;
    }else{
        return 1;
    }
}

2023 2024
/**
 * Convert 4MV macroblocks of a P-frame to intra candidates when one of
 * their 8x8 vectors lies outside the range allowed by s->f_code (and by
 * me_range, if set).
 *
 * Nothing is changed unless CODEC_FLAG_4MV is set.
 */
void ff_fix_long_p_mvs(MpegEncContext * s)
{
    MotionEstContext * const c= &s->me;
    const int f_code= s->f_code;
    int mb_row, range;

    assert(s->pict_type==P_TYPE);

    if(s->out_format == FMT_MPEG1 || s->msmpeg4_version)
        range=  8 << f_code;
    else
        range= 16 << f_code;

    assert(range <= 16 || !s->msmpeg4_version);
    assert(range <=256 || !(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));

    if(c->avctx->me_range && range > c->avctx->me_range)
        range= c->avctx->me_range;

    if(!(s->flags&CODEC_FLAG_4MV))
        return;

    /* clip / convert to intra 8x8 type MVs */
    {
        const int b8_wrap= s->b8_stride;

        for(mb_row=0; mb_row<s->mb_height; mb_row++){
            int mb_col;
            int b8_xy= mb_row*2*b8_wrap;      /* top-left 8x8 block of the macroblock */
            int mb_xy= mb_row*s->mb_stride;

            for(mb_col=0; mb_col<s->mb_width; mb_col++, b8_xy+=2, mb_xy++){
                int blk;
                int too_long= 0;

                if(!(s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V))
                    continue;

                for(blk=0; blk<4; blk++){
                    const int off= (blk& 1) + (blk>>1)*b8_wrap;
                    const int mx= s->current_picture.motion_val[0][ b8_xy + off ][0];
                    const int my= s->current_picture.motion_val[0][ b8_xy + off ][1];

                    if(   mx >=range || mx <-range
                       || my >=range || my <-range)
                        too_long= 1;
                }

                if(too_long){
                    s->mb_type[mb_xy] &= ~CANDIDATE_MB_TYPE_INTER4V;
                    s->mb_type[mb_xy] |= CANDIDATE_MB_TYPE_INTRA;
                    s->current_picture.mb_type[mb_xy]= CANDIDATE_MB_TYPE_INTRA;
                }
            }
        }
    }
}

2070 2071 2072 2073
/**
 *
 * @param truncate 1 for truncation, 0 for using intra
 */
2074
void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
2075
                     int16_t (*mv_table)[2], int f_code, int type, int truncate)
2076
{
2077
    MotionEstContext * const c= &s->me;
2078
    int y, h_range, v_range;
2079

2080
    // RAL: 8 in MPEG-1, 16 in MPEG-4
2081
    int range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
2082

2083
    if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
2084

2085 2086 2087
    h_range= range;
    v_range= field_select_table ? range>>1 : range;

2088 2089 2090
    /* clip / convert to intra 16x16 type MVs */
    for(y=0; y<s->mb_height; y++){
        int x;
2091
        int xy= y*s->mb_stride;
2092
        for(x=0; x<s->mb_width; x++){
2093
            if (s->mb_type[xy] & type){    // RAL: "type" test added...
2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108
                if(field_select_table==NULL || field_select_table[xy] == field_select){
                    if(   mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
                       || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){

                        if(truncate){
                            if     (mv_table[xy][0] > h_range-1) mv_table[xy][0]=  h_range-1;
                            else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
                            if     (mv_table[xy][1] > v_range-1) mv_table[xy][1]=  v_range-1;
                            else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
                        }else{
                            s->mb_type[xy] &= ~type;
                            s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
                            mv_table[xy][0]=
                            mv_table[xy][1]= 0;
                        }
2109
                    }
2110
                }
2111 2112 2113 2114 2115
            }
            xy++;
        }
    }
}