/*
 * Motion estimation
 * Copyright (c) 2000,2001 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
 */
25

Michael Niedermayer's avatar
Michael Niedermayer committed
26 27 28 29
/**
 * @file motion_est.c
 * Motion estimation.
 */
30

Fabrice Bellard's avatar
Fabrice Bellard committed
31 32
#include <stdlib.h>
#include <stdio.h>
33
#include <limits.h>
Fabrice Bellard's avatar
Fabrice Bellard committed
34 35 36 37
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"

38 39
#undef NDEBUG
#include <assert.h>
Michael Niedermayer's avatar
Michael Niedermayer committed
40

41
/* Square of a. The argument is expanded (and therefore evaluated) twice,
 * so it must not have side effects. */
#define SQ(a) ((a)*(a))
42

43 44 45 46 47 48
/* Shorthands for slots of a local predictor array that must be named P
 * (e.g. `int P[10][2]` in h263_mv4_search()); each slot is one vector
 * accessed as [0] and [1].
 * In h263_mv4_search() P_LEFT, P_TOP and P_TOPRIGHT are loaded from
 * neighbouring motion_val entries and P_MEDIAN is their component-wise
 * mid_pred(). */
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]

Michael Niedermayer's avatar
Michael Niedermayer committed
49
/* Forward declaration: ff_init_me() stores a pointer to this function
 * before its definition appears further down in the file. */
static inline int sad_hpel_motion_search(MpegEncContext *s,
                                         int *mx_ptr, int *my_ptr, int dmin,
                                         int src_index, int ref_index,
                                         int size, int h);
Michael Niedermayer's avatar
Michael Niedermayer committed
53

54
/**
 * Advance the generation counter stored in the context and return it.
 *
 * The counter is bumped by 1<<(ME_MAP_MV_BITS*2). When the addition makes
 * it wrap to 0, the counter is restarted at one step and c->map is
 * cleared.
 *
 * @param c motion estimation context
 * @return the new value of c->map_generation
 */
static inline int update_map_generation(MotionEstContext *c)
{
    const int generation_step = 1<<(ME_MAP_MV_BITS*2);

    c->map_generation += generation_step;
    if (c->map_generation != 0)
        return c->map_generation;

    /* wrapped around: start over and clear the map */
    c->map_generation = generation_step;
    memset(c->map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
    return c->map_generation;
}

64 65 66 67 68 69
/* shape adaptive search stuff */

/** One candidate record: a score ("height") with a position and a flag. */
typedef struct Minima{
    int height;
    int x, y;
    int checked;
}Minima;

/**
 * qsort()-style comparator ordering Minima records by ascending height.
 *
 * The result is derived from comparisons instead of a subtraction, because
 * `da->height - db->height` overflows (undefined behaviour) when the two
 * heights are far apart and may then report the wrong order.
 *
 * @param a pointer to the first Minima
 * @param b pointer to the second Minima
 * @return -1, 0 or 1 if a's height is less than, equal to or greater than b's
 */
static int minima_cmp(const void *a, const void *b){
    const Minima *da = (const Minima *) a;
    const Minima *db = (const Minima *) b;

    return (da->height > db->height) - (da->height < db->height);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
77

78 79 80
/* Bit flags combined by get_flags() and tested in cmp(). */
#define FLAG_QPEL   1 //must be 1: cmp() uses (flags&FLAG_QPEL) directly in shift counts such as 1+qpel
#define FLAG_CHROMA 2 // cmp() adds the chroma planes to the score
#define FLAG_DIRECT 4 // cmp() takes its direct-mode (two-reference) path
Michael Niedermayer's avatar
Michael Niedermayer committed
81

82
/**
 * Point the context's source/reference plane pointers at position (x, y).
 *
 * Plane 0 is offset by y*stride + x, planes 1 and 2 by
 * (y*uvstride + x)>>1. c->src[0] and c->ref[0] are always set; when
 * ref_index is nonzero, c->ref[ref_index] is additionally set from ref2.
 *
 * @param c         motion estimation context
 * @param src       base pointers of the three source planes
 * @param ref       base pointers of the three reference planes
 * @param ref2      base pointers of a second reference, read only if ref_index != 0
 * @param x         horizontal position
 * @param y         vertical position
 * @param ref_index slot of c->ref that receives ref2 (0 = none)
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
    const int luma_offset   =  y*c->  stride + x;
    const int chroma_offset = (y*c->uvstride + x)>>1;
    const int offset[3]= { luma_offset, chroma_offset, chroma_offset };
    int plane;

    for(plane=0; plane<3; plane++){
        c->src[0][plane]= src[plane] + offset[plane];
        c->ref[0][plane]= ref[plane] + offset[plane];
    }

    if(!ref_index)
        return;

    for(plane=0; plane<3; plane++)
        c->ref[ref_index][plane]= ref2[plane] + offset[plane];
}

100 101
/**
 * Build the FLAG_* bit set used by cmp().
 *
 * @param c      motion estimation context; FLAG_QPEL is taken from
 *               c->avctx->flags & CODEC_FLAG_QPEL
 * @param direct nonzero to set FLAG_DIRECT
 * @param chroma nonzero to set FLAG_CHROMA
 * @return the combined flags
 */
static int get_flags(MotionEstContext *c, int direct, int chroma){
    int flags= 0;

    if(c->avctx->flags&CODEC_FLAG_QPEL)
        flags |= FLAG_QPEL;
    if(direct)
        flags |= FLAG_DIRECT;
    if(chroma)
        flags |= FLAG_CHROMA;

    return flags;
}

106
/**
 * Score one candidate motion vector.
 *
 * Without FLAG_DIRECT the prediction is read from ref[0] at full-pel offset
 * (x, y); if the sub-pel part (subx, suby) is nonzero it is first
 * interpolated into c->temp with the qpel/hpel put function selected by
 * size. The result is compared to src[0] with cmp_func over h rows. With
 * FLAG_CHROMA both chroma planes are interpolated into c->temp + 16*stride
 * and scored with chroma_cmp_func over h>>1 rows.
 *
 * With FLAG_DIRECT a two-reference prediction is built in c->temp: a "put"
 * from ref[0] followed by an "avg" from ref[8], using vectors derived from
 * c->direct_basis_mv / c->co_located_mv and s->pp_time / s->pb_time. It is
 * always compared over 16 rows. A candidate outside
 * [xmin,xmax] x [ymin,ymax] trips the assert; if asserts are compiled out
 * the score 256*256*256*32 is returned instead.
 * NOTE(review): ref[8] indexes past the plane pointers of one reference
 * slot; presumably this relies on the layout of c->ref - confirm.
 *
 * @param s               encoder context (s->me is the ME context)
 * @param x, y            full-pel part of the candidate vector
 * @param subx, suby      sub-pel part of the candidate vector
 * @param size            first index into the put/avg function tables
 * @param h               block height passed to the compare functions
 * @param ref_index       slot of c->ref to read the reference from
 * @param src_index       slot of c->src to read the source from
 * @param cmp_func        luma compare function
 * @param chroma_cmp_func chroma compare function (used with FLAG_CHROMA)
 * @param flags           combination of FLAG_QPEL, FLAG_CHROMA, FLAG_DIRECT
 * @return the comparison score
 */
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int uvstride= c->uvstride;
    const int qpel= flags&FLAG_QPEL;
    const int chroma= flags&FLAG_CHROMA;
    /* Sub-pel steps per full pel (2 or 4). Multiplying by this instead of
     * shifting avoids left-shifting negative coordinates, which is undefined. */
    const int subpel_scale= 1<<(1+qpel);
    const int dxy= subx + suby*subpel_scale; //FIXME log2_subpel?
    const int hx= subx + x*subpel_scale;
    const int hy= suby + y*subpel_scale;
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    int d;
    //FIXME check chroma 4mv, (no crashes ...)
    if(flags&FLAG_DIRECT){
        const int in_range=    x >= c->xmin && hx <= c->xmax*subpel_scale
                            && y >= c->ymin && hy <= c->ymax*subpel_scale;
        assert(in_range);
        if(in_range){
            const int time_pp= s->pp_time;
            const int time_pb= s->pb_time;
            const int mask= 2*qpel+1;
            if(s->mv_type==MV_TYPE_8X8){
                int i;
                for(i=0; i<4; i++){
                    int fx = c->direct_basis_mv[i][0] + hx;
                    int fy = c->direct_basis_mv[i][1] + hy;
                    int bx = hx ? fx - c->co_located_mv[i][0] : c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(qpel+4));
                    int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
                    int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                    int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

                    /* each 8x8 sub-block gets its own quadrant of c->temp */
                    uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1);
                    if(qpel){
                        c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
                        c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride);
                    }else{
                        c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
                        c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
                    }
                }
            }else{
                int fx = c->direct_basis_mv[0][0] + hx;
                int fy = c->direct_basis_mv[0][1] + hy;
                int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
                int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
                int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

                if(qpel){
                    /* the 8x8 qpel functions are applied to the four
                     * quadrants: first all "put"s, then all "avg"s */
                    uint8_t * const fsrc= ref[0] + (fx>>2) + (fy>>2)*stride;
                    uint8_t * const bsrc= ref[8] + (bx>>2) + (by>>2)*stride;
                    int i;
                    for(i=0; i<4; i++){
                        const int quad= 8*(i&1) + 8*stride*(i>>1);
                        c->qpel_put[1][fxy](c->temp + quad, fsrc + quad, stride);
                    }
                    for(i=0; i<4; i++){
                        const int quad= 8*(i&1) + 8*stride*(i>>1);
                        c->qpel_avg[1][bxy](c->temp + quad, bsrc + quad, stride);
                    }
                }else{
                    assert((fx>>1) + 16*s->mb_x >= -16);
                    assert((fy>>1) + 16*s->mb_y >= -16);
                    assert((fx>>1) + 16*s->mb_x <= s->width);
                    assert((fy>>1) + 16*s->mb_y <= s->height);
                    assert((bx>>1) + 16*s->mb_x >= -16);
                    assert((by>>1) + 16*s->mb_y >= -16);
                    assert((bx>>1) + 16*s->mb_x <= s->width);
                    assert((by>>1) + 16*s->mb_y <= s->height);

                    c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
                    c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
                }
            }
            d = cmp_func(s, c->temp, src[0], stride, 16);
        }else
            d= 256*256*256*32;
    }else{
        /* assigned on every path that reaches the chroma code below; the
         * initialiser only keeps compilers from warning */
        int uvdxy= 0;
        if(dxy){
            if(qpel){
                c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
                if(chroma){
                    int cx= hx/2;
                    int cy= hy/2;
                    cx= (cx>>1)|(cx&1);
                    cy= (cy>>1)|(cy&1);
                    uvdxy= (cx&1) + 2*(cy&1);
                    //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
                }
            }else{
                c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
                if(chroma)
                    uvdxy= dxy | (x&1) | (2*(y&1));
            }
            d = cmp_func(s, c->temp, src[0], stride, h);
        }else{
            /* pure full-pel vector: compare against the reference directly */
            d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h);
            if(chroma)
                uvdxy= (x&1) + 2*(y&1);
        }
        if(chroma){
            uint8_t * const uvtemp= c->temp + 16*stride;
            c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
            c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
            d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
            d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
        }
    }
    return d;
}
Michael Niedermayer's avatar
Michael Niedermayer committed
222 223 224

#include "motion_est_template.c"

Michael Niedermayer's avatar
Michael Niedermayer committed
225 226 227 228 229 230 231
/**
 * Compare function that ignores its input and always scores 0.
 * ff_init_me() installs it in slot 2 of the me_cmp / me_sub_cmp tables.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
    return 0;
}

/**
 * Pixel "put" function that does nothing.
 * ff_init_me() installs it in row 2 of the hpel_put table.
 */
static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
}

Michael Niedermayer's avatar
Michael Niedermayer committed
232
/**
 * Set up the motion estimation context s->me from the encoder settings.
 *
 * In order: stores the codec context, logs a note when the ME map looks
 * small for the selected diamond size (only while c->stride is still 0),
 * installs the compare functions, derives the flag sets for cmp(), picks
 * the sub-pel search routine and pixel tables, sets the strides, stubs out
 * slot 2 of the compare/put tables for codecs other than Snow, disables
 * sub-pel search for H.261 and points c->temp at the scratchpad.
 */
void ff_init_me(MpegEncContext *s){
    MotionEstContext * const c= &s->me;
    int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
    int dia_size= FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255);
    int i;

    c->avctx= s->avctx;

    if(cache_size < 2*dia_size && !c->stride)
        av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n");

    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_cmp,     c->avctx->me_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
    ff_set_cmp(&s->dsp, s->dsp.mb_cmp,     c->avctx->mb_cmp);

    c->flags    = get_flags(c, 0, c->avctx->me_cmp    &FF_CMP_CHROMA);
    c->sub_flags= get_flags(c, 0, c->avctx->me_sub_cmp&FF_CMP_CHROMA);
    c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);

    /*FIXME s->no_rounding b_type*/
    if(s->flags&CODEC_FLAG_QPEL){
        c->sub_motion_search= qpel_motion_search;
        c->qpel_avg= s->dsp.avg_qpel_pixels_tab;
        c->qpel_put= s->no_rounding ? s->dsp.put_no_rnd_qpel_pixels_tab
                                    : s->dsp.put_qpel_pixels_tab;
    }else{
        /* the SAD-only half-pel search is used only when sub-pel compare
         * has no chroma and all three compare functions are plain SAD */
        const int sad_only=    !(c->avctx->me_sub_cmp&FF_CMP_CHROMA)
                            && c->avctx->me_sub_cmp == FF_CMP_SAD
                            && c->avctx->    me_cmp == FF_CMP_SAD
                            && c->avctx->    mb_cmp == FF_CMP_SAD;
        if(sad_only)
            c->sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
        else
            c->sub_motion_search= hpel_motion_search;
    }

    c->hpel_avg= s->dsp.avg_pixels_tab;
    c->hpel_put= s->no_rounding ? s->dsp.put_no_rnd_pixels_tab
                                : s->dsp.put_pixels_tab;

    if(s->linesize){
        c->stride  = s->linesize;
        c->uvstride= s->uvlinesize;
    }else{
        c->stride  = 16*s->mb_width + 32;
        c->uvstride=  8*s->mb_width + 16;
    }

    // An 8x8 fullpel search would need a 4x4 chroma compare, which we don't
    // have yet, and even if we had it the motion estimation code doesn't expect it.
    if(s->codec_id != CODEC_ID_SNOW){
        if(c->avctx->me_cmp&FF_CMP_CHROMA) /* && !s->dsp.me_cmp[2] */
            s->dsp.me_cmp[2]= zero_cmp;
        if((c->avctx->me_sub_cmp&FF_CMP_CHROMA) && !s->dsp.me_sub_cmp[2])
            s->dsp.me_sub_cmp[2]= zero_cmp;
        for(i=0; i<4; i++)
            c->hpel_put[2][i]= zero_hpel;
    }

    if(s->codec_id == CODEC_ID_H261)
        c->sub_motion_search= no_sub_motion_search;

    c->temp= c->scratchpad;
}
297

298
#if 0
299
/**
 * Sum of absolute differences between a 16x16 block and a constant.
 *
 * @param pix       top-left sample of the block
 * @param line_size distance in bytes between two rows
 * @param mean      value each sample is compared against
 * @return sum over all 256 samples of |sample - mean|
 */
static int pix_dev(uint8_t * pix, int line_size, int mean)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += FFABS(pix[col]-mean);
        pix += line_size;
    }
    return sum;
}
320
#endif
321

322
/**
 * "Search" that performs no search: reports 16 times the current
 * macroblock coordinates (s->mb_x, s->mb_y).
 *
 * @param s      encoder context
 * @param mx_ptr receives 16 * s->mb_x
 * @param my_ptr receives 16 * s->mb_y
 */
static inline void no_motion_search(MpegEncContext * s,
                                    int *mx_ptr, int *my_ptr)
{
    const int mb_size = 16;

    *mx_ptr = mb_size * s->mb_x;
    *my_ptr = mb_size * s->mb_y;
}

329
#if 0  /* the use of these functions is inside #if 0 */
Fabrice Bellard's avatar
Fabrice Bellard committed
330 331
/**
 * Exhaustive full-pel search around the current macroblock.
 *
 * Every position within range-1 pixels of the macroblock origin, clipped
 * to [xmin,xmax] x [ymin,ymax], is scored with s->dsp.pix_abs[0][0]. Ties
 * are broken in favour of the position closest (L1 distance) to the origin.
 *
 * @param s           encoder context
 * @param mx_ptr      receives the x coordinate of the best position
 * @param my_ptr      receives the y coordinate of the best position
 * @param range       search range in pixels
 * @param xmin, ymin  lower limits of the search window
 * @param xmax, ymax  upper limits of the search window
 * @param ref_picture reference plane to search in
 * @return the best score, or 0x7fffffff if no position was tested
 */
static int full_motion_search(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int range,
                              int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    const int xx = 16 * s->mb_x;
    const int yy = 16 * s->mb_y;
    /* we lose one pixel to avoid boundary problems with half pixel prediction */
    const int lo_x = xx - range + 1;
    const int hi_x = xx + range - 1;
    const int lo_y = yy - range + 1;
    const int hi_y = yy + range - 1;
    const int x1 = lo_x < xmin ? xmin : lo_x;
    const int x2 = hi_x > xmax ? xmax : hi_x;
    const int y1 = lo_y < ymin ? ymin : lo_y;
    const int y2 = hi_y > ymax ? ymax : hi_y;
    uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    int best_x = 0;
    int best_y = 0;
    int dmin = 0x7fffffff;
    int x, y;

    for (y = y1; y <= y2; y++) {
        for (x = x1; x <= x2; x++) {
            const int d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
                                               s->linesize, 16);
            if (d > dmin)
                continue;
            /* on an equal score only a strictly closer position wins */
            if (d == dmin &&
                (abs(x - xx) + abs(y - yy)) >=
                (abs(best_x - xx) + abs(best_y - yy)))
                continue;
            dmin   = d;
            best_x = x;
            best_y = y;
        }
    }

    *mx_ptr = best_x;
    *my_ptr = best_y;
    return dmin;
}


/**
 * Logarithmic full-pel search.
 *
 * Starting with a window of +-range around the macroblock origin, a grid
 * with spacing `range` is scored with s->dsp.pix_abs[0][0]; the window is
 * then re-centred on the best position and range is halved, until range
 * drops below 1. Ties go to the position closest (L1) to the origin.
 *
 * A range below 1 used to make the grid step zero and the scan loops never
 * terminated; the step is now at least 1, so such a call scores at most
 * the clipped start window once and returns.
 *
 * @param s           encoder context
 * @param mx_ptr      receives the x coordinate of the best position
 * @param my_ptr      receives the y coordinate of the best position
 * @param range       initial search range in pixels
 * @param xmin, ymin  lower limits of the search window
 * @param xmax, ymax  upper limits of the search window
 * @param ref_picture reference plane to search in
 * @return the best score, or 0x7fffffff if no position was tested
 */
static int log_motion_search(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int range,
                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y;
    int mx, my, dmin, d;
    uint8_t *pix;

    xx = s->mb_x << 4;
    yy = s->mb_y << 4;

    /* Left limit */
    x1 = xx - range;
    if (x1 < xmin)
        x1 = xmin;

    /* Right limit */
    x2 = xx + range;
    if (x2 > xmax)
        x2 = xmax;

    /* Upper limit */
    y1 = yy - range;
    if (y1 < ymin)
        y1 = ymin;

    /* Lower limit */
    y2 = yy + range;
    if (y2 > ymax)
        y2 = ymax;

    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    dmin = 0x7fffffff;
    mx = 0;
    my = 0;

    do {
        /* never step by 0 (or backwards): that would loop forever */
        const int step = range > 0 ? range : 1;

        for (y = y1; y <= y2; y += step) {
            for (x = x1; x <= x2; x += step) {
                d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
                if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                    dmin = d;
                    mx = x;
                    my = y;
                }
            }
        }

        range = range >> 1;

        /* re-centre the (clipped) window on the best position so far */
        x1 = mx - range;
        if (x1 < xmin)
            x1 = xmin;

        x2 = mx + range;
        if (x2 > xmax)
            x2 = xmax;

        y1 = my - range;
        if (y1 < ymin)
            y1 = ymin;

        y2 = my + range;
        if (y2 > ymax)
            y2 = ymax;

    } while (range >= 1);

#ifdef DEBUG
    av_log(s->avctx, AV_LOG_DEBUG, "log       - MX: %d\tMY: %d\n", mx, my);
#endif
    *mx_ptr = mx;
    *my_ptr = my;
    return dmin;
}

/**
 * Full-pel search that optimises x and y separately.
 *
 * In each pass the row through the current position is scanned along x
 * and the column through it along y, both with spacing `range` inside the
 * clipped window; the best x and best y are combined, the window is
 * re-centred there and range is halved, until range drops below 1.
 *
 * A range below 1 used to make the scan step zero and the loops never
 * terminated; the step is now at least 1.
 *
 * @param s           encoder context
 * @param mx_ptr      receives the x coordinate of the result
 * @param my_ptr      receives the y coordinate of the result
 * @param range       initial search range in pixels
 * @param xmin, ymin  lower limits of the search window
 * @param xmax, ymax  upper limits of the search window
 * @param ref_picture reference plane to search in
 * @return the best score of the last vertical scan (dminy)
 */
static int phods_motion_search(MpegEncContext * s,
                               int *mx_ptr, int *my_ptr, int range,
                               int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y, lastx, d;
    int mx, my, dminx, dminy;
    uint8_t *pix;

    xx = s->mb_x << 4;
    yy = s->mb_y << 4;

    /* Left limit */
    x1 = xx - range;
    if (x1 < xmin)
        x1 = xmin;

    /* Right limit */
    x2 = xx + range;
    if (x2 > xmax)
        x2 = xmax;

    /* Upper limit */
    y1 = yy - range;
    if (y1 < ymin)
        y1 = ymin;

    /* Lower limit */
    y2 = yy + range;
    if (y2 > ymax)
        y2 = ymax;

    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    mx = 0;
    my = 0;

    x = xx;
    y = yy;
    do {
        /* never step by 0 (or backwards): that would loop forever */
        const int step = range > 0 ? range : 1;

        dminx = 0x7fffffff;
        dminy = 0x7fffffff;

        /* horizontal scan at the current y */
        lastx = x;
        for (x = x1; x <= x2; x += step) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
            if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminx = d;
                mx = x;
            }
        }

        /* vertical scan at the x the pass started with */
        x = lastx;
        for (y = y1; y <= y2; y += step) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
            if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminy = d;
                my = y;
            }
        }

        range = range >> 1;

        x = mx;
        y = my;
        x1 = mx - range;
        if (x1 < xmin)
            x1 = xmin;

        x2 = mx + range;
        if (x2 > xmax)
            x2 = xmax;

        y1 = my - range;
        if (y1 < ymin)
            y1 = ymin;

        y2 = my + range;
        if (y2 > ymax)
            y2 = ymax;

    } while (range >= 1);

#ifdef DEBUG
    av_log(s->avctx, AV_LOG_DEBUG, "phods     - MX: %d\tMY: %d\n", mx, my);
#endif

    /* half pixel search */
    *mx_ptr = mx;
    *my_ptr = my;
    return dminy;
}
550
#endif /* 0 */
551 552 553

#define Z_THRESHOLD 256

Michael Niedermayer's avatar
Michael Niedermayer committed
554
/*
 * Score the half-pel candidate (x, y), each in {-1, 0, +1}, and keep it in
 * dminh/dx/dy if it beats the best score so far.
 * Expects these names in the calling scope: s, size, pix, ptr, stride, h,
 * d, mv_penalty, pen_x, pen_y, penalty_factor, dminh, dx, dy.
 * ptr must already point at the row the candidate is read from; only the
 * horizontal step is applied here ((x)>>1 is -1 for x = -1, else 0).
 * The suffix argument is not used.
 * The expansion is a bare { } block: invoke it without a trailing semicolon.
 */
#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
    d= s->dsp.pix_abs[size][((x)?1:0)+((y)?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
    d += (mv_penalty[pen_x + (x)] + mv_penalty[pen_y + (y)])*penalty_factor;\
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
560

Michael Niedermayer's avatar
Michael Niedermayer committed
561
/**
 * Half-pel refinement of a full-pel vector using SAD.
 *
 * On entry *mx_ptr / *my_ptr hold the full-pel vector and dmin its score.
 * If the vector lies strictly inside the search window, the scores of its
 * four full-pel neighbours in score_map select which half-pel candidates
 * are tried (four per call, via CHECK_SAD_HALF_MV). On exit the vector is
 * in half-pel units (twice the full-pel value plus the chosen -1/0/+1
 * step).
 * If c->skip is set the vector is reset to (0, 0) and dmin is returned.
 * score_map, xmin/xmax/ymin/ymax, pred_x/pred_y and mv_penalty are
 * presumably declared by LOAD_COMMON, which is defined outside this
 * file's visible part.
 *
 * @param s         encoder context
 * @param mx_ptr    in: full-pel x, out: half-pel x
 * @param my_ptr    in: full-pel y, out: half-pel y
 * @param dmin      score of the full-pel vector
 * @param src_index slot of c->src to compare against
 * @param ref_index slot of c->ref to search in
 * @param size      first index into s->dsp.pix_abs
 * @param h         block height passed to the compare function
 * @return the best score found (dmin if no candidate was better)
 */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->sub_penalty_factor;
    int mx, my, dminh;
    uint8_t *pix, *ptr;
    int stride= c->stride;
    const int flags= c->sub_flags;
    LOAD_COMMON

    assert(flags == 0);

    if(c->skip){
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    mx    = *mx_ptr;
    my    = *my_ptr;
    dminh = dmin;
    pix   = c->src[src_index][0];
    ptr   = c->ref[ref_index][0] + (my * stride) + mx;

    if (mx <= xmin || mx >= xmax ||
        my <= ymin || my >= ymax) {
        /* on the window border: no refinement, only convert to half-pel units */
        mx<<=1;
        my<<=1;
    }else{
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        const int center= (my<<ME_MAP_SHIFT) + mx;
        const int top   = score_map[(center-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int left  = score_map[(center- 1               )&(ME_MAP_SIZE-1)];
        const int right = score_map[(center+ 1               )&(ME_MAP_SIZE-1)];
        const int bottom= score_map[(center+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];

        mx<<=1;
        my<<=1;

        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /* ptr starts one row up: candidates with y == -1 read from there.
         * It is moved back down before any candidate with y >= 0. */
        ptr-= stride;
        if(top<=bottom){
            CHECK_SAD_HALF_MV(y2 , 0, -1)
            if(left<=right){
                CHECK_SAD_HALF_MV(xy2, -1, -1)
                if(top+right<=bottom+left){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
            }else{
                CHECK_SAD_HALF_MV(xy2, +1, -1)
                if(top+left<=bottom+right){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
            }
        }else{
            if(left<=right){
                if(top+left<=bottom+right){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
                CHECK_SAD_HALF_MV(xy2, -1, +1)
            }else{
                if(top+right<=bottom+left){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
                CHECK_SAD_HALF_MV(xy2, +1, +1)
            }
            CHECK_SAD_HALF_MV(y2 ,  0, +1)
        }
        mx+=dx;
        my+=dy;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}

669
/**
 * Store the vector (mx, my) for the current macroblock.
 *
 * s->p_mv_table is always written. When mv4 is nonzero the vector is also
 * copied into the 2x2 motion_val[0] entries starting at s->block_index[0]
 * (rows are s->b8_stride apart).
 *
 * @param s   encoder context
 * @param mx  horizontal vector component
 * @param my  vertical vector component
 * @param mv4 nonzero to also fill the four motion_val entries
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int row, col;

    s->p_mv_table[mb_xy][0] = mx;
    s->p_mv_table[mb_xy][1] = my;

    /* has already been set to the 4 MV if 4MV is done */
    if(!mv4)
        return;

    for(row=0; row<2; row++){
        const int row_xy= s->block_index[0] + row*s->b8_stride;
        for(col=0; col<2; col++){
            s->current_picture.motion_val[0][row_xy+col][0]= mx;
            s->current_picture.motion_val[0][row_xy+col][1]= my;
        }
    }
}

693 694 695
/**
 * get fullpel ME search limits.
 */
696
static inline void get_limits(MpegEncContext *s, int x, int y)
Fabrice Bellard's avatar
Fabrice Bellard committed
697
{
698
    MotionEstContext * const c= &s->me;
699
    int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
700
/*
701
    if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
702
    else                   c->range= 16;
703
*/
Fabrice Bellard's avatar
Fabrice Bellard committed
704
    if (s->unrestricted_mv) {
705 706 707 708
        c->xmin = - x - 16;
        c->ymin = - y - 16;
        c->xmax = - x + s->mb_width *16;
        c->ymax = - y + s->mb_height*16;
709 710 711 712
    } else if (s->out_format == FMT_H261){
        // Search range of H261 is different from other codec standards
        c->xmin = (x > 15) ? - 15 : 0;
        c->ymin = (y > 15) ? - 15 : 0;
713
        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
714
        c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
Fabrice Bellard's avatar
Fabrice Bellard committed
715
    } else {
716 717 718 719
        c->xmin = - x;
        c->ymin = - y;
        c->xmax = - x + s->mb_width *16 - 16;
        c->ymax = - y + s->mb_height*16 - 16;
Fabrice Bellard's avatar
Fabrice Bellard committed
720
    }
721 722 723 724 725 726
    if(range){
        c->xmin = FFMAX(c->xmin,-range);
        c->xmax = FFMIN(c->xmax, range);
        c->ymin = FFMAX(c->ymin,-range);
        c->ymax = FFMIN(c->ymax, range);
    }
727 728
}

729 730
/**
 * Derive the plane-0 pointers of slots 1..3 of c->ref and c->src from
 * slot 0: slot 1 is 8 samples to the right, slot 2 is 8 rows down and
 * slot 3 is both (the four 8x8 quadrants of a 16x16 block).
 */
static inline void init_mv4_ref(MotionEstContext *c){
    const int stride= c->stride;
    int blk;

    for(blk=1; blk<4; blk++){
        const int quad_offset= 8*(blk&1) + 8*stride*(blk>>1);

        c->ref[blk][0] = c->ref[0][0] + quad_offset;
        c->src[blk][0] = c->src[0][0] + quad_offset;
    }
}

740
/**
 * Search one motion vector for each of the four 8x8 luma blocks of the
 * current macroblock.
 *
 * For every block the predictors (left, top, top-right, median and the
 * 16x16 vector) are collected, an EPZS search plus sub-pel refinement is
 * run, and the resulting vector is stored in
 * s->current_picture.motion_val[0]. When the sub-pel and macroblock compare
 * functions differ, the predicted blocks are rendered into c->scratchpad
 * and scored with mb_cmp afterwards. Chroma is added to the score when
 * FF_CMP_CHROMA is set in avctx->mb_cmp.
 *
 * On a non-INT_MAX return, c->pred_x / c->pred_y are set to (mx, my).
 *
 * @param s     encoder context
 * @param mx    16x16 motion vector, horizontal component (used as predictor)
 * @param my    16x16 motion vector, vertical component (used as predictor)
 * @param shift sub-pel precision as a shift (limits are scaled by 1<<shift)
 * @return INT_MAX if all four vectors equal (mx, my); otherwise the summed
 *         score, plus 11*mb_penalty_factor unless mb_cmp is FF_CMP_RD
 */
static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
{
    MotionEstContext * const c= &s->me;
    const int size= 1;
    const int h=8;
    int block;
    int P[10][2];
    int dmin_sum=0, mx4_sum=0, my4_sum=0;
    int same=1;
    const int stride= c->stride;
    uint8_t *mv_penalty= c->current_mv_penalty;

    init_mv4_ref(c);

    for(block=0; block<4; block++){
        int mx4, my4;
        int pred_x4, pred_y4;
        int dmin4;
        static const int off[4]= {2, 1, 1, -1};
        const int mot_stride = s->b8_stride;
        const int mot_xy = s->block_index[block];

        P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
        P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];

        /* limits are scaled by multiplication: xmin/ymin may be negative and
         * left-shifting a negative value is undefined behaviour */
        if(P_LEFT[0]       > c->xmax * (1<<shift)) P_LEFT[0]       = c->xmax * (1<<shift);

        /* special case for first line */
        if (s->first_slice_line && block<2) {
            c->pred_x= pred_x4= P_LEFT[0];
            c->pred_y= pred_y4= P_LEFT[1];
        } else {
            P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][0];
            P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][1];
            P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
            P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
            if(P_TOP[1]      > c->ymax * (1<<shift)) P_TOP[1]     = c->ymax * (1<<shift);
            if(P_TOPRIGHT[0] < c->xmin * (1<<shift)) P_TOPRIGHT[0]= c->xmin * (1<<shift);
            if(P_TOPRIGHT[0] > c->xmax * (1<<shift)) P_TOPRIGHT[0]= c->xmax * (1<<shift);
            if(P_TOPRIGHT[1] > c->ymax * (1<<shift)) P_TOPRIGHT[1]= c->ymax * (1<<shift);

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            c->pred_x= pred_x4 = P_MEDIAN[0];
            c->pred_y= pred_y4 = P_MEDIAN[1];
        }
        P_MV1[0]= mx;
        P_MV1[1]= my;

        dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);

        dmin4= c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);

        if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
            /* render the prediction of this 8x8 block into the scratchpad so
             * that the whole macroblock can be scored with mb_cmp below */
            int dxy;
            const int offset= ((block&1) + (block>>1)*stride)*8;
            uint8_t *dest_y = c->scratchpad + offset;
            if(s->quarter_sample){
                uint8_t *ref= c->ref[block][0] + (mx4>>2) + (my4>>2)*stride;
                dxy = ((my4 & 3) << 2) | (mx4 & 3);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y   , ref    , stride);
                else
                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y   , ref    , stride);
            }else{
                uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
                dxy = ((my4 & 1) << 1) | (mx4 & 1);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y    , ref    , stride, h);
                else
                    s->dsp.put_pixels_tab       [1][dxy](dest_y    , ref    , stride, h);
            }
            /* only the vector cost here; the distortion is added after the loop */
            dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*c->mb_penalty_factor;
        }else
            dmin_sum+= dmin4;

        if(s->quarter_sample){
            mx4_sum+= mx4/2;
            my4_sum+= my4/2;
        }else{
            mx4_sum+= mx4;
            my4_sum+= my4;
        }

        s->current_picture.motion_val[0][ s->block_index[block] ][0]= mx4;
        s->current_picture.motion_val[0][ s->block_index[block] ][1]= my4;

        if(mx4 != mx || my4 != my) same=0;
    }

    /* four identical vectors are no better than the 16x16 mode */
    if(same)
        return INT_MAX;

    if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
        dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, c->scratchpad, stride, 16);
    }

    if(c->avctx->mb_cmp&FF_CMP_CHROMA){
        int dxy;
        int cmx, cmy; /* chroma vector derived from the four luma vectors */
        int offset;

        cmx= ff_h263_round_chroma(mx4_sum);
        cmy= ff_h263_round_chroma(my4_sum);
        dxy = ((cmy & 1) << 1) | (cmx & 1);

        offset= (s->mb_x*8 + (cmx>>1)) + (s->mb_y*8 + (cmy>>1))*s->uvlinesize;

        if(s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }else{
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }

        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad  , s->uvlinesize, 8);
        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad+8, s->uvlinesize, 8);
    }

    c->pred_x= mx;
    c->pred_y= my;

    switch(c->avctx->mb_cmp&0xFF){
    /*case FF_CMP_SSE:
        return dmin_sum+ 32*s->qscale*s->qscale;*/
    case FF_CMP_RD:
        return dmin_sum;
    default:
        return dmin_sum+ 11*c->mb_penalty_factor;
    }
}

876 877 878 879 880 881 882 883 884 885 886 887 888
/**
 * Set up the pointers for the second field line.
 *
 * c->ref[ref_index+1] and c->src[1] are made to point one line
 * (s->linesize, or s->uvlinesize for chroma) below c->ref[ref_index] and
 * c->src[0]. The chroma planes are only touched when FLAG_CHROMA is set.
 */
static inline void init_interlaced_ref(MpegEncContext *s, int ref_index){
    MotionEstContext * const c= &s->me;
    const int second= ref_index + 1;
    int plane;

    c->ref[second][0] = c->ref[ref_index][0] + s->linesize;
    c->src[1][0]      = c->src[0][0]         + s->linesize;

    if(!(c->flags & FLAG_CHROMA))
        return;

    for(plane=1; plane<3; plane++){
        c->ref[second][plane] = c->ref[ref_index][plane] + s->uvlinesize;
        c->src[1][plane]      = c->src[0][plane]         + s->uvlinesize;
    }
}

889
/**
 * Field-based motion search for the current macroblock.
 *
 * For each of the two blocks (loop index "block") both reference fields are
 * tried, unless user_field_select is set, in which case only the field
 * already stored in field_select_tables[block][xy] is searched. The best
 * vector of every tried field is stored in mv_tables[block][field][xy] and
 * the chosen field in field_select_tables[block][xy].
 *
 * While searching, the vertical limits are halved and the strides doubled;
 * all four values are restored before returning.
 *
 * @param s                   encoder context
 * @param ref_index           index of the first reference entry in c->ref
 * @param mv_tables           per block / per field motion vector tables
 * @param field_select_tables per block field selection tables
 * @param mx                  frame motion vector, horizontal component
 * @param my                  frame motion vector, vertical component
 * @param user_field_select   non-zero: keep the field selection already in
 *                            field_select_tables
 * @return INT_MAX if the result is equivalent to the frame vector (mx, my);
 *         otherwise the summed score, plus 11*mb_penalty_factor unless
 *         mb_cmp is FF_CMP_RD
 */
static int interlaced_search(MpegEncContext *s, int ref_index,
                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
{
    MotionEstContext * const c= &s->me;
    const int size=0;
    const int h=8;
    int block;
    int P[10][2];
    uint8_t * const mv_penalty= c->current_mv_penalty;
    int same=1;
    const int stride= 2*s->linesize;
    int dmin_sum= 0;
    const int mot_stride= s->mb_stride;
    const int xy= s->mb_x + s->mb_y*mot_stride;
    /* remembered so the limits can be restored exactly: shifting right and
     * left again would lose odd values and left-shift negative numbers */
    const int saved_ymin= c->ymin;
    const int saved_ymax= c->ymax;

    c->ymin>>=1;
    c->ymax>>=1;
    c->stride<<=1;
    c->uvstride<<=1;
    init_interlaced_ref(s, ref_index);

    for(block=0; block<2; block++){
        int field_select;
        int best_dmin= INT_MAX;
        int best_field= -1;

        for(field_select=0; field_select<2; field_select++){
            int dmin, mx_i, my_i;
            int16_t (*mv_table)[2]= mv_tables[block][field_select];

            if(user_field_select){
                assert(field_select==0 || field_select==1);
                assert(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);
                if(field_select_tables[block][xy] != field_select)
                    continue;
            }

            P_LEFT[0] = mv_table[xy - 1][0];
            P_LEFT[1] = mv_table[xy - 1][1];
            /* limits are doubled by multiplication; xmin may be negative */
            if(P_LEFT[0]       > c->xmax*2) P_LEFT[0]       = c->xmax*2;

            c->pred_x= P_LEFT[0];
            c->pred_y= P_LEFT[1];

            if(!s->first_slice_line){
                P_TOP[0]      = mv_table[xy - mot_stride][0];
                P_TOP[1]      = mv_table[xy - mot_stride][1];
                P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
                P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
                if(P_TOP[1]      > c->ymax*2) P_TOP[1]     = c->ymax*2;
                if(P_TOPRIGHT[0] < c->xmin*2) P_TOPRIGHT[0]= c->xmin*2;
                if(P_TOPRIGHT[0] > c->xmax*2) P_TOPRIGHT[0]= c->xmax*2;
                if(P_TOPRIGHT[1] > c->ymax*2) P_TOPRIGHT[1]= c->ymax*2;

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            P_MV1[0]= mx; //FIXME not correct if block != field_select
            P_MV1[1]= my / 2;

            dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select+ref_index, mv_table, (1<<16)>>1);

            dmin= c->sub_motion_search(s, &mx_i, &my_i, dmin, block, field_select+ref_index, size, h);

            mv_table[xy][0]= mx_i;
            mv_table[xy][1]= my_i;

            if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
                int dxy;

                //FIXME chroma ME
                uint8_t *ref= c->ref[field_select+ref_index][0] + (mx_i>>1) + (my_i>>1)*stride;
                dxy = ((my_i & 1) << 1) | (mx_i & 1);

                if(s->no_rounding){
                    s->dsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref    , stride, h);
                }else{
                    s->dsp.put_pixels_tab       [size][dxy](c->scratchpad, ref    , stride, h);
                }
                dmin= s->dsp.mb_cmp[size](s, c->src[block][0], c->scratchpad, stride, h);
                dmin+= (mv_penalty[mx_i-c->pred_x] + mv_penalty[my_i-c->pred_y] + 1)*c->mb_penalty_factor;
            }else
                dmin+= c->mb_penalty_factor; //field_select bits

            dmin += field_select != block; //slightly prefer same field

            if(dmin < best_dmin){
                best_dmin= dmin;
                best_field= field_select;
            }
        }
        /* best_field indexes the tables below; it must have been chosen */
        assert(best_field >= 0);
        {
            int16_t (*mv_table)[2]= mv_tables[block][best_field];

            if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all
            if(mv_table[xy][1]&1) same=0;
            if(mv_table[xy][1]*2 != my) same=0;
            if(best_field != block) same=0;
        }

        field_select_tables[block][xy]= best_field;
        dmin_sum += best_dmin;
    }

    c->ymin= saved_ymin;
    c->ymax= saved_ymax;
    c->stride>>=1;
    c->uvstride>>=1;

    if(same)
        return INT_MAX;

    switch(c->avctx->mb_cmp&0xFF){
    /*case FF_CMP_SSE:
        return dmin_sum+ 32*s->qscale*s->qscale;*/
    case FF_CMP_RD:
        return dmin_sum;
    default:
        return dmin_sum+ 11*c->mb_penalty_factor;
    }
}

1011 1012 1013
/**
 * Clamp a motion vector in place to the limits currently held in s->me.
 *
 * @param s          encoder context providing xmin/xmax/ymin/ymax
 * @param mv         vector to clamp: mv[0] horizontal, mv[1] vertical
 * @param interlaced right-shift applied to the vertical limits
 *                   (the caller visible here passes 0 or 1)
 */
static void clip_input_mv(MpegEncContext * s, int16_t *mv, int interlaced){
    const MotionEstContext * const c= &s->me;
    const int xmin= c->xmin;
    const int xmax= c->xmax;
    const int ymin= c->ymin>>interlaced;
    const int ymax= c->ymax>>interlaced;

    /* lower bound first, then upper bound, for each component */
    if(mv[0] < xmin) mv[0] = xmin;
    if(mv[0] > xmax) mv[0] = xmax;

    if(mv[1] < ymin) mv[1] = ymin;
    if(mv[1] > ymax) mv[1] = ymax;
}

1021 1022 1023 1024 1025 1026 1027 1028 1029
/**
 * Validate and score the motion vectors / macroblock type already present
 * in the current picture.
 *
 * The vectors of the macroblock are clamped with clip_input_mv(), copied
 * into the encoder's candidate motion vector tables according to mb_type,
 * and s->mb_type[mb_xy] is set to the matching CANDIDATE_MB_TYPE_*. The
 * prediction they produce is scored with the SSE compare functions.
 *
 * @param s      encoder context
 * @param mb_x   macroblock column
 * @param mb_y   macroblock row
 * @param p_type non-zero when called for a P frame
 * @return the accumulated score, or INT_MAX/2 if the supplied macroblock
 *         type cannot be used (backward vector in a P frame, interlaced
 *         macroblock without CODEC_FLAG_INTERLACED_ME, 4MV macroblock
 *         without CODEC_FLAG_4MV)
 */
static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int p_type){
    MotionEstContext * const c= &s->me;
    Picture *p= s->current_picture_ptr;
    int mb_xy= mb_x + mb_y*s->mb_stride;
    int xy= 2*mb_x + 2*mb_y*s->b8_stride;
    int mb_type= s->current_picture.mb_type[mb_xy];
    int flags= c->flags;
    int shift= !!(flags&FLAG_QPEL) + 1; /* normalised like in get_limits() */
    int mask= (1<<shift)-1;
    int x, y, i;
    int d=0;
    me_cmp_func cmpf= s->dsp.sse[0];
    me_cmp_func chroma_cmpf= s->dsp.sse[1];

    if(p_type && USES_LIST(mb_type, 1)){
        av_log(c->avctx, AV_LOG_ERROR, "backward motion vector in P frame\n");
        return INT_MAX/2;
    }
    assert(IS_INTRA(mb_type) || USES_LIST(mb_type,0) || USES_LIST(mb_type,1));

    for(i=0; i<4; i++){
        int b_xy= s->block_index[i];
        clip_input_mv(s, p->motion_val[0][b_xy], !!IS_INTERLACED(mb_type));
        clip_input_mv(s, p->motion_val[1][b_xy], !!IS_INTERLACED(mb_type));
    }

    if(IS_INTERLACED(mb_type)){
        int xy2= xy  + s->b8_stride;
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;

        if(!(s->flags & CODEC_FLAG_INTERLACED_ME)){
            av_log(c->avctx, AV_LOG_ERROR, "Interlaced macroblock selected but interlaced motion estimation disabled\n");
            return INT_MAX/2;
        }

        /* doubled only after the early return above, so that an error exit
         * cannot leave the strides modified for the following macroblocks */
        c->stride<<=1;
        c->uvstride<<=1;

        if(USES_LIST(mb_type, 0)){
            int field_select0= p->ref_index[0][xy ];
            int field_select1= p->ref_index[0][xy2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 0);

            if(p_type){
                s->p_field_select_table[0][mb_xy]= field_select0;
                s->p_field_select_table[1][mb_xy]= field_select1;
                *(uint32_t*)s->p_field_mv_table[0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->p_field_mv_table[1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER_I;
            }else{
                s->b_field_select_table[0][0][mb_xy]= field_select0;
                s->b_field_select_table[0][1][mb_xy]= field_select1;
                *(uint32_t*)s->b_field_mv_table[0][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->b_field_mv_table[0][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_FORWARD_I;
            }

            x= p->motion_val[0][xy ][0];
            y= p->motion_val[0][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[0][xy2][0];
            y= p->motion_val[0][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1, 1, cmpf, chroma_cmpf, flags);
        }
        if(USES_LIST(mb_type, 1)){
            int field_select0= p->ref_index[1][xy ];
            int field_select1= p->ref_index[1][xy2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 2);

            s->b_field_select_table[1][0][mb_xy]= field_select0;
            s->b_field_select_table[1][1][mb_xy]= field_select1;
            *(uint32_t*)s->b_field_mv_table[1][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[1][xy ];
            *(uint32_t*)s->b_field_mv_table[1][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[1][xy2];
            if(USES_LIST(mb_type, 0)){
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BIDIR_I;
            }else{
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BACKWARD_I;
            }

            x= p->motion_val[1][xy ][0];
            y= p->motion_val[1][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0+2, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[1][xy2][0];
            y= p->motion_val[1][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1+2, 1, cmpf, chroma_cmpf, flags);
            //FIXME bidir scores
        }
        c->stride>>=1;
        c->uvstride>>=1;
    }else if(IS_8X8(mb_type)){
        if(!(s->flags & CODEC_FLAG_4MV)){
            av_log(c->avctx, AV_LOG_ERROR, "4MV macroblock selected but 4MV encoding disabled\n");
            return INT_MAX/2;
        }
        cmpf= s->dsp.sse[1];
        chroma_cmpf= s->dsp.sse[1];
        init_mv4_ref(c);
        for(i=0; i<4; i++){
            xy= s->block_index[i];
            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 1, 8, i, i, cmpf, chroma_cmpf, flags);
        }
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER4V;
    }else{
        if(USES_LIST(mb_type, 0)){
            if(p_type){
                *(uint32_t*)s->p_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER;
            }else if(USES_LIST(mb_type, 1)){
                *(uint32_t*)s->b_bidir_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                *(uint32_t*)s->b_bidir_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BIDIR;
            }else{
                *(uint32_t*)s->b_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_FORWARD;
            }
            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 0, 0, cmpf, chroma_cmpf, flags);
        }else if(USES_LIST(mb_type, 1)){
            *(uint32_t*)s->b_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BACKWARD;

            x= p->motion_val[1][xy][0];
            y= p->motion_val[1][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 2, 0, cmpf, chroma_cmpf, flags);
        }else
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;
    }
    return d;
}

1157 1158 1159
/**
 * Motion estimation and candidate macroblock type decision for one
 * macroblock of a P frame.
 *
 * Steps:
 *  1. compute mean and variance of the source block and store them in the
 *     current picture (mb_mean, mb_var);
 *  2. if avctx->me_threshold is set, try the motion already present in the
 *     picture via check_input_motion() and return early when it scores
 *     below the threshold;
 *  3. run the full-pel search selected by s->me_method;
 *  4. depending on the mode decision settings, refine to sub-pel, try
 *     4MV and interlaced searches, and compare against an intra score;
 *  5. store the candidate type(s) in s->mb_type[] and update
 *     c->scene_change_score.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_p_frame_motion(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    uint8_t *pix, *ppix;
    int sum, mx, my, dmin;
    int varc;            ///< the variance of the block (sum of squared (p[y][x]-average))
    int vard;            ///< sum of squared differences with the estimated motion vector
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    int mb_type=0;
    Picture * const pic= &s->current_picture;

    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);
    assert(s->linesize == c->stride);
    assert(s->uvlinesize == c->uvstride);

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip=0;

    /* intra / predictive decision */
    pix = c->src[0][0];
    sum = s->dsp.pix_sum(pix, s->linesize);
    varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500;

    pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
    pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
    c->mb_var_sum_temp += (varc+128)>>8;

    if(c->avctx->me_threshold){
        vard= check_input_motion(s, mb_x, mb_y, 1);

        if((vard+128)>>8 < c->avctx->me_threshold){
            /* the supplied motion is good enough, no search needed */
            int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
            int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
            pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
            return;
        }
        if((vard+128)>>8 < c->avctx->mb_threshold)
            mb_type= s->mb_type[mb_x + mb_y*s->mb_stride];
    }

    switch(s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        mx-= mb_x*16;
        my-= mb_y*16;
        dmin = 0;
        break;
#if 0
    case ME_FULL:
        dmin = full_motion_search(s, &mx, &my, range, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_LOG:
        dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_PHODS:
        dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
#endif
    case ME_X1:
    case ME_EPZS:
       {
            const int mot_stride = s->b8_stride;
            const int mot_xy = s->block_index[0];

            P_LEFT[0]       = s->current_picture.motion_val[0][mot_xy - 1][0];
            P_LEFT[1]       = s->current_picture.motion_val[0][mot_xy - 1][1];

            /* limits are scaled by multiplication: xmin may be negative and
             * left-shifting a negative value is undefined behaviour */
            if(P_LEFT[0]       > c->xmax * (1<<shift)) P_LEFT[0]       = c->xmax * (1<<shift);

            if(!s->first_slice_line) {
                P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
                P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
                P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
                P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
                if(P_TOP[1]      > c->ymax * (1<<shift)) P_TOP[1]     = c->ymax * (1<<shift);
                if(P_TOPRIGHT[0] < c->xmin * (1<<shift)) P_TOPRIGHT[0]= c->xmin * (1<<shift);
                if(P_TOPRIGHT[1] > c->ymax * (1<<shift)) P_TOPRIGHT[1]= c->ymax * (1<<shift);

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

                if(s->out_format == FMT_H263){
                    c->pred_x = P_MEDIAN[0];
                    c->pred_y = P_MEDIAN[1];
                }else { /* mpeg1 at least */
                    c->pred_x= P_LEFT[0];
                    c->pred_y= P_LEFT[1];
                }
            }else{
                c->pred_x= P_LEFT[0];
                c->pred_y= P_LEFT[1];
            }

        }
        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

        break;
    }

    /* At this point (mx,my) are full-pel and the relative displacement */
    ppix = c->ref[0][0] + (my * s->linesize) + mx;

    vard = s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16);

    pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
//    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
    c->mc_mb_var_sum_temp += (vard+128)>>8;

#if 0
    printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
           varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
#endif
    if(mb_type){
        /* macroblock type was taken over from the input picture */
        int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
        int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
        c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);

        if(mb_type == CANDIDATE_MB_TYPE_INTER){
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            set_p_mv_tables(s, mx, my, 1);
        }else{
            /* full-pel -> sub-pel units; mx/my may be negative, so scale by
             * multiplication instead of a left shift */
            mx *= 1<<shift;
            my *= 1<<shift;
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER4V){
            h263_mv4_search(s, mx, my, shift);

            set_p_mv_tables(s, mx, my, 0);
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER_I){
            interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 1);
        }
    }else if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* collect all plausible candidates, the final choice is made later */
        int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
        int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
        c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);

        if (vard*2 + 200*256 > varc)
            mb_type|= CANDIDATE_MB_TYPE_INTRA;
        if (varc*2 + 200*256 > vard || s->qscale > 24){
//        if (varc*2 + 200*256 + 50*(s->lambda2>>FF_LAMBDA_SHIFT) > vard){
            mb_type|= CANDIDATE_MB_TYPE_INTER;
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            if(s->flags&CODEC_FLAG_MV0)
                if(mx || my)
                    mb_type |= CANDIDATE_MB_TYPE_SKIPPED; //FIXME check difference
        }else{
            mx *= 1<<shift;
            my *= 1<<shift;
        }
        if((s->flags&CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8){
            if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
                mb_type|=CANDIDATE_MB_TYPE_INTER4V;

            set_p_mv_tables(s, mx, my, 0);
        }else
            set_p_mv_tables(s, mx, my, 1);
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !c->skip){ //FIXME varc/d checks
            if(interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER_I;
        }
    }else{
        /* simple decision: pick the single best type right here */
        int intra_score, i;
        mb_type= CANDIDATE_MB_TYPE_INTER;

        dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
        if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
            dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

        if((s->flags&CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8){
            int dmin4= h263_mv4_search(s, mx, my, shift);
            if(dmin4 < dmin){
                mb_type= CANDIDATE_MB_TYPE_INTER4V;
                dmin=dmin4;
            }
        }
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !c->skip){ //FIXME varc/d checks
            int dmin_i= interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);
            if(dmin_i < dmin){
                mb_type = CANDIDATE_MB_TYPE_INTER_I;
                dmin= dmin_i;
            }
        }

//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
        set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);

        /* get intra luma score */
        if((c->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
            intra_score= varc - 500;
        }else{
            /* replicate the mean into all four bytes; done in unsigned
             * arithmetic because the product can exceed INT_MAX for means
             * of 128 and above */
            uint32_t mean= (sum+128)>>8;
            mean*= 0x01010101U;

            for(i=0; i<16; i++){
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 0]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 4]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 8]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+12]) = mean;
            }

            intra_score= s->dsp.mb_cmp[0](s, c->scratchpad, pix, s->linesize, 16);
        }
#if 0 //FIXME
        /* get chroma score */
        if(c->avctx->mb_cmp&FF_CMP_CHROMA){
            for(i=1; i<3; i++){
                uint8_t *dest_c;
                int mean;

                if(s->out_format == FMT_H263){
                    mean= (s->dc_val[i][mb_x + mb_y*s->b8_stride] + 4)>>3; //FIXME not exact but simple ;)
                }else{
                    mean= (s->last_dc[i] + 4)>>3;
                }
                dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;

                mean*= 0x01010101;
                for(i=0; i<8; i++){
                    *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 0]) = mean;
                    *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 4]) = mean;
                }

                intra_score+= s->dsp.mb_cmp[1](s, c->scratchpad, dest_c, s->uvlinesize);
            }
        }
#endif
        intra_score += c->mb_penalty_factor*16;

        if(intra_score < dmin){
            mb_type= CANDIDATE_MB_TYPE_INTRA;
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
        }else
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;

        {
            int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
            int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
            c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
        }
    }

    s->mb_type[mb_y*s->mb_stride + mb_x]= mb_type;
}

1424 1425 1426
/**
 * Coarse EPZS pre-pass for one macroblock of a P frame.
 *
 * The predictors are taken from the p_mv_table entries at xy+1,
 * xy+mb_stride and xy+mb_stride-1, and the vector found is written back to
 * p_mv_table[xy] scaled up by shift.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the score reported by ff_epzs_motion_search()
 */
int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
                                    int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    const int shift= 1+s->quarter_sample;
    const int xy= mb_x + mb_y*s->mb_stride;
    int P[10][2];
    int best_x, best_y, score;

    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);

    c->pre_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip= 0;

    /* neighbour at xy+1, clipped against the lower horizontal limit */
    P_LEFT[0]= s->p_mv_table[xy + 1][0];
    P_LEFT[1]= s->p_mv_table[xy + 1][1];
    if(P_LEFT[0] < (c->xmin<<shift))
        P_LEFT[0]= c->xmin<<shift;

    if(!s->first_slice_line){
        const int x_hi= c->xmax<<shift;
        const int y_lo= c->ymin<<shift;

        P_TOP[0]     = s->p_mv_table[xy + s->mb_stride    ][0];
        P_TOP[1]     = s->p_mv_table[xy + s->mb_stride    ][1];
        P_TOPRIGHT[0]= s->p_mv_table[xy + s->mb_stride - 1][0];
        P_TOPRIGHT[1]= s->p_mv_table[xy + s->mb_stride - 1][1];

        if(P_TOP[1]      < y_lo) P_TOP[1]     = y_lo;
        if(P_TOPRIGHT[0] > x_hi) P_TOPRIGHT[0]= x_hi;
        if(P_TOPRIGHT[1] < y_lo) P_TOPRIGHT[1]= y_lo;

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

        c->pred_x= P_MEDIAN[0];
        c->pred_y= P_MEDIAN[1];
    }else{
        /* special case for first line: only the single neighbour is usable */
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];

        P_TOP[0]     = P_TOP[1]     = 0; //FIXME
        P_TOPRIGHT[0]= P_TOPRIGHT[1]= 0;
        P_MEDIAN[0]  = P_MEDIAN[1]  = 0;
    }

    score= ff_epzs_motion_search(s, &best_x, &best_y, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

    s->p_mv_table[xy][0]= best_x<<shift;
    s->p_mv_table[xy][1]= best_y<<shift;

    return score;
}

1477
/**
 * Search one motion vector of a B-frame macroblock against a single reference.
 *
 * @param s         encoder context
 * @param mb_x      macroblock column
 * @param mb_y      macroblock row
 * @param mv_table  table the predictors are read from and the result is stored in
 * @param ref_index reference index handed on to the search functions
 * @param f_code    selects the row of c->mv_penalty used as MV penalty table
 * @return the score of the vector written to mv_table
 */
static int ff_estimate_motion_b(MpegEncContext * s,
                       int mb_x, int mb_y, int16_t (*mv_table)[2], int ref_index, int f_code)
{
    MotionEstContext * const c= &s->me;
    const int shift= 1+s->quarter_sample;
    const int stride= s->mb_stride;
    const int pos= mb_y*stride + mb_x;
    int P[10][2];
    int best_x, best_y, score;

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);

    if(s->me_method == ME_X1 || s->me_method == ME_EPZS){
        int mv_scale;

        P_LEFT[0]= mv_table[pos - 1][0];
        P_LEFT[1]= mv_table[pos - 1][1];
        if(P_LEFT[0] > (c->xmax<<shift))
            P_LEFT[0]= c->xmax<<shift;

        /* the upper neighbours are only used below the first line of a slice */
        if(!s->first_slice_line){
            const int x_lo= c->xmin<<shift;
            const int y_hi= c->ymax<<shift;

            P_TOP[0]     = mv_table[pos - stride    ][0];
            P_TOP[1]     = mv_table[pos - stride    ][1];
            P_TOPRIGHT[0]= mv_table[pos - stride + 1][0];
            P_TOPRIGHT[1]= mv_table[pos - stride + 1][1];

            if(P_TOP[1]      > y_hi) P_TOP[1]     = y_hi;
            if(P_TOPRIGHT[0] < x_lo) P_TOPRIGHT[0]= x_lo;
            if(P_TOPRIGHT[1] > y_hi) P_TOPRIGHT[1]= y_hi;

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
        }
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];

        /* scale factor passed to the EPZS search together with s->p_mv_table */
        if(mv_table == s->b_forw_mv_table){
            mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
        }else{
            mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);
        }

        score= ff_epzs_motion_search(s, &best_x, &best_y, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);
    }else{
        /* ME_ZERO and every other method: the full/log/phods searches are
           not compiled in for this path, so no search is done at all */
        no_motion_search(s, &best_x, &best_y);
        score= 0;
        best_x-= mb_x*16;
        best_y-= mb_y*16;
    }

    score= c->sub_motion_search(s, &best_x, &best_y, score, 0, ref_index, 0, 16);

    /* rescore with the macroblock compare function if it differs from the
       one used by the sub-pel search */
    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        score= ff_get_mb_score(s, best_x, best_y, 0, ref_index, 0, 16, 1);

    mv_table[pos][0]= best_x;
    mv_table[pos][1]= best_y;

    return score;
}

1570
/**
 * Score one forward/backward motion vector pair.
 *
 * The forward prediction is written to c->scratchpad, the backward prediction
 * is averaged into it, and the result is compared against the source block
 * with s->dsp.mb_cmp[size]. The penalties of both vector differences
 * (vector minus predictor), weighted by c->mb_penalty_factor, are added.
 *
 * @param s         encoder context
 * @param motion_fx forward vector, x component
 * @param motion_fy forward vector, y component
 * @param motion_bx backward vector, x component
 * @param motion_by backward vector, y component
 * @param pred_fx   forward predictor, x component
 * @param pred_fy   forward predictor, y component
 * @param pred_bx   backward predictor, x component
 * @param pred_by   backward predictor, y component
 * @param size      index into the put/avg/mb_cmp function tables
 * @param h         block height passed to the half-pel and compare functions
 * @return the combined score, lower is better
 */
static inline int check_bidir_mv(MpegEncContext * s,
                   int motion_fx, int motion_fy,
                   int motion_bx, int motion_by,
                   int pred_fx, int pred_fy,
                   int pred_bx, int pred_by,
                   int size, int h)
{
    //FIXME optimize?
    //FIXME better f_code prediction (max mv & distance)
    //FIXME pointers
    MotionEstContext * const c= &s->me;
    uint8_t * const mv_penalty_f= c->mv_penalty[s->f_code] + MAX_MV; // penalty table selected by s->f_code
    uint8_t * const mv_penalty_b= c->mv_penalty[s->b_code] + MAX_MV; // penalty table selected by s->b_code
    int stride= c->stride;
    uint8_t *dest_y = c->scratchpad;
    uint8_t *ptr;
    int dxy;
    int src_x, src_y;
    int fbmin;
    uint8_t **src_data= c->src[0];
    uint8_t **ref_data= c->ref[0];
    uint8_t **ref2_data= c->ref[2];

    if(s->quarter_sample){
        /* low 2 bits select the sub-pel function, the rest is the full-pel offset */
        dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
        src_x = motion_fx >> 2;
        src_y = motion_fy >> 2;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        /* use the same table row as the averaging call below; this used to be
           hard-coded to row 0, which only matched for size==0 */
        s->dsp.put_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);

        dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
        src_x = motion_bx >> 2;
        src_y = motion_by >> 2;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);
    }else{
        /* low bit selects the sub-pel function, the rest is the full-pel offset */
        dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
        src_x = motion_fx >> 1;
        src_y = motion_fy >> 1;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        s->dsp.put_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);

        dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
        src_x = motion_bx >> 1;
        src_y = motion_by >> 1;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
    }

    fbmin = (mv_penalty_f[motion_fx-pred_fx] + mv_penalty_f[motion_fy-pred_fy])*c->mb_penalty_factor
           +(mv_penalty_b[motion_bx-pred_bx] + mv_penalty_b[motion_by-pred_by])*c->mb_penalty_factor
           + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic

    //FIXME CHROMA !!! (chroma is not scored even if mb_cmp has FF_CMP_CHROMA set)

    return fbmin;
}
1633

1634
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1635
static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
1636
{
1637
    MotionEstContext * const c= &s->me;
1638 1639
    const int mot_stride = s->mb_stride;
    const int xy = mb_y *mot_stride + mb_x;
1640 1641 1642 1643 1644 1645 1646 1647 1648
    int fbmin;
    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1649 1650 1651 1652 1653 1654 1655
    const int flags= c->sub_flags;
    const int qpel= flags&FLAG_QPEL;
    const int shift= 1+qpel;
    const int xmin= c->xmin<<shift;
    const int ymin= c->ymin<<shift;
    const int xmax= c->xmax<<shift;
    const int ymax= c->ymax<<shift;
1656 1657 1658 1659 1660 1661
    uint8_t map[8][8][8][8];

    memset(map,0,sizeof(map));
#define BIDIR_MAP(fx,fy,bx,by) \
    map[(motion_fx+fx)&7][(motion_fy+fy)&7][(motion_bx+bx)&7][(motion_by+by)&7]
    BIDIR_MAP(0,0,0,0) = 1;
1662

1663
    fbmin= check_bidir_mv(s, motion_fx, motion_fy,
1664 1665
                          motion_bx, motion_by,
                          pred_fx, pred_fy,
1666 1667
                          pred_bx, pred_by,
                          0, 16);
1668

1669 1670 1671
    if(s->avctx->bidir_refine){
        int score, end;
#define CHECK_BIDIR(fx,fy,bx,by)\
1672 1673
    if( !BIDIR_MAP(fx,fy,bx,by)\
       &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
Michael Niedermayer's avatar
Michael Niedermayer committed
1674
       &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
1675
        BIDIR_MAP(fx,fy,bx,by) = 1;\
Michael Niedermayer's avatar
Michael Niedermayer committed
1676 1677 1678 1679 1680 1681 1682 1683 1684
        score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
        if(score < fbmin){\
            fbmin= score;\
            motion_fx+=fx;\
            motion_fy+=fy;\
            motion_bx+=bx;\
            motion_by+=by;\
            end=0;\
        }\
1685 1686 1687
    }
#define CHECK_BIDIR2(a,b,c,d)\
CHECK_BIDIR(a,b,c,d)\
1688
CHECK_BIDIR(-(a),-(b),-(c),-(d))
1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723

#define CHECK_BIDIRR(a,b,c,d)\
CHECK_BIDIR2(a,b,c,d)\
CHECK_BIDIR2(b,c,d,a)\
CHECK_BIDIR2(c,d,a,b)\
CHECK_BIDIR2(d,a,b,c)

        do{
            end=1;

            CHECK_BIDIRR( 0, 0, 0, 1)
            if(s->avctx->bidir_refine > 1){
                CHECK_BIDIRR( 0, 0, 1, 1)
                CHECK_BIDIR2( 0, 1, 0, 1)
                CHECK_BIDIR2( 1, 0, 1, 0)
                CHECK_BIDIRR( 0, 0,-1, 1)
                CHECK_BIDIR2( 0,-1, 0, 1)
                CHECK_BIDIR2(-1, 0, 1, 0)
                if(s->avctx->bidir_refine > 2){
                    CHECK_BIDIRR( 0, 1, 1, 1)
                    CHECK_BIDIRR( 0,-1, 1, 1)
                    CHECK_BIDIRR( 0, 1,-1, 1)
                    CHECK_BIDIRR( 0, 1, 1,-1)
                    if(s->avctx->bidir_refine > 3){
                        CHECK_BIDIR2( 1, 1, 1, 1)
                        CHECK_BIDIRR( 1, 1, 1,-1)
                        CHECK_BIDIR2( 1, 1,-1,-1)
                        CHECK_BIDIR2( 1,-1,-1, 1)
                        CHECK_BIDIR2( 1,-1, 1,-1)
                    }
                }
            }
        }while(!end);
    }

1724 1725 1726 1727 1728
    s->b_bidir_forw_mv_table[xy][0]= motion_fx;
    s->b_bidir_forw_mv_table[xy][1]= motion_fy;
    s->b_bidir_back_mv_table[xy][0]= motion_bx;
    s->b_bidir_back_mv_table[xy][1]= motion_by;

1729
    return fbmin;
1730 1731
}

1732
/**
 * Search the delta vector for direct mode of one B-frame macroblock.
 *
 * The co-located vectors of s->next_picture are scaled by pb_time/pp_time
 * into c->direct_basis_mv, the search window is narrowed so that the vectors
 * derived from them stay usable, and the delta found is stored in
 * s->b_direct_mv_table.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the score of the stored delta, or 256*256*256*64 if the search
 *         window is empty (a zero delta is stored in that case)
 */
static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int16_t (* const mv_table)[2]= s->b_direct_mv_table;
    const int stride= s->mb_stride;
    const int pos= mb_y*stride + mb_x;
    const int shift= 1+s->quarter_sample;
    const int time_pp= s->pp_time;
    const int time_pb= s->pb_time;
    int P[10][2];
    int xmin, xmax, ymin, ymax;
    int mx, my, dmin;
    int i, nb_blocks;

    c->current_mv_penalty= c->mv_penalty[1] + MAX_MV;
    xmin= ymin= (-32)>>shift;
    xmax= ymax=   31>>shift;

    s->mv_type= IS_8X8(s->next_picture.mb_type[pos]) ? MV_TYPE_8X8 : MV_TYPE_16X16;
    /* a 16x16 co-located macroblock contributes only its first block */
    nb_blocks= s->mv_type == MV_TYPE_16X16 ? 1 : 4;

    for(i=0; i<nb_blocks; i++){
        const int index= s->block_index[i];
        int lo, hi;

        c->co_located_mv[i][0]= s->next_picture.motion_val[0][index][0];
        c->co_located_mv[i][1]= s->next_picture.motion_val[0][index][1];
        c->direct_basis_mv[i][0]= c->co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
        c->direct_basis_mv[i][1]= c->co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));

        /* horizontal window; the +-1 is for the simpler rounding */
        hi= FFMAX(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        lo= FFMIN(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        hi+= 16*mb_x + 1;
        lo+= 16*mb_x - 1;
        xmax= FFMIN(xmax, s->width - hi);
        xmin= FFMAX(xmin, - 16     - lo);

        /* vertical window; the +-1 is for the simpler rounding */
        hi= FFMAX(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        lo= FFMIN(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        hi+= 16*mb_y + 1;
        lo+= 16*mb_y - 1;
        ymax= FFMIN(ymax, s->height - hi);
        ymin= FFMAX(ymin, - 16      - lo);
    }

    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);

    /* the window does not contain the zero delta: direct mode is unusable here */
    if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
        mv_table[pos][0]= 0;
        mv_table[pos][1]= 0;

        return 256*256*256*64;
    }

    c->xmin= xmin;
    c->ymin= ymin;
    c->xmax= xmax;
    c->ymax= ymax;
    c->flags     |= FLAG_DIRECT;
    c->sub_flags |= FLAG_DIRECT;
    c->pred_x= 0;
    c->pred_y= 0;

    P_LEFT[0]= clip(mv_table[pos - 1][0], xmin<<shift, xmax<<shift);
    P_LEFT[1]= clip(mv_table[pos - 1][1], ymin<<shift, ymax<<shift);

    /* the upper neighbours are only used below the first line of a slice */
    if(!s->first_slice_line){ //FIXME maybe allow this over thread boundary as its clipped
        P_TOP[0]     = clip(mv_table[pos - stride    ][0], xmin<<shift, xmax<<shift);
        P_TOP[1]     = clip(mv_table[pos - stride    ][1], ymin<<shift, ymax<<shift);
        P_TOPRIGHT[0]= clip(mv_table[pos - stride + 1][0], xmin<<shift, xmax<<shift);
        P_TOPRIGHT[1]= clip(mv_table[pos - stride + 1][1], ymin<<shift, ymax<<shift);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    }

    dmin= ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);

    if(c->sub_flags&FLAG_QPEL){
        dmin= qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    }else{
        dmin= hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    }

    /* rescore with the macroblock compare function if it differs from the
       one used by the sub-pel search */
    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

    get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed

    mv_table[pos][0]= mx;
    mv_table[pos][1]= my;
    c->flags     &= ~FLAG_DIRECT;
    c->sub_flags &= ~FLAG_DIRECT;

    return dmin;
}

/**
 * Estimate the motion of one B-frame macroblock and choose its candidate types.
 *
 * Runs the direct (MPEG-4 only), forward, backward, bidirectional and, with
 * CODEC_FLAG_INTERLACED_ME, the interlaced searches, stores the resulting
 * candidate type mask in s->mb_type[] and updates mc_mb_var and
 * c->mc_mb_var_sum_temp from the best score.
 *
 * Two shortcuts return early: an MPEG-4 macroblock whose co-located
 * macroblock in next_picture is skipped is handled by the direct search only,
 * and with avctx->me_threshold set the motion already present is reused when
 * it scores below the thresholds.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_b_frame_motion(MpegEncContext * s,
                             int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    /* NOTE(review): sampled on entry, i.e. before the ff_estimate_motion_b()
       calls below recompute c->mb_penalty_factor */
    const int penalty_factor= c->mb_penalty_factor;
    int fmin, bmin, dmin, fbmin, bimin, fimin;
    int type=0;
    const int xy = mb_y*s->mb_stride + mb_x;
    init_ref(c, s->new_picture.data, s->last_picture.data, s->next_picture.data, 16*mb_x, 16*mb_y, 2);

    get_limits(s, 16*mb_x, 16*mb_y);

    c->skip=0;

    if(s->codec_id == CODEC_ID_MPEG4 && s->next_picture.mbskip_table[xy]){
        int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0

        /* square in unsigned arithmetic: direct_search() can return 2^30,
           whose square overflows a signed int */
        score= ((unsigned)score*(unsigned)score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
        s->mb_type[xy]= CANDIDATE_MB_TYPE_DIRECT0;

        return;
    }

    if(c->avctx->me_threshold){
        int vard= check_input_motion(s, mb_x, mb_y, 0);

        if((vard+128)>>8 < c->avctx->me_threshold){
            /* good enough: keep the input motion as is; only the motion
               compensated variance is recorded on this path */
            s->current_picture.mc_mb_var[xy] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            return;
        }
        if((vard+128)>>8 < c->avctx->mb_threshold){
            /* keep the macroblock type, redo only the searches it needs */
            type= s->mb_type[xy];
            if(type == CANDIDATE_MB_TYPE_DIRECT){
                direct_search(s, mb_x, mb_y);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
                interlaced_search(s, 0,
                                        s->b_field_mv_table[0], s->b_field_select_table[0],
                                        s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 1);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
                interlaced_search(s, 2,
                                        s->b_field_mv_table[1], s->b_field_select_table[1],
                                        s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 1);
            }
            return;
        }
    }

    if (s->codec_id == CODEC_ID_MPEG4)
        dmin= direct_search(s, mb_x, mb_y);
    else
        dmin= INT_MAX;
//FIXME penalty stuff for non mpeg4
    c->skip=0;
    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) + 3*penalty_factor;

    c->skip=0;
    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) + 2*penalty_factor;

    c->skip=0;
    fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;

    if(s->flags & CODEC_FLAG_INTERLACED_ME){
//FIXME mb type penalty
        c->skip=0;
        c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
        fimin= interlaced_search(s, 0,
                                 s->b_field_mv_table[0], s->b_field_select_table[0],
                                 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
        c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
        bimin= interlaced_search(s, 2,
                                 s->b_field_mv_table[1], s->b_field_select_table[1],
                                 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
    }else
        fimin= bimin= INT_MAX;

    {
        /* pick the type with the lowest score; direct wins a tie against forward */
        int score= fmin;
        type = CANDIDATE_MB_TYPE_FORWARD;

        if (dmin <= score){
            score = dmin;
            type = CANDIDATE_MB_TYPE_DIRECT;
        }
        if(bmin<score){
            score=bmin;
            type= CANDIDATE_MB_TYPE_BACKWARD;
        }
        if(fbmin<score){
            score=fbmin;
            type= CANDIDATE_MB_TYPE_BIDIR;
        }
        if(fimin<score){
            score=fimin;
            type= CANDIDATE_MB_TYPE_FORWARD_I;
        }
        if(bimin<score){
            score=bimin;
            type= CANDIDATE_MB_TYPE_BACKWARD_I;
        }

        /* square in unsigned arithmetic so that large scores cannot overflow
           a signed int */
        score= ((unsigned)score*(unsigned)score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
    }

    if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* the final decision is taken later: offer every usable type */
        type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
        if(fimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_FORWARD_I;
        if(bimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_BACKWARD_I;
        if(fimin < INT_MAX && bimin < INT_MAX){
            type |= CANDIDATE_MB_TYPE_BIDIR_I;
        }
         //FIXME something smarter
        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
        /* also offer direct mode with a zero delta if the delta found is not
           already zero; the components are tested individually instead of
           reading the int16_t pair through a uint32_t pointer */
        if(   s->codec_id == CODEC_ID_MPEG4
           && (type&CANDIDATE_MB_TYPE_DIRECT)
           && (s->flags&CODEC_FLAG_MV0)
           && (s->b_direct_mv_table[xy][0] | s->b_direct_mv_table[xy][1]))
            type |= CANDIDATE_MB_TYPE_DIRECT0;
#if 0
        if(s->out_format == FMT_MPEG1)
            type |= CANDIDATE_MB_TYPE_INTRA;
#endif
    }

    s->mb_type[xy]= type;
}

/**
 * Find the best f_code for motion estimation methods which do unlimited searches.
 *
 * Every f_code i starts with a score of mb_num*(8-i). Each macroblock
 * selected by type whose vector lies inside the allowed range and needs an
 * f_code of fcode lowers the scores of all smaller f_codes by 170 (in
 * non-B frames only if its motion compensated variance is below its
 * variance). The f_code in 1..7 with the highest score wins.
 *
 * @param s        encoder context
 * @param mv_table motion vectors to inspect, indexed like s->mb_type
 * @param type     mask of candidate macroblock types to take into account
 * @return the chosen f_code, or 1 if s->me_method is below ME_EPZS
 */
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
{
    int score[8];
    int i, y, range;
    uint8_t * fcode_tab= s->fcode_tab;
    int best_fcode=-1;
    int best_score=-10000000;

    if(s->me_method < ME_EPZS)
        return 1;

    range= s->avctx->me_range ? s->avctx->me_range : (INT_MAX/2);

    if(s->msmpeg4_version)
        range= FFMIN(range, 16);
    else if(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL)
        range= FFMIN(range, 256);

    for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);

    for(y=0; y<s->mb_height; y++){
        int x;
        int xy= y*s->mb_stride;

        /* xy is advanced in the loop header so that the "continue" below
           cannot leave it stuck on the rejected macroblock */
        for(x=0; x<s->mb_width; x++, xy++){
            int mx, my, fcode, limit, j;

            if(!(s->mb_type[xy] & type))
                continue;

            mx= mv_table[xy][0];
            my= mv_table[xy][1];

            /* reject out-of-range vectors before they are used as table index */
            if(mx >= range || mx < -range ||
               my >= range || my < -range)
                continue;

            fcode= FFMAX(fcode_tab[mx + MAX_MV],
                         fcode_tab[my + MAX_MV]);
            limit= FFMIN(fcode, 8);

            /* the condition does not depend on j, test it once */
            if(limit > 0 && (s->pict_type==B_TYPE || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy])){
                for(j=0; j<limit; j++)
                    score[j]-= 170;
            }
        }
    }

    for(i=1; i<8; i++){
        if(score[i] > best_score){
            best_score= score[i];
            best_fcode= i;
        }
    }

    return best_fcode;
}

2052 2053
/**
 * Turn 4MV macroblocks of a P frame whose vectors exceed the range of the
 * current f_code into intra candidates.
 *
 * Does nothing unless CODEC_FLAG_4MV is set. The range is 8<<f_code for
 * FMT_MPEG1 / msmpeg4 and 16<<f_code otherwise, further limited by
 * avctx->me_range if that is set.
 *
 * @param s encoder context
 */
void ff_fix_long_p_mvs(MpegEncContext * s)
{
    MotionEstContext * const c= &s->me;
    const int f_code= s->f_code;
    int limit, wrap;
    int mb_x, mb_y;

    assert(s->pict_type==P_TYPE);

    limit= ((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code;

    assert(limit <= 16 || !s->msmpeg4_version);
    assert(limit <=256 || !(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));

    if(c->avctx->me_range && limit > c->avctx->me_range)
        limit= c->avctx->me_range;

    if(!(s->flags&CODEC_FLAG_4MV))
        return;

    wrap= s->b8_stride;

    /* clip / convert to intra 8x8 type MVs */
    for(mb_y=0; mb_y<s->mb_height; mb_y++){
        int mv_xy= mb_y*2*wrap;        /* index of the macroblock's first 8x8 vector */
        int mb_xy= mb_y*s->mb_stride;  /* index into the macroblock type tables */

        for(mb_x=0; mb_x<s->mb_width; mb_x++, mv_xy+=2, mb_xy++){
            int blk;

            if(!(s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V))
                continue;

            for(blk=0; blk<4; blk++){
                const int off= (blk& 1) + (blk>>1)*wrap;
                const int mx= s->current_picture.motion_val[0][ mv_xy + off ][0];
                const int my= s->current_picture.motion_val[0][ mv_xy + off ][1];
                const int in_range=    mx < limit && mx >= -limit
                                    && my < limit && my >= -limit;

                if(!in_range){
                    s->mb_type[mb_xy] &= ~CANDIDATE_MB_TYPE_INTER4V;
                    s->mb_type[mb_xy] |= CANDIDATE_MB_TYPE_INTRA;
                    s->current_picture.mb_type[mb_xy]= CANDIDATE_MB_TYPE_INTRA;
                }
            }
        }
    }
}

2099 2100 2101 2102
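/**
 * Fix the motion vectors of the given macroblock type that exceed the range
 * allowed by f_code (and avctx->me_range, if set).
 * @param truncate 1 to clip out-of-range vectors to the limit,
 *                 0 to turn the macroblock into an intra candidate and zero its vector
 */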
2103
void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
2104
                     int16_t (*mv_table)[2], int f_code, int type, int truncate)
2105
{
2106
    MotionEstContext * const c= &s->me;
2107
    int y, h_range, v_range;
2108

2109
    // RAL: 8 in MPEG-1, 16 in MPEG-4
2110
    int range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
2111

2112
    if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
2113

2114 2115 2116
    h_range= range;
    v_range= field_select_table ? range>>1 : range;

2117 2118 2119
    /* clip / convert to intra 16x16 type MVs */
    for(y=0; y<s->mb_height; y++){
        int x;
2120
        int xy= y*s->mb_stride;
2121
        for(x=0; x<s->mb_width; x++){
2122
            if (s->mb_type[xy] & type){    // RAL: "type" test added...
2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137
                if(field_select_table==NULL || field_select_table[xy] == field_select){
                    if(   mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
                       || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){

                        if(truncate){
                            if     (mv_table[xy][0] > h_range-1) mv_table[xy][0]=  h_range-1;
                            else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
                            if     (mv_table[xy][1] > v_range-1) mv_table[xy][1]=  v_range-1;
                            else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
                        }else{
                            s->mb_type[xy] &= ~type;
                            s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
                            mv_table[xy][0]=
                            mv_table[xy][1]= 0;
                        }
2138
                    }
2139
                }
2140 2141 2142 2143 2144
            }
            xy++;
        }
    }
}