/*
 * Motion estimation
 * Copyright (c) 2000,2001 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 * new motion estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file motion_est.c
 * Motion estimation.
 */
29

Fabrice Bellard's avatar
Fabrice Bellard committed
30 31
#include <stdlib.h>
#include <stdio.h>
32
#include <limits.h>
Fabrice Bellard's avatar
Fabrice Bellard committed
33 34 35 36
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"

37 38
#undef NDEBUG
#include <assert.h>
Michael Niedermayer's avatar
Michael Niedermayer committed
39

40
#define SQ(a) ((a)*(a))
41

42 43 44 45 46 47
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]

Michael Niedermayer's avatar
Michael Niedermayer committed
48
static inline int sad_hpel_motion_search(MpegEncContext * s,
49
                                  int *mx_ptr, int *my_ptr, int dmin,
50 51
                                  int src_index, int ref_index,
                                  int size, int h);
Michael Niedermayer's avatar
Michael Niedermayer committed
52

53
/**
 * Advance the generation stamp stored in the motion estimation context.
 *
 * The stamp is increased by 1<<(ME_MAP_MV_BITS*2). If it becomes zero after
 * the increment, c->map (ME_MAP_SIZE entries of uint32_t) is cleared and the
 * stamp restarts at one step.
 *
 * @param c motion estimation context whose map_generation is updated
 * @return the new value of c->map_generation
 */
static inline int update_map_generation(MotionEstContext *c)
{
    const int generation_step = 1 << (ME_MAP_MV_BITS * 2);

    c->map_generation += generation_step;
    if (!c->map_generation) {
        /* stamp reached zero: wipe the map and restart from the first step */
        memset(c->map, 0, sizeof(uint32_t) * ME_MAP_SIZE);
        c->map_generation = generation_step;
    }
    return c->map_generation;
}

63 64 65 66 67 68
/* shape adaptive search stuff */
typedef struct Minima{
    int height;
    int x, y;
    int checked;
}Minima;

/**
 * Three-way comparator ordering Minima entries by ascending height.
 *
 * The signature matches what qsort() expects. The result is computed with
 * two comparisons instead of a subtraction, because height differences can
 * overflow a signed int (undefined behavior) and then yield the wrong sign.
 *
 * @param a pointer to the first Minima
 * @param b pointer to the second Minima
 * @return -1 if a's height is smaller, 1 if it is larger, 0 if equal
 */
static int minima_cmp(const void *a, const void *b){
    const Minima *da = (const Minima *) a;
    const Minima *db = (const Minima *) b;

    return (da->height > db->height) - (da->height < db->height);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
76

77 78 79
#define FLAG_QPEL   1 //must be 1
#define FLAG_CHROMA 2
#define FLAG_DIRECT 4
Michael Niedermayer's avatar
Michael Niedermayer committed
80

81
/**
 * Point the context's source and reference plane pointers at position (x, y).
 *
 * Plane 0 uses the offset y*c->stride + x; planes 1 and 2 use
 * (y*c->uvstride + x)>>1. Slot 0 of c->src and c->ref is always filled from
 * src and ref. When ref_index is nonzero, c->ref[ref_index] is additionally
 * filled from ref2 with the same offsets.
 *
 * @param c         motion estimation context to update
 * @param src       three source plane base pointers
 * @param ref       three reference plane base pointers
 * @param ref2      three plane base pointers of a second reference; only read
 *                  when ref_index is nonzero
 * @param x         horizontal position used in the offset
 * @param y         vertical position used in the offset
 * @param ref_index slot of c->ref that receives ref2, or 0 for none
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
    const int luma_off  = y*c->stride + x;
    const int chroma_off= (y*c->uvstride + x)>>1;
    const int plane_off[3]= { luma_off, chroma_off, chroma_off };
    int plane;

    for(plane=0; plane<3; plane++){
        c->src[0][plane]= src[plane] + plane_off[plane];
        c->ref[0][plane]= ref[plane] + plane_off[plane];
    }

    if(!ref_index)
        return;

    for(plane=0; plane<3; plane++)
        c->ref[ref_index][plane]= ref2[plane] + plane_off[plane];
}

99 100
/**
 * Build the FLAG_* bit set used by the comparison code.
 *
 * @param c      context; c->avctx->flags is tested for CODEC_FLAG_QPEL
 * @param direct nonzero to include FLAG_DIRECT
 * @param chroma nonzero to include FLAG_CHROMA
 * @return combination of FLAG_QPEL, FLAG_DIRECT and FLAG_CHROMA
 */
static int get_flags(MotionEstContext *c, int direct, int chroma){
    int flags= 0;

    if(c->avctx->flags&CODEC_FLAG_QPEL)
        flags|= FLAG_QPEL;
    if(direct)
        flags|= FLAG_DIRECT;
    if(chroma)
        flags|= FLAG_CHROMA;

    return flags;
}

Loren Merritt's avatar
Loren Merritt committed
105 106
/*! \brief compares a block (either a full macroblock or a partition thereof)
    against a proposed motion-compensated prediction of that block
107
 */
108
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int uvstride= c->uvstride;
    const int qpel= flags&FLAG_QPEL;
    const int chroma= flags&FLAG_CHROMA;
    const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
    const int hx= subx + (x<<(1+qpel));
    const int hy= suby + (y<<(1+qpel));
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    int d;
    //FIXME check chroma 4mv, (no crashes ...)
    if(flags&FLAG_DIRECT){
124
        assert(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1));
125 126 127 128 129 130 131 132 133 134 135 136 137
        if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){
            const int time_pp= s->pp_time;
            const int time_pb= s->pb_time;
            const int mask= 2*qpel+1;
            if(s->mv_type==MV_TYPE_8X8){
                int i;
                for(i=0; i<4; i++){
                    int fx = c->direct_basis_mv[i][0] + hx;
                    int fy = c->direct_basis_mv[i][1] + hy;
                    int bx = hx ? fx - c->co_located_mv[i][0] : c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(qpel+4));
                    int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
                    int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                    int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
138

139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
                    uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1);
                    if(qpel){
                        c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
                        c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride);
                    }else{
                        c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
                        c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
                    }
                }
            }else{
                int fx = c->direct_basis_mv[0][0] + hx;
                int fy = c->direct_basis_mv[0][1] + hy;
                int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
                int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
                int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
155

156 157 158 159 160 161 162 163 164
                if(qpel){
                    c->qpel_put[1][fxy](c->temp               , ref[0] + (fx>>2) + (fy>>2)*stride               , stride);
                    c->qpel_put[1][fxy](c->temp + 8           , ref[0] + (fx>>2) + (fy>>2)*stride + 8           , stride);
                    c->qpel_put[1][fxy](c->temp     + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride     + 8*stride, stride);
                    c->qpel_put[1][fxy](c->temp + 8 + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride + 8 + 8*stride, stride);
                    c->qpel_avg[1][bxy](c->temp               , ref[8] + (bx>>2) + (by>>2)*stride               , stride);
                    c->qpel_avg[1][bxy](c->temp + 8           , ref[8] + (bx>>2) + (by>>2)*stride + 8           , stride);
                    c->qpel_avg[1][bxy](c->temp     + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride     + 8*stride, stride);
                    c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
165
                }else{
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
                    assert((fx>>1) + 16*s->mb_x >= -16);
                    assert((fy>>1) + 16*s->mb_y >= -16);
                    assert((fx>>1) + 16*s->mb_x <= s->width);
                    assert((fy>>1) + 16*s->mb_y <= s->height);
                    assert((bx>>1) + 16*s->mb_x >= -16);
                    assert((by>>1) + 16*s->mb_y >= -16);
                    assert((bx>>1) + 16*s->mb_x <= s->width);
                    assert((by>>1) + 16*s->mb_y <= s->height);

                    c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
                    c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
                }
            }
            d = cmp_func(s, c->temp, src[0], stride, 16);
        }else
            d= 256*256*256*32;
    }else{
183
        int uvdxy;              /* no, it might not be used uninitialized */
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
        if(dxy){
            if(qpel){
                c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
                if(chroma){
                    int cx= hx/2;
                    int cy= hy/2;
                    cx= (cx>>1)|(cx&1);
                    cy= (cy>>1)|(cy&1);
                    uvdxy= (cx&1) + 2*(cy&1);
                    //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
                }
            }else{
                c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
                if(chroma)
                    uvdxy= dxy | (x&1) | (2*(y&1));
            }
200
            d = cmp_func(s, c->temp, src[0], stride, h);
201
        }else{
202
            d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h);
203 204 205 206 207 208 209
            if(chroma)
                uvdxy= (x&1) + 2*(y&1);
        }
        if(chroma){
            uint8_t * const uvtemp= c->temp + 16*stride;
            c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
            c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
210 211
            d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
            d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
212 213 214 215 216 217 218 219 220 221 222 223
        }
    }
#if 0
    if(full_pel){
        const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
        score_map[index]= d;
    }

    d += (c->mv_penalty[hx - c->pred_x] + c->mv_penalty[hy - c->pred_y])*c->penalty_factor;
#endif
    return d;
}
Michael Niedermayer's avatar
Michael Niedermayer committed
224 225 226

#include "motion_est_template.c"

Michael Niedermayer's avatar
Michael Niedermayer committed
227 228 229 230 231 232 233
/**
 * Comparison stub: ignores all of its arguments and reports a score of 0.
 *
 * @return always 0
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;

    return 0;
}

/**
 * Pixel-copy stub: does nothing; neither buffer is read or written.
 */
static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
}

Michael Niedermayer's avatar
Michael Niedermayer committed
234
/**
 * Initialize the motion estimation context s->me from the encoder settings.
 *
 * Selects the comparison functions and flag sets, the sub-sample search
 * routine, the interpolation tables, the strides and the scratch pointer.
 * For codecs other than Snow, the entries at index 2 of the chroma-enabled
 * compare tables and of the hpel put table are replaced by no-op stubs.
 *
 * @param s encoder context; s->me, s->dsp and s->avctx are used
 */
void ff_init_me(MpegEncContext *s){
    MotionEstContext * const c= &s->me;
    const int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
    const int dia_size  = FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255);

    c->avctx= s->avctx;

    /* c->stride is assigned further down, so this tests the value the
     * context held before this call */
    if(!c->stride && cache_size < 2*dia_size){
        av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n");
    }

    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_cmp    , c->avctx->me_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
    ff_set_cmp(&s->dsp, s->dsp.mb_cmp    , c->avctx->mb_cmp);

    c->flags    = get_flags(c, 0, c->avctx->me_cmp    &FF_CMP_CHROMA);
    c->sub_flags= get_flags(c, 0, c->avctx->me_sub_cmp&FF_CMP_CHROMA);
    c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);

/*FIXME s->no_rounding b_type*/
    if(s->flags&CODEC_FLAG_QPEL){
        c->sub_motion_search= qpel_motion_search;
        c->qpel_avg= s->dsp.avg_qpel_pixels_tab;
        if(!s->no_rounding) c->qpel_put= s->dsp.put_qpel_pixels_tab;
        else                c->qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab;
    }else{
        /* the SAD-only search is picked when the sub compare has no chroma
         * bit and all three compare settings are plain SAD
         * (original note: 2050 vs. 2450 cycles) */
        const int sub_uses_chroma= c->avctx->me_sub_cmp&FF_CMP_CHROMA;
        const int everything_sad =    c->avctx->me_sub_cmp == FF_CMP_SAD
                                   && c->avctx->    me_cmp == FF_CMP_SAD
                                   && c->avctx->    mb_cmp == FF_CMP_SAD;

        if(!sub_uses_chroma && everything_sad)
            c->sub_motion_search= sad_hpel_motion_search;
        else
            c->sub_motion_search= hpel_motion_search;
    }

    c->hpel_avg= s->dsp.avg_pixels_tab;
    if(!s->no_rounding) c->hpel_put= s->dsp.put_pixels_tab;
    else                c->hpel_put= s->dsp.put_no_rnd_pixels_tab;

    if(!s->linesize){
        /* no line size known yet: derive the strides from the width in macroblocks */
        c->stride  = 16*s->mb_width + 32;
        c->uvstride=  8*s->mb_width + 16;
    }else{
        c->stride  = s->linesize;
        c->uvstride= s->uvlinesize;
    }

    /* 8x8 fullpel search would need a 4x4 chroma compare, which we do
     * not have yet, and even if we had, the motion estimation code
     * does not expect it. */
    if(s->codec_id != CODEC_ID_SNOW){
        int i;

        /* unlike me_sub_cmp below, me_cmp[2] is overwritten unconditionally */
        if(c->avctx->me_cmp&FF_CMP_CHROMA){
            s->dsp.me_cmp[2]= zero_cmp;
        }
        if((c->avctx->me_sub_cmp&FF_CMP_CHROMA) && !s->dsp.me_sub_cmp[2]){
            s->dsp.me_sub_cmp[2]= zero_cmp;
        }
        /* written through c->hpel_put, i.e. into the table selected above */
        for(i=0; i<4; i++)
            c->hpel_put[2][i]= zero_hpel;
    }

    if(s->codec_id == CODEC_ID_H261){
        c->sub_motion_search= no_sub_motion_search;
    }

    c->temp= c->scratchpad;
}
301

302
#if 0
/**
 * Sum of absolute differences between every sample of a 16x16 block and a
 * given mean value (currently compiled out).
 *
 * @param pix       top-left sample of the block
 * @param line_size distance between the starts of two rows
 * @param mean      value each sample is compared against
 * @return accumulated absolute deviation over the 256 samples
 */
static int pix_dev(uint8_t * pix, int line_size, int mean)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += FFABS(pix[col]-mean);
        pix += line_size;
    }
    return sum;
}
#endif
325

326
/**
 * Trivial "search": reports the macroblock's own position.
 *
 * @param s      encoder context; s->mb_x and s->mb_y are read
 * @param mx_ptr receives 16 * s->mb_x
 * @param my_ptr receives 16 * s->mb_y
 */
static inline void no_motion_search(MpegEncContext * s,
                                    int *mx_ptr, int *my_ptr)
{
    const int mb_px_x = 16 * s->mb_x;
    const int mb_px_y = 16 * s->mb_y;

    *mx_ptr = mb_px_x;
    *my_ptr = mb_px_y;
}

333
#if 0  /* the use of these functions is inside #if 0 */
Fabrice Bellard's avatar
Fabrice Bellard committed
334 335
/**
 * Exhaustive full-sample search around the current macroblock.
 *
 * Every position within range-1 of the macroblock origin, clipped to
 * [xmin,xmax] x [ymin,ymax], is scored with s->dsp.pix_abs[0][0]. Ties are
 * resolved in favour of the position closer (sum of absolute coordinate
 * differences) to the macroblock origin.
 *
 * @param s           encoder context
 * @param mx_ptr      receives the x coordinate of the best position
 * @param my_ptr      receives the y coordinate of the best position
 * @param range       search radius
 * @param xmin        lowest allowed x
 * @param ymin        lowest allowed y
 * @param xmax        highest allowed x
 * @param ymax        highest allowed y
 * @param ref_picture reference plane that is searched
 * @return score of the best position (0x7fffffff if none was visited)
 */
static int full_motion_search(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int range,
                              int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    const int xx = 16 * s->mb_x;
    const int yy = 16 * s->mb_y;
    /* we lose one pixel to avoid boundary problems with half pixel prediction */
    const int x1 = FFMAX(xx - range + 1, xmin);
    const int x2 = FFMIN(xx + range - 1, xmax);
    const int y1 = FFMAX(yy - range + 1, ymin);
    const int y2 = FFMIN(yy + range - 1, ymax);
    uint8_t * const pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    int best_x = 0;
    int best_y = 0;
    int dmin = 0x7fffffff;
    int x, y;

    for (y = y1; y <= y2; y++) {
        for (x = x1; x <= x2; x++) {
            const int d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
                                               s->linesize, 16);
            if (d < dmin ||
                (d == dmin &&
                 (abs(x - xx) + abs(y - yy)) <
                 (abs(best_x - xx) + abs(best_y - yy)))) {
                dmin   = d;
                best_x = x;
                best_y = y;
            }
        }
    }

    *mx_ptr = best_x;
    *my_ptr = best_y;

#if 0
    if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
        *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
        av_log(NULL, AV_LOG_ERROR, "error %d %d\n", *mx_ptr, *my_ptr);
    }
#endif
    return dmin;
}


/**
 * Logarithmic full-sample search.
 *
 * Starting with a window of +-range around the macroblock origin, a grid
 * with spacing range is scored with s->dsp.pix_abs[0][0]; range is then
 * halved and the window re-centred on the best position so far, until range
 * drops below 1. Ties go to the position closer to the macroblock origin.
 *
 * @param s           encoder context
 * @param mx_ptr      receives the x coordinate of the best position
 * @param my_ptr      receives the y coordinate of the best position
 * @param range       initial search radius and grid spacing
 * @param xmin        lowest allowed x
 * @param ymin        lowest allowed y
 * @param xmax        highest allowed x
 * @param ymax        highest allowed y
 * @param ref_picture reference plane that is searched
 * @return score of the best position
 */
static int log_motion_search(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int range,
                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    const int xx = s->mb_x << 4;
    const int yy = s->mb_y << 4;
    uint8_t * const pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    /* initial window: left, right, upper and lower limits */
    int x1 = FFMAX(xx - range, xmin);
    int x2 = FFMIN(xx + range, xmax);
    int y1 = FFMAX(yy - range, ymin);
    int y2 = FFMIN(yy + range, ymax);
    int dmin = 0x7fffffff;
    int mx = 0;
    int my = 0;
    int x, y;

    do {
        for (y = y1; y <= y2; y += range) {
            for (x = x1; x <= x2; x += range) {
                const int d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
                if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                    dmin = d;
                    mx = x;
                    my = y;
                }
            }
        }

        /* halve the step and re-centre the window on the current best */
        range >>= 1;

        x1 = FFMAX(mx - range, xmin);
        x2 = FFMIN(mx + range, xmax);
        y1 = FFMAX(my - range, ymin);
        y2 = FFMIN(my + range, ymax);

    } while (range >= 1);

#ifdef DEBUG
    av_log(s->avctx, AV_LOG_DEBUG, "log       - MX: %d\tMY: %d\n", mx, my);
#endif
    *mx_ptr = mx;
    *my_ptr = my;
    return dmin;
}

/**
 * Full-sample search that scans the two axes independently.
 *
 * In every round the current row is scanned horizontally and the current
 * column vertically (grid spacing range), each with its own minimum that is
 * reset at the start of the round. range is then halved and the window
 * re-centred on the best x and best y found. The loop ends once range drops
 * below 1.
 *
 * @param s           encoder context
 * @param mx_ptr      receives the selected x coordinate
 * @param my_ptr      receives the selected y coordinate
 * @param range       initial search radius and grid spacing
 * @param xmin        lowest allowed x
 * @param ymin        lowest allowed y
 * @param xmax        highest allowed x
 * @param ymax        highest allowed y
 * @param ref_picture reference plane that is searched
 * @return the vertical-scan minimum of the last round
 */
static int phods_motion_search(MpegEncContext * s,
                               int *mx_ptr, int *my_ptr, int range,
                               int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    const int xx = s->mb_x << 4;
    const int yy = s->mb_y << 4;
    uint8_t * const pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    /* initial window: left, right, upper and lower limits */
    int x1 = FFMAX(xx - range, xmin);
    int x2 = FFMIN(xx + range, xmax);
    int y1 = FFMAX(yy - range, ymin);
    int y2 = FFMIN(yy + range, ymax);
    int mx = 0;
    int my = 0;
    int x = xx;
    int y = yy;
    int dminx, dminy;

    do {
        int sx, sy, d;

        dminx = 0x7fffffff;
        dminy = 0x7fffffff;

        /* horizontal scan along the current row y */
        for (sx = x1; sx <= x2; sx += range) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + sx, s->linesize, 16);
            if (d < dminx || (d == dminx && (abs(sx - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminx = d;
                mx = sx;
            }
        }

        /* vertical scan along the current column x (mx may already be updated) */
        for (sy = y1; sy <= y2; sy += range) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (sy * s->linesize) + x, s->linesize, 16);
            if (d < dminy || (d == dminy && (abs(x - xx) + abs(sy - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminy = d;
                my = sy;
            }
        }

        range >>= 1;

        /* continue from the best column/row with the narrower window */
        x = mx;
        y = my;
        x1 = FFMAX(mx - range, xmin);
        x2 = FFMIN(mx + range, xmax);
        y1 = FFMAX(my - range, ymin);
        y2 = FFMIN(my + range, ymax);

    } while (range >= 1);

#ifdef DEBUG
    av_log(s->avctx, AV_LOG_DEBUG, "phods     - MX: %d\tMY: %d\n", mx, my);
#endif

    /* half pixel search */
    *mx_ptr = mx;
    *my_ptr = my;
    return dminy;
}
554
#endif /* 0 */
555 556 557

#define Z_THRESHOLD 256

Michael Niedermayer's avatar
Michael Niedermayer committed
558
/*
 * Score one half-sample candidate with offset (x, y) and keep it if it beats
 * the best score so far.
 *
 * The expansion uses these names from the invoking scope: s, size, pix, ptr,
 * stride, h, mv_penalty, pen_x, pen_y, penalty_factor, d, dminh, dx, dy.
 * The pix_abs variant is chosen from which of x and y are nonzero, and the
 * reference pointer is moved by (x)>>1. The motion vector penalty is added
 * before COPY3_IF_LT updates dminh/dx/dy. The suffix argument is not used.
 * The macro expands to a braced block, so invocations take no trailing
 * semicolon. Arguments are parenthesized so that expressions can be passed
 * safely.
 */
#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
    d= s->dsp.pix_abs[size][((x)?1:0)+((y)?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
    d += (mv_penalty[pen_x + (x)] + mv_penalty[pen_y + (y)])*penalty_factor;\
    COPY3_IF_LT(dminh, d, dx, (x), dy, (y))\
}
564

Michael Niedermayer's avatar
Michael Niedermayer committed
565
/**
 * Half-sample refinement of a full-sample motion vector using SAD only.
 *
 * If c->skip is set, the vector is forced to (0,0) and dmin is returned.
 * Otherwise, when the vector lies strictly inside the search window, the
 * scores of the four full-sample neighbours (top, left, right, bottom) are
 * read from score_map and used to pick which of the eight surrounding
 * half-sample positions are evaluated with CHECK_SAD_HALF_MV. In all cases
 * the vector written back is in half-sample units.
 *
 * NOTE(review): LOAD_COMMON is defined outside this view; it presumably
 * declares score_map, xmin/xmax/ymin/ymax, pred_x/pred_y and mv_penalty,
 * which are used below -- confirm in the template file.
 *
 * @param s         encoder context
 * @param mx_ptr    in: full-sample x component; out: half-sample x component
 * @param my_ptr    in: full-sample y component; out: half-sample y component
 * @param dmin      score of the incoming full-sample vector
 * @param src_index selects c->src[src_index]
 * @param ref_index selects c->ref[ref_index]
 * @param size      index into s->dsp.pix_abs
 * @param h         number of rows to compare
 * @return best score found (dmin if nothing better was found)
 */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->sub_penalty_factor;
    int mx, my, dminh;
    uint8_t *pix, *ptr;
    int stride= c->stride;
    const int flags= c->sub_flags;
    LOAD_COMMON

    /* this routine handles neither qpel nor chroma nor direct mode */
    assert(flags == 0);

    if(c->skip){
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    pix = c->src[src_index][0];

    mx = *mx_ptr;
    my = *my_ptr;
    ptr = c->ref[ref_index][0] + (my * stride) + mx;

    dminh = dmin;

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        /* scores of the full-sample neighbours: top, left, right, bottom */
        const int index= (my<<ME_MAP_SHIFT) + mx;
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        mx<<=1;
        my<<=1;

        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /* ptr starts one row up for the y = -1 candidates and is moved back
         * down (ptr+= stride) before any y = 0 / y = +1 candidate is checked */
        ptr-= stride;
        if(t<=b){
            CHECK_SAD_HALF_MV(y2 , 0, -1)
            if(l<=r){
                CHECK_SAD_HALF_MV(xy2, -1, -1)
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
            }else{
                CHECK_SAD_HALF_MV(xy2, +1, -1)
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
            }
        }else{
            if(l<=r){
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
                CHECK_SAD_HALF_MV(xy2, -1, +1)
            }else{
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
                CHECK_SAD_HALF_MV(xy2, +1, +1)
            }
            CHECK_SAD_HALF_MV(y2 ,  0, +1)
        }
        mx+=dx;
        my+=dy;

    }else{
        /* on the window border: no refinement, just convert the units */
        mx<<=1;
        my<<=1;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}

673
/**
 * Store a motion vector for the current macroblock.
 *
 * The vector is always written to s->p_mv_table at the macroblock's index.
 * When mv4 is nonzero it is also written to the 2x2 group of entries in
 * s->current_picture.motion_val[0] that starts at s->block_index[0]
 * (original note: "has already been set to the 4 MV if 4MV is done").
 *
 * @param s   encoder context
 * @param mx  x component to store
 * @param my  y component to store
 * @param mv4 nonzero to also fill the four motion_val entries
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int row, col;

    s->p_mv_table[mb_xy][0] = mx;
    s->p_mv_table[mb_xy][1] = my;

    if(!mv4)
        return;

    for(row=0; row<2; row++){
        /* the second row of the group is one b8_stride further */
        const int row_start= s->block_index[0] + row*s->b8_stride;

        for(col=0; col<2; col++){
            s->current_picture.motion_val[0][row_start + col][0]= mx;
            s->current_picture.motion_val[0][row_start + col][1]= my;
        }
    }
}

697 698 699
/**
 * get fullpel ME search limits.
 */
700
static inline void get_limits(MpegEncContext *s, int x, int y)
Fabrice Bellard's avatar
Fabrice Bellard committed
701
{
702
    MotionEstContext * const c= &s->me;
703
    int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
704
/*
705
    if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
706
    else                   c->range= 16;
707
*/
Fabrice Bellard's avatar
Fabrice Bellard committed
708
    if (s->unrestricted_mv) {
709 710 711 712
        c->xmin = - x - 16;
        c->ymin = - y - 16;
        c->xmax = - x + s->mb_width *16;
        c->ymax = - y + s->mb_height*16;
713 714 715 716
    } else if (s->out_format == FMT_H261){
        // Search range of H261 is different from other codec standards
        c->xmin = (x > 15) ? - 15 : 0;
        c->ymin = (y > 15) ? - 15 : 0;
717
        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
718
        c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
Fabrice Bellard's avatar
Fabrice Bellard committed
719
    } else {
720 721 722 723
        c->xmin = - x;
        c->ymin = - y;
        c->xmax = - x + s->mb_width *16 - 16;
        c->ymax = - y + s->mb_height*16 - 16;
Fabrice Bellard's avatar
Fabrice Bellard committed
724
    }
725 726 727 728 729 730
    if(range){
        c->xmin = FFMAX(c->xmin,-range);
        c->xmax = FFMIN(c->xmax, range);
        c->ymin = FFMAX(c->ymin,-range);
        c->ymax = FFMIN(c->ymax, range);
    }
731 732
}

733 734
/**
 * Derive the luma pointers of slots 1..3 from slot 0 for 8x8 block access.
 *
 * Slot 1 is 8 samples to the right of slot 0, slot 2 is 8 rows below it and
 * slot 3 is both. This is done for c->ref and c->src; only plane 0 is
 * touched.
 *
 * @param c motion estimation context whose ref/src slots are filled
 */
static inline void init_mv4_ref(MotionEstContext *c){
    const int eight_rows= 8*c->stride;
    uint8_t * const ref_base= c->ref[0][0];
    uint8_t * const src_base= c->src[0][0];

    c->ref[1][0] = ref_base + 8;
    c->ref[2][0] = ref_base + eight_rows;
    c->ref[3][0] = ref_base + eight_rows + 8;

    c->src[1][0] = src_base + 8;
    c->src[2][0] = src_base + eight_rows;
    c->src[3][0] = src_base + eight_rows + 8;
}

744
/**
 * Search one motion vector for each of the four 8x8 luma blocks of the
 * current macroblock.
 *
 * The four vectors found are stored in s->current_picture.motion_val[0] at
 * s->block_index[0..3]. c->pred_x / c->pred_y are used as working storage for
 * the per-block predictor and are set to (mx, my) before returning normally.
 *
 * @param s     encoder context
 * @param mx    x component of the vector of the whole macroblock, in the
 *              units of the stored motion vectors; used as search candidate
 *              and to detect the case where 4 vectors bring nothing new
 * @param my    y component, see mx
 * @param shift scale between the full-pel search limits and the stored
 *              motion vectors (limit * (1<<shift))
 * @return INT_MAX if all four vectors equal (mx, my); otherwise the summed
 *         score, plus 11*mb_penalty_factor unless mb_cmp is FF_CMP_RD
 */
static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
{
    MotionEstContext * const c= &s->me;
    const int size= 1;
    const int h=8;
    const int stride= c->stride;
    uint8_t *mv_penalty= c->current_mv_penalty;
    int dmin_sum=0, mx4_sum=0, my4_sum=0;
    int same=1;
    int block;
    int P[10][2];

    init_mv4_ref(c);

    for(block=0; block<4; block++){
        // offset from the position above the block to the third predictor
        static const int off[4]= {2, 1, 1, -1};
        const int mot_stride = s->b8_stride;
        const int mot_xy = s->block_index[block];
        // Search limits in motion vector units. Multiplication is used
        // instead of <<shift because the minimum is usually negative and
        // left shifting a negative value is undefined behaviour.
        const int xmin_sub= c->xmin * (1<<shift);
        const int xmax_sub= c->xmax * (1<<shift);
        const int ymax_sub= c->ymax * (1<<shift);
        int mx4, my4;
        int pred_x4, pred_y4;
        int dmin4;

        P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
        P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];

        if(P_LEFT[0] > xmax_sub) P_LEFT[0] = xmax_sub;

        if (s->first_slice_line && block<2) {
            /* special case for first line: only the left vector is used */
            pred_x4= P_LEFT[0];
            pred_y4= P_LEFT[1];
        } else {
            P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][0];
            P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][1];
            P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
            P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];

            if(P_TOP[1]      > ymax_sub) P_TOP[1]     = ymax_sub;
            if(P_TOPRIGHT[0] < xmin_sub) P_TOPRIGHT[0]= xmin_sub;
            if(P_TOPRIGHT[0] > xmax_sub) P_TOPRIGHT[0]= xmax_sub;
            if(P_TOPRIGHT[1] > ymax_sub) P_TOPRIGHT[1]= ymax_sub;

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            pred_x4 = P_MEDIAN[0];
            pred_y4 = P_MEDIAN[1];
        }
        c->pred_x= pred_x4;
        c->pred_y= pred_y4;

        // the vector of the whole macroblock is a candidate too
        P_MV1[0]= mx;
        P_MV1[1]= my;

        dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);

        dmin4= c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);

        if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
            // The macroblock is scored with mb_cmp after the loop, so build
            // the prediction of this block in the scratchpad and only account
            // for the vector cost here.
            int dxy;
            const int offset= ((block&1) + (block>>1)*stride)*8;
            uint8_t *dest_y = c->scratchpad + offset;

            if(s->quarter_sample){
                uint8_t *ref= c->ref[block][0] + (mx4>>2) + (my4>>2)*stride;
                dxy = ((my4 & 3) << 2) | (mx4 & 3);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y   , ref    , stride);
                else
                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y   , ref    , stride);
            }else{
                uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
                dxy = ((my4 & 1) << 1) | (mx4 & 1);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y    , ref    , stride, h);
                else
                    s->dsp.put_pixels_tab       [1][dxy](dest_y    , ref    , stride, h);
            }
            dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*c->mb_penalty_factor;
        }else
            dmin_sum+= dmin4;

        // the sums feed the chroma vector, which is derived at half-pel precision
        if(s->quarter_sample){
            mx4_sum+= mx4/2;
            my4_sum+= my4/2;
        }else{
            mx4_sum+= mx4;
            my4_sum+= my4;
        }

        s->current_picture.motion_val[0][ s->block_index[block] ][0]= mx4;
        s->current_picture.motion_val[0][ s->block_index[block] ][1]= my4;

        if(mx4 != mx || my4 != my) same=0;
    }

    // four vectors identical to the 16x16 one: 4MV brings nothing
    if(same)
        return INT_MAX;

    if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
        dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, c->scratchpad, stride, 16);
    }

    if(c->avctx->mb_cmp&FF_CMP_CHROMA){
        // named differently from the parameters, which are needed again below
        const int chroma_mx= ff_h263_round_chroma(mx4_sum);
        const int chroma_my= ff_h263_round_chroma(my4_sum);
        const int dxy= ((chroma_my & 1) << 1) | (chroma_mx & 1);
        const int offset= (s->mb_x*8 + (chroma_mx>>1)) + (s->mb_y*8 + (chroma_my>>1))*s->uvlinesize;

        if(s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }else{
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }

        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad  , s->uvlinesize, 8);
        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad+8, s->uvlinesize, 8);
    }

    c->pred_x= mx;
    c->pred_y= my;

    // every comparison function except RD gets a fixed penalty for the
    // additional vectors
    if((c->avctx->mb_cmp&0xFF) == FF_CMP_RD)
        return dmin_sum;

    return dmin_sum+ 11*c->mb_penalty_factor;
}

880 881 882 883 884 885 886 887 888 889 890 891 892
/**
 * Set up the pointers of the second field for interlaced motion estimation.
 *
 * Entry 1 of c->src and entry ref_index+1 of c->ref are made to point one
 * picture line below entry 0 of c->src and entry ref_index of c->ref.
 * The chroma planes are only set up when FLAG_CHROMA is set in c->flags.
 *
 * @param s         encoder context
 * @param ref_index index into c->ref of the first field
 */
static inline void init_interlaced_ref(MpegEncContext *s, int ref_index){
    MotionEstContext * const c= &s->me;
    const int first = ref_index;
    const int second= ref_index + 1;
    int plane;

    c->ref[second][0] = c->ref[first][0] + s->linesize;
    c->src[1][0]      = c->src[0][0]     + s->linesize;

    if(!(c->flags & FLAG_CHROMA))
        return;

    for(plane=1; plane<3; plane++){
        c->ref[second][plane] = c->ref[first][plane] + s->uvlinesize;
        c->src[1][plane]      = c->src[0][plane]     + s->uvlinesize;
    }
}

893
/**
 * Search one motion vector for each of the two fields of the current
 * macroblock.
 *
 * For each field ("block") both reference fields are tried, unless
 * user_field_select restricts the search to the field already stored in
 * field_select_tables. Each vector found is written to
 * mv_tables[block][field][xy] and the chosen reference field to
 * field_select_tables[block][xy].
 *
 * While searching, c->ymin/c->ymax are halved and c->stride/c->uvstride are
 * doubled; they are scaled back before returning.
 *
 * @param s                   encoder context
 * @param ref_index           index into c->ref of the first reference field
 * @param mv_tables           [block][field_select] motion vector tables
 * @param field_select_tables [block] tables of the selected reference field
 * @param mx                  x component of the frame vector of the
 *                            macroblock, used as search candidate
 * @param my                  y component of the frame vector; my/2 is used as
 *                            candidate for the field
 * @param user_field_select   if nonzero, only the reference field found in
 *                            field_select_tables is searched
 * @return INT_MAX if the result is considered the same as the frame vector;
 *         otherwise the summed score, plus 11*mb_penalty_factor unless
 *         mb_cmp is FF_CMP_RD
 */
static int interlaced_search(MpegEncContext *s, int ref_index,
                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
{
    MotionEstContext * const c= &s->me;
    const int size=0;
    const int h=8;
    int block;
    int P[10][2];
    uint8_t * const mv_penalty= c->current_mv_penalty;
    int same=1;
    const int stride= 2*s->linesize;
    int dmin_sum= 0;
    const int mot_stride= s->mb_stride;
    const int xy= s->mb_x + s->mb_y*mot_stride;

    // a field has half the lines of the frame and twice the line distance
    c->ymin>>=1;
    c->ymax>>=1;
    c->stride<<=1;
    c->uvstride<<=1;
    init_interlaced_ref(s, ref_index);

    for(block=0; block<2; block++){
        int field_select;
        int best_dmin= INT_MAX;
        int best_field= -1;

        for(field_select=0; field_select<2; field_select++){
            int16_t (*mv_table)[2]= mv_tables[block][field_select];
            // Search limits in half-pel units. Multiplication is used
            // instead of <<1 because the minimum is usually negative and
            // left shifting a negative value is undefined behaviour.
            const int xmin_hpel= c->xmin*2;
            const int xmax_hpel= c->xmax*2;
            const int ymax_hpel= c->ymax*2;
            int dmin, mx_i, my_i;

            if(user_field_select){
                assert(field_select==0 || field_select==1);
                assert(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);
                if(field_select_tables[block][xy] != field_select)
                    continue;
            }

            P_LEFT[0] = mv_table[xy - 1][0];
            P_LEFT[1] = mv_table[xy - 1][1];
            if(P_LEFT[0] > xmax_hpel) P_LEFT[0] = xmax_hpel;

            // the left vector is the predictor the vector cost is based on
            c->pred_x= P_LEFT[0];
            c->pred_y= P_LEFT[1];

            if(!s->first_slice_line){
                P_TOP[0]      = mv_table[xy - mot_stride][0];
                P_TOP[1]      = mv_table[xy - mot_stride][1];
                P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
                P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];

                if(P_TOP[1]      > ymax_hpel) P_TOP[1]     = ymax_hpel;
                if(P_TOPRIGHT[0] < xmin_hpel) P_TOPRIGHT[0]= xmin_hpel;
                if(P_TOPRIGHT[0] > xmax_hpel) P_TOPRIGHT[0]= xmax_hpel;
                if(P_TOPRIGHT[1] > ymax_hpel) P_TOPRIGHT[1]= ymax_hpel;

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            P_MV1[0]= mx; //FIXME not correct if block != field_select
            P_MV1[1]= my / 2;

            dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select+ref_index, mv_table, (1<<16)>>1);

            dmin= c->sub_motion_search(s, &mx_i, &my_i, dmin, block, field_select+ref_index, size, h);

            mv_table[xy][0]= mx_i;
            mv_table[xy][1]= my_i;

            if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
                // rescore the vector found with the macroblock comparison function
                int dxy;

                //FIXME chroma ME
                uint8_t *ref= c->ref[field_select+ref_index][0] + (mx_i>>1) + (my_i>>1)*stride;
                dxy = ((my_i & 1) << 1) | (mx_i & 1);

                if(s->no_rounding){
                    s->dsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref    , stride, h);
                }else{
                    s->dsp.put_pixels_tab       [size][dxy](c->scratchpad, ref    , stride, h);
                }
                dmin= s->dsp.mb_cmp[size](s, c->src[block][0], c->scratchpad, stride, h);
                dmin+= (mv_penalty[mx_i-c->pred_x] + mv_penalty[my_i-c->pred_y] + 1)*c->mb_penalty_factor;
            }else
                dmin+= c->mb_penalty_factor; //field_select bits

            dmin += field_select != block; //slightly prefer same field

            if(dmin < best_dmin){
                best_dmin= dmin;
                best_field= field_select;
            }
        }
        {
            int16_t (*best_table)[2]= mv_tables[block][best_field];

            //FIXME check if these checks work and are any good at all
            if(best_table[xy][0] != mx) same=0;
            if(best_table[xy][1]&1) same=0;
            if(best_table[xy][1]*2 != my) same=0;
            if(best_field != block) same=0;
        }

        field_select_tables[block][xy]= best_field;
        dmin_sum += best_dmin;
    }

    // Undo the field scaling. c->ymin is usually negative, so it is doubled
    // by multiplication; left shifting it would be undefined behaviour.
    c->ymin *= 2;
    c->ymax *= 2;
    c->stride>>=1;
    c->uvstride>>=1;

    if(same)
        return INT_MAX;

    // every comparison function except RD gets a fixed penalty
    if((c->avctx->mb_cmp&0xFF) == FF_CMP_RD)
        return dmin_sum;

    return dmin_sum+ 11*c->mb_penalty_factor;
}

1015 1016 1017
/**
 * Clip a motion vector to the current search limits, in place.
 *
 * @param s          encoder context; the limits are read from s->me
 * @param mv         vector to clip, mv[0] is the x and mv[1] the y component
 * @param interlaced number of bits the vertical limits are shifted right by
 *                   before they are applied (the caller in this file passes
 *                   0 or 1)
 */
static void clip_input_mv(MpegEncContext * s, int16_t *mv, int interlaced){
    const int xmin= s->me.xmin;
    const int xmax= s->me.xmax;
    const int ymin= s->me.ymin>>interlaced;
    const int ymax= s->me.ymax>>interlaced;

    if(mv[0] < xmin){
        mv[0] = xmin;
    }
    if(mv[0] > xmax){
        mv[0] = xmax;
    }
    if(mv[1] < ymin){
        mv[1] = ymin;
    }
    if(mv[1] > ymax){
        mv[1] = ymax;
    }
}

1025 1026 1027 1028 1029 1030 1031 1032 1033
/**
 * Evaluate the macroblock type and motion vectors already stored in the
 * current picture.
 *
 * The stored vectors of the macroblock are clipped to the search limits,
 * copied into the candidate motion vector tables that match the macroblock
 * type, the candidate type is written to s->mb_type[mb_xy] and the vectors
 * are scored with the SSE functions of s->dsp.
 *
 * @param s      encoder context
 * @param mb_x   macroblock x position
 * @param mb_y   macroblock y position
 * @param p_type nonzero when called for a P frame
 * @return the score of the stored vectors, 0 when the macroblock uses none,
 *         or INT_MAX/2 if the stored type cannot be used (backward vector in
 *         a P frame, interlaced or 4MV type while the matching encoder flag
 *         is not set)
 */
static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int p_type){
    MotionEstContext * const c= &s->me;
    Picture *p= s->current_picture_ptr;
    int mb_xy= mb_x + mb_y*s->mb_stride;
    int xy= 2*mb_x + 2*mb_y*s->b8_stride;
    int mb_type= s->current_picture.mb_type[mb_xy];
    int flags= c->flags;
    int shift= (flags&FLAG_QPEL) + 1;
    int mask= (1<<shift)-1;
    int x, y, i;
    int d=0;
    me_cmp_func cmpf= s->dsp.sse[0];
    me_cmp_func chroma_cmpf= s->dsp.sse[1];

    if(p_type && USES_LIST(mb_type, 1)){
        av_log(c->avctx, AV_LOG_ERROR, "backward motion vector in P frame\n");
        return INT_MAX/2;
    }
    assert(IS_INTRA(mb_type) || USES_LIST(mb_type,0) || USES_LIST(mb_type,1));

    for(i=0; i<4; i++){
        int xy= s->block_index[i];
        clip_input_mv(s, p->motion_val[0][xy], !!IS_INTERLACED(mb_type));
        clip_input_mv(s, p->motion_val[1][xy], !!IS_INTERLACED(mb_type));
    }

    if(IS_INTERLACED(mb_type)){
        int xy2= xy  + s->b8_stride;
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;

        if(!(s->flags & CODEC_FLAG_INTERLACED_ME)){
            av_log(c->avctx, AV_LOG_ERROR, "Interlaced macroblock selected but interlaced motion estimation disabled\n");
            return INT_MAX/2;
        }

        // The strides are switched to field addressing only after the check
        // above, so that the error return cannot leave them doubled.
        c->stride<<=1;
        c->uvstride<<=1;

        if(USES_LIST(mb_type, 0)){
            int field_select0= p->ref_index[0][xy ];
            int field_select1= p->ref_index[0][xy2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 0);

            if(p_type){
                s->p_field_select_table[0][mb_xy]= field_select0;
                s->p_field_select_table[1][mb_xy]= field_select1;
                *(uint32_t*)s->p_field_mv_table[0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->p_field_mv_table[1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER_I;
            }else{
                s->b_field_select_table[0][0][mb_xy]= field_select0;
                s->b_field_select_table[0][1][mb_xy]= field_select1;
                *(uint32_t*)s->b_field_mv_table[0][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->b_field_mv_table[0][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_FORWARD_I;
            }

            x= p->motion_val[0][xy ][0];
            y= p->motion_val[0][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[0][xy2][0];
            y= p->motion_val[0][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1, 1, cmpf, chroma_cmpf, flags);
        }
        if(USES_LIST(mb_type, 1)){
            int field_select0= p->ref_index[1][xy ];
            int field_select1= p->ref_index[1][xy2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 2);

            s->b_field_select_table[1][0][mb_xy]= field_select0;
            s->b_field_select_table[1][1][mb_xy]= field_select1;
            *(uint32_t*)s->b_field_mv_table[1][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[1][xy ];
            *(uint32_t*)s->b_field_mv_table[1][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[1][xy2];
            if(USES_LIST(mb_type, 0)){
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BIDIR_I;
            }else{
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BACKWARD_I;
            }

            // note: this replaces the list 0 score computed above
            x= p->motion_val[1][xy ][0];
            y= p->motion_val[1][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0+2, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[1][xy2][0];
            y= p->motion_val[1][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1+2, 1, cmpf, chroma_cmpf, flags);
            //FIXME bidir scores
        }
        c->stride>>=1;
        c->uvstride>>=1;
    }else if(IS_8X8(mb_type)){
        if(!(s->flags & CODEC_FLAG_4MV)){
            av_log(c->avctx, AV_LOG_ERROR, "4MV macroblock selected but 4MV encoding disabled\n");
            return INT_MAX/2;
        }
        cmpf= s->dsp.sse[1];
        chroma_cmpf= s->dsp.sse[1];
        init_mv4_ref(c);
        for(i=0; i<4; i++){
            xy= s->block_index[i];
            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 1, 8, i, i, cmpf, chroma_cmpf, flags);
        }
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER4V;
    }else{
        if(USES_LIST(mb_type, 0)){
            if(p_type){
                *(uint32_t*)s->p_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER;
            }else if(USES_LIST(mb_type, 1)){
                *(uint32_t*)s->b_bidir_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                *(uint32_t*)s->b_bidir_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BIDIR;
            }else{
                *(uint32_t*)s->b_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_FORWARD;
            }

            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 0, 0, cmpf, chroma_cmpf, flags);
        }else if(USES_LIST(mb_type, 1)){
            *(uint32_t*)s->b_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BACKWARD;

            x= p->motion_val[1][xy][0];
            y= p->motion_val[1][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 2, 0, cmpf, chroma_cmpf, flags);
        }else
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;
    }
    return d;
}

1161 1162 1163
/**
 * Add the contribution of one macroblock to the scene change score.
 *
 * @param s    encoder context; s->me.scene_change_score is updated
 * @param vard sum of squared differences with the estimated motion vector
 * @param varc variance based value of the source block, including the
 *             offset of 500 added by the caller
 */
static inline void update_scene_change_score(MpegEncContext *s, int vard, int varc)
{
    MotionEstContext * const c= &s->me;
    const int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
    const int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;

    c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
}

/**
 * Estimate the motion of one macroblock of a P frame and select its
 * candidate macroblock type(s).
 *
 * The block mean and variance, the motion compensated variance and the scene
 * change score are updated as a side effect, the vectors found are stored
 * through set_p_mv_tables() and the searches called from here, and the
 * resulting candidate type mask is written to s->mb_type.
 *
 * @param s    encoder context
 * @param mb_x macroblock x position
 * @param mb_y macroblock y position
 */
void ff_estimate_p_frame_motion(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    uint8_t *pix, *ppix;
    int sum, mx, my, dmin;
    int varc;            ///< the variance of the block (sum of squared (p[y][x]-average))
    int vard;            ///< sum of squared differences with the estimated motion vector
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    const int mb_xy= s->mb_stride * mb_y + mb_x;
    int mb_type=0;
    Picture * const pic= &s->current_picture;

    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);
    assert(s->linesize == c->stride);
    assert(s->uvlinesize == c->uvstride);

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip=0;

    /* intra / predictive decision */
    pix = c->src[0][0];
    sum = s->dsp.pix_sum(pix, s->linesize);
    // sum*sum is evaluated as unsigned: it may not fit into a signed int
    varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500;

    pic->mb_mean[mb_xy] = (sum+128)>>8;
    pic->mb_var [mb_xy] = (varc+128)>>8;
    c->mb_var_sum_temp += (varc+128)>>8;

    if(c->avctx->me_threshold){
        vard= check_input_motion(s, mb_x, mb_y, 1);

        if((vard+128)>>8 < c->avctx->me_threshold){
            // the stored motion is good enough, keep it without searching
            pic->mc_mb_var[mb_xy] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            update_scene_change_score(s, vard, varc);
            return;
        }
        // keep the stored macroblock type only, the vectors are searched below
        if((vard+128)>>8 < c->avctx->mb_threshold)
            mb_type= s->mb_type[mb_x + mb_y*s->mb_stride];
    }

    switch(s->me_method) {
    case ME_ZERO:
    default:
        /* every method without a case of its own ends up here */
        no_motion_search(s, &mx, &my);
        mx-= mb_x*16;
        my-= mb_y*16;
        dmin = 0;
        break;
    case ME_X1:
    case ME_EPZS:
        {
            const int mot_stride = s->b8_stride;
            const int mot_xy = s->block_index[0];
            // Search limits in motion vector units. Multiplication is used
            // instead of <<shift because the minimum is usually negative and
            // left shifting a negative value is undefined behaviour.
            const int xmin_sub= c->xmin * (1<<shift);
            const int xmax_sub= c->xmax * (1<<shift);
            const int ymax_sub= c->ymax * (1<<shift);

            P_LEFT[0]       = s->current_picture.motion_val[0][mot_xy - 1][0];
            P_LEFT[1]       = s->current_picture.motion_val[0][mot_xy - 1][1];

            if(P_LEFT[0] > xmax_sub) P_LEFT[0] = xmax_sub;

            // the left vector is the predictor unless replaced below
            c->pred_x= P_LEFT[0];
            c->pred_y= P_LEFT[1];

            if(!s->first_slice_line) {
                P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
                P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
                P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
                P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];

                if(P_TOP[1]      > ymax_sub) P_TOP[1]     = ymax_sub;
                if(P_TOPRIGHT[0] < xmin_sub) P_TOPRIGHT[0]= xmin_sub;
                if(P_TOPRIGHT[1] > ymax_sub) P_TOPRIGHT[1]= ymax_sub;

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

                if(s->out_format == FMT_H263){
                    c->pred_x = P_MEDIAN[0];
                    c->pred_y = P_MEDIAN[1];
                }
                /* else (mpeg1 at least) the left vector stays the predictor */
            }

            dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);
        }
        break;
    }

    /* At this point (mx,my) are full-pel and the relative displacement */
    ppix = c->ref[0][0] + (my * s->linesize) + mx;

    vard = s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16);

    pic->mc_mb_var[mb_xy] = (vard+128)>>8;
//    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
    c->mc_mb_var_sum_temp += (vard+128)>>8;

    if(mb_type){
        /* the type was taken from the stored input, only find its vectors */
        update_scene_change_score(s, vard, varc);

        if(mb_type == CANDIDATE_MB_TYPE_INTER){
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            set_p_mv_tables(s, mx, my, 1);
        }else{
            // convert the full-pel vector to motion vector units; mx and my
            // may be negative, so they are multiplied instead of shifted
            mx *= 1<<shift;
            my *= 1<<shift;
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER4V){
            h263_mv4_search(s, mx, my, shift);

            set_p_mv_tables(s, mx, my, 0);
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER_I){
            interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 1);
        }
    }else if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* collect every plausible type, the final choice is made elsewhere */
        update_scene_change_score(s, vard, varc);

        if (vard*2 + 200*256 > varc)
            mb_type|= CANDIDATE_MB_TYPE_INTRA;
        if (varc*2 + 200*256 > vard || s->qscale > 24){
//        if (varc*2 + 200*256 + 50*(s->lambda2>>FF_LAMBDA_SHIFT) > vard){
            mb_type|= CANDIDATE_MB_TYPE_INTER;
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            if((s->flags&CODEC_FLAG_MV0) && (mx || my))
                mb_type |= CANDIDATE_MB_TYPE_SKIPPED; //FIXME check difference
        }else{
            mx *= 1<<shift;
            my *= 1<<shift;
        }
        if((s->flags&CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8){
            if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
                mb_type|=CANDIDATE_MB_TYPE_INTER4V;

            set_p_mv_tables(s, mx, my, 0);
        }else
            set_p_mv_tables(s, mx, my, 1);
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !c->skip){ //FIXME varc/d checks
            if(interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER_I;
        }
    }else{
        /* pick exactly one type by comparing the scores */
        int intra_score, i;
        mb_type= CANDIDATE_MB_TYPE_INTER;

        dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
        if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
            dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

        if((s->flags&CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8){
            int dmin4= h263_mv4_search(s, mx, my, shift);
            if(dmin4 < dmin){
                mb_type= CANDIDATE_MB_TYPE_INTER4V;
                dmin=dmin4;
            }
        }
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !c->skip){ //FIXME varc/d checks
            int dmin_i= interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);
            if(dmin_i < dmin){
                mb_type = CANDIDATE_MB_TYPE_INTER_I;
                dmin= dmin_i;
            }
        }

//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
        set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);

        /* get intra luma score */
        if((c->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
            intra_score= varc - 500;
        }else{
            // the block mean replicated into 4 bytes; computed as unsigned
            // because the product does not fit into a signed int for means
            // of 128 and above
            const uint32_t mean= ((sum+128)>>8) * 0x01010101U;

            // compare the source with a flat block of its mean value
            for(i=0; i<16; i++){
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 0]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 4]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 8]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+12]) = mean;
            }

            intra_score= s->dsp.mb_cmp[0](s, c->scratchpad, pix, s->linesize, 16);
        }
        //FIXME the chroma planes are not part of the intra score
        intra_score += c->mb_penalty_factor*16;

        if(intra_score < dmin){
            mb_type= CANDIDATE_MB_TYPE_INTRA;
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
        }else
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;

        update_scene_change_score(s, vard, varc);
    }

    s->mb_type[mb_y*s->mb_stride + mb_x]= mb_type;
}

1428 1429 1430
/**
 * Coarse pre-pass motion search for one macroblock of a P frame.
 *
 * The candidate predictors are read from p_mv_table at xy+1, xy+mb_stride and
 * xy+mb_stride-1, i.e. the mirror image of the left/top/top-right neighbours
 * used by the other searches in this file (presumably because the pre-pass
 * visits macroblocks in reverse order -- confirm against the caller).
 * Only ff_epzs_motion_search() is run, there is no sub-pel refinement; the
 * resulting vector is scaled by 2^shift before it is stored in p_mv_table.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the score reported by ff_epzs_motion_search()
 */
int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
                                    int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int mx, my, dmin;
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    /* Scaling is done by multiplication: the limits and the vectors may be
     * negative, and left-shifting a negative int is undefined in C. */
    const int scale= 1<<shift;
    const int xy= mb_x + mb_y*s->mb_stride;
    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);

    c->pre_penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip=0;

    P_LEFT[0]       = s->p_mv_table[xy + 1][0];
    P_LEFT[1]       = s->p_mv_table[xy + 1][1];

    /* only the lower x bound is enforced for this predictor */
    if(P_LEFT[0]       < c->xmin*scale) P_LEFT[0]       = c->xmin*scale;

    /* special case for first line */
    if (s->first_slice_line) {
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];
        P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]=
        P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME
    } else {
        P_TOP[0]      = s->p_mv_table[xy + s->mb_stride    ][0];
        P_TOP[1]      = s->p_mv_table[xy + s->mb_stride    ][1];
        P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0];
        P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1];
        if(P_TOP[1]      < c->ymin*scale) P_TOP[1]     = c->ymin*scale;
        if(P_TOPRIGHT[0] > c->xmax*scale) P_TOPRIGHT[0]= c->xmax*scale;
        if(P_TOPRIGHT[1] < c->ymin*scale) P_TOPRIGHT[1]= c->ymin*scale;

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

        c->pred_x = P_MEDIAN[0];
        c->pred_y = P_MEDIAN[1];
    }

    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

    /* store the vector in the same sub-pel units as the rest of the table */
    s->p_mv_table[xy][0] = mx*scale;
    s->p_mv_table[xy][1] = my*scale;

    return dmin;
}

1481
/**
 * Motion search for one prediction direction of a B-frame macroblock.
 *
 * @param s         encoder context
 * @param mb_x      macroblock column
 * @param mb_y      macroblock row
 * @param mv_table  vector table of the direction being searched; neighbouring
 *                  entries provide the predictors and the result is written
 *                  to entry mb_y*mb_stride + mb_x
 * @param ref_index reference selector handed on to the search functions
 * @param f_code    selects the row of c->mv_penalty used for vector costs
 * @return score of the selected vector: the result of the sub-pel search, or
 *         of ff_get_mb_score() when me_sub_cmp and mb_cmp differ and the
 *         search did not set c->skip
 */
static int ff_estimate_motion_b(MpegEncContext * s,
                       int mb_x, int mb_y, int16_t (*mv_table)[2], int ref_index, int f_code)
{
    MotionEstContext * const c= &s->me;
    int mx, my, dmin;
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    /* used instead of "<<shift" wherever the operand may be negative;
     * left-shifting a negative int is undefined in C */
    const int scale= 1<<shift;
    const int mot_stride = s->mb_stride;
    const int mot_xy = mb_y*mot_stride + mb_x;
    uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_MV;
    int mv_scale;

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= mv_penalty;

    get_limits(s, 16*mb_x, 16*mb_y);

    switch(s->me_method) {
    case ME_ZERO:
    default:
        /* every method other than ME_X1 / ME_EPZS ends up here */
        no_motion_search(s, &mx, &my);
        dmin = 0;
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_X1:
    case ME_EPZS:
       {
            P_LEFT[0]        = mv_table[mot_xy - 1][0];
            P_LEFT[1]        = mv_table[mot_xy - 1][1];

            if(P_LEFT[0]       > (c->xmax<<shift)) P_LEFT[0]       = (c->xmax<<shift);

            /* special case for first line */
            if (!s->first_slice_line) {
                P_TOP[0] = mv_table[mot_xy - mot_stride             ][0];
                P_TOP[1] = mv_table[mot_xy - mot_stride             ][1];
                P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1         ][0];
                P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1         ][1];
                if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1]= (c->ymax<<shift);
                if(P_TOPRIGHT[0] < c->xmin*scale) P_TOPRIGHT[0]= c->xmin*scale;
                if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            /* the left neighbour, not the median, is the prediction here */
            c->pred_x= P_LEFT[0];
            c->pred_y= P_LEFT[1];
        }

        /* factor (16.16 fixed point, see the <<16) handed to the EPZS search
         * together with s->p_mv_table */
        if(mv_table == s->b_forw_mv_table){
            mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
        }else{
            /* pb_time - pp_time may be negative (presumably always is for the
             * backward direction), so it is multiplied rather than shifted */
            mv_scale= ((s->pb_time - s->pp_time)*(1<<16)) / (s->pp_time<<shift);
        }

        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);

        break;
    }

    dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);

    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= ff_get_mb_score(s, mx, my, 0, ref_index, 0, 16, 1);

    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;

    return dmin;
}

1574
/**
 * Compute the cost of one bidirectional vector pair.
 *
 * The forward block is copied from c->ref[0] into c->scratchpad, the backward
 * block from c->ref[2] is averaged onto it, and the result is compared with
 * the luma plane of c->src[0]. Only luma is looked at (chroma is a FIXME).
 *
 * @param s         encoder context
 * @param motion_fx forward vector x, in half- or quarter-pel units depending
 *                  on s->quarter_sample
 * @param motion_fy forward vector y
 * @param motion_bx backward vector x
 * @param motion_by backward vector y
 * @param pred_fx   forward vector predictor x (for the vector cost)
 * @param pred_fy   forward vector predictor y
 * @param pred_bx   backward vector predictor x
 * @param pred_by   backward vector predictor y
 * @param size      index into the dsp function tables
 * @param h         block height handed to the dsp functions
 * @return vector cost of both vectors plus the mb_cmp distortion
 */
static inline int check_bidir_mv(MpegEncContext * s,
                   int motion_fx, int motion_fy,
                   int motion_bx, int motion_by,
                   int pred_fx, int pred_fy,
                   int pred_bx, int pred_by,
                   int size, int h)
{
    //FIXME optimize?
    //FIXME better f_code prediction (max mv & distance)
    //FIXME pointers
    MotionEstContext * const c= &s->me;
    uint8_t * const penalty_fwd= c->mv_penalty[s->f_code] + MAX_MV; // indexed by forward mv difference
    uint8_t * const penalty_bwd= c->mv_penalty[s->b_code] + MAX_MV; // indexed by backward mv difference
    const int stride= c->stride;
    uint8_t * const pred= c->scratchpad;
    uint8_t ** const cur_planes= c->src[0];
    uint8_t ** const fwd_planes= c->ref[0];
    uint8_t ** const bwd_planes= c->ref[2];
    uint8_t *fwd_ptr, *bwd_ptr;
    int fwd_frac, bwd_frac;
    int fwd_cost, bwd_cost;

    if(s->quarter_sample){
        /* low 2 bits: quarter-pel phase, remaining bits: full-pel position */
        fwd_frac= ((motion_fy & 3) << 2) | (motion_fx & 3);
        fwd_ptr = fwd_planes[0] + ((motion_fy >> 2) * stride) + (motion_fx >> 2);
        /* NOTE(review): put uses table [0] while avg uses [size]; equivalent
         * for size==0, which is what bidir_refine() passes */
        s->dsp.put_qpel_pixels_tab[0][fwd_frac](pred, fwd_ptr, stride);

        bwd_frac= ((motion_by & 3) << 2) | (motion_bx & 3);
        bwd_ptr = bwd_planes[0] + ((motion_by >> 2) * stride) + (motion_bx >> 2);
        s->dsp.avg_qpel_pixels_tab[size][bwd_frac](pred, bwd_ptr, stride);
    }else{
        /* low bit: half-pel phase, remaining bits: full-pel position */
        fwd_frac= ((motion_fy & 1) << 1) | (motion_fx & 1);
        fwd_ptr = fwd_planes[0] + ((motion_fy >> 1) * stride) + (motion_fx >> 1);
        s->dsp.put_pixels_tab[size][fwd_frac](pred, fwd_ptr, stride, h);

        bwd_frac= ((motion_by & 1) << 1) | (motion_bx & 1);
        bwd_ptr = bwd_planes[0] + ((motion_by >> 1) * stride) + (motion_bx >> 1);
        s->dsp.avg_pixels_tab[size][bwd_frac](pred, bwd_ptr, stride, h);
    }

    fwd_cost= (penalty_fwd[motion_fx-pred_fx] + penalty_fwd[motion_fy-pred_fy])*c->mb_penalty_factor;
    bwd_cost= (penalty_bwd[motion_bx-pred_bx] + penalty_bwd[motion_by-pred_by])*c->mb_penalty_factor;

    //FIXME CHROMA !!! (nothing is done for c->avctx->mb_cmp&FF_CMP_CHROMA)

    return fwd_cost + bwd_cost
           + s->dsp.mb_cmp[size](s, cur_planes[0], pred, stride, h); //FIXME new_pic
}
1637

1638
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1639
static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
1640
{
1641
    MotionEstContext * const c= &s->me;
1642 1643
    const int mot_stride = s->mb_stride;
    const int xy = mb_y *mot_stride + mb_x;
1644 1645 1646 1647 1648 1649 1650 1651 1652
    int fbmin;
    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1653 1654 1655 1656 1657 1658 1659
    const int flags= c->sub_flags;
    const int qpel= flags&FLAG_QPEL;
    const int shift= 1+qpel;
    const int xmin= c->xmin<<shift;
    const int ymin= c->ymin<<shift;
    const int xmax= c->xmax<<shift;
    const int ymax= c->ymax<<shift;
1660 1661 1662 1663 1664 1665
    uint8_t map[8][8][8][8];

    memset(map,0,sizeof(map));
#define BIDIR_MAP(fx,fy,bx,by) \
    map[(motion_fx+fx)&7][(motion_fy+fy)&7][(motion_bx+bx)&7][(motion_by+by)&7]
    BIDIR_MAP(0,0,0,0) = 1;
1666

1667
    fbmin= check_bidir_mv(s, motion_fx, motion_fy,
1668 1669
                          motion_bx, motion_by,
                          pred_fx, pred_fy,
1670 1671
                          pred_bx, pred_by,
                          0, 16);
1672

1673 1674 1675
    if(s->avctx->bidir_refine){
        int score, end;
#define CHECK_BIDIR(fx,fy,bx,by)\
1676 1677
    if( !BIDIR_MAP(fx,fy,bx,by)\
       &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
Michael Niedermayer's avatar
Michael Niedermayer committed
1678
       &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
1679
        BIDIR_MAP(fx,fy,bx,by) = 1;\
Michael Niedermayer's avatar
Michael Niedermayer committed
1680 1681 1682 1683 1684 1685 1686 1687 1688
        score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
        if(score < fbmin){\
            fbmin= score;\
            motion_fx+=fx;\
            motion_fy+=fy;\
            motion_bx+=bx;\
            motion_by+=by;\
            end=0;\
        }\
1689 1690 1691
    }
#define CHECK_BIDIR2(a,b,c,d)\
CHECK_BIDIR(a,b,c,d)\
1692
CHECK_BIDIR(-(a),-(b),-(c),-(d))
1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727

#define CHECK_BIDIRR(a,b,c,d)\
CHECK_BIDIR2(a,b,c,d)\
CHECK_BIDIR2(b,c,d,a)\
CHECK_BIDIR2(c,d,a,b)\
CHECK_BIDIR2(d,a,b,c)

        do{
            end=1;

            CHECK_BIDIRR( 0, 0, 0, 1)
            if(s->avctx->bidir_refine > 1){
                CHECK_BIDIRR( 0, 0, 1, 1)
                CHECK_BIDIR2( 0, 1, 0, 1)
                CHECK_BIDIR2( 1, 0, 1, 0)
                CHECK_BIDIRR( 0, 0,-1, 1)
                CHECK_BIDIR2( 0,-1, 0, 1)
                CHECK_BIDIR2(-1, 0, 1, 0)
                if(s->avctx->bidir_refine > 2){
                    CHECK_BIDIRR( 0, 1, 1, 1)
                    CHECK_BIDIRR( 0,-1, 1, 1)
                    CHECK_BIDIRR( 0, 1,-1, 1)
                    CHECK_BIDIRR( 0, 1, 1,-1)
                    if(s->avctx->bidir_refine > 3){
                        CHECK_BIDIR2( 1, 1, 1, 1)
                        CHECK_BIDIRR( 1, 1, 1,-1)
                        CHECK_BIDIR2( 1, 1,-1,-1)
                        CHECK_BIDIR2( 1,-1,-1, 1)
                        CHECK_BIDIR2( 1,-1, 1,-1)
                    }
                }
            }
        }while(!end);
    }

1728 1729 1730 1731 1732
    s->b_bidir_forw_mv_table[xy][0]= motion_fx;
    s->b_bidir_forw_mv_table[xy][1]= motion_fy;
    s->b_bidir_back_mv_table[xy][0]= motion_bx;
    s->b_bidir_back_mv_table[xy][1]= motion_by;

1733
    return fbmin;
1734 1735
}

1736
/**
 * Motion search for the direct mode of a B-frame macroblock.
 *
 * For each block (one when the co-located macroblock of next_picture is not
 * 8x8, otherwise four) the co-located vector is copied from
 * next_picture.motion_val[0] and a basis vector is derived from it with the
 * pb_time/pp_time ratio. The search window is then narrowed using both the
 * basis vector and (basis - co-located) against the picture width/height and
 * a 16 pixel margin. If no valid window remains the stored vector is zeroed
 * and a large sentinel score is returned.
 * While the search runs FLAG_DIRECT is set in c->flags and c->sub_flags; it
 * is cleared again before returning from the search path.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return score of the selected vector, or 256*256*256*64 when direct mode is
 *         not usable for this macroblock
 */
static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int P[10][2];
    const int mot_stride = s->mb_stride;
    const int mot_xy = mb_y*mot_stride + mb_x;
    const int shift= 1+s->quarter_sample;
    /* used instead of "<<shift" for the window limits, which may be negative;
     * left-shifting a negative int is undefined in C */
    const int scale= 1<<shift;
    int dmin, i;
    const int time_pp= s->pp_time;
    const int time_pb= s->pb_time;
    int mx, my, xmin, xmax, ymin, ymax;
    int clip_xmin, clip_xmax, clip_ymin, clip_ymax;
    int16_t (*mv_table)[2]= s->b_direct_mv_table;

    c->current_mv_penalty= c->mv_penalty[1] + MAX_MV;
    ymin= xmin=(-32)>>shift;
    ymax= xmax=   31>>shift;

    if(IS_8X8(s->next_picture.mb_type[mot_xy])){
        s->mv_type= MV_TYPE_8X8;
    }else{
        s->mv_type= MV_TYPE_16X16;
    }

    for(i=0; i<4; i++){
        int index= s->block_index[i];
        int min, max;

        c->co_located_mv[i][0]= s->next_picture.motion_val[0][index][0];
        c->co_located_mv[i][1]= s->next_picture.motion_val[0][index][1];
        /* scaled co-located vector plus the offset of block i inside the
         * macroblock (8 pixels, expressed in sub-pel units) */
        c->direct_basis_mv[i][0]= c->co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
        c->direct_basis_mv[i][1]= c->co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));

        max= FFMAX(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        min= FFMIN(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        max+= 16*mb_x + 1; // +-1 is for the simpler rounding
        min+= 16*mb_x - 1;
        xmax= FFMIN(xmax, s->width - max);
        xmin= FFMAX(xmin, - 16     - min);

        max= FFMAX(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        min= FFMIN(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        max+= 16*mb_y + 1; // +-1 is for the simpler rounding
        min+= 16*mb_y - 1;
        ymax= FFMIN(ymax, s->height - max);
        ymin= FFMAX(ymin, - 16      - min);

        /* a 16x16 macroblock has a single vector, block 0 is enough */
        if(s->mv_type == MV_TYPE_16X16) break;
    }

    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);

    /* the zero vector is outside the window: direct mode is unusable */
    if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
        s->b_direct_mv_table[mot_xy][0]= 0;
        s->b_direct_mv_table[mot_xy][1]= 0;

        return 256*256*256*64;
    }

    c->xmin= xmin;
    c->ymin= ymin;
    c->xmax= xmax;
    c->ymax= ymax;
    c->flags     |= FLAG_DIRECT;
    c->sub_flags |= FLAG_DIRECT;
    c->pred_x=0;
    c->pred_y=0;

    clip_xmin= xmin*scale;
    clip_xmax= xmax*scale;
    clip_ymin= ymin*scale;
    clip_ymax= ymax*scale;

    P_LEFT[0]        = av_clip(mv_table[mot_xy - 1][0], clip_xmin, clip_xmax);
    P_LEFT[1]        = av_clip(mv_table[mot_xy - 1][1], clip_ymin, clip_ymax);

    /* special case for first line */
    if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as it is clipped
        P_TOP[0]      = av_clip(mv_table[mot_xy - mot_stride             ][0], clip_xmin, clip_xmax);
        P_TOP[1]      = av_clip(mv_table[mot_xy - mot_stride             ][1], clip_ymin, clip_ymax);
        P_TOPRIGHT[0] = av_clip(mv_table[mot_xy - mot_stride + 1         ][0], clip_xmin, clip_xmax);
        P_TOPRIGHT[1] = av_clip(mv_table[mot_xy - mot_stride + 1         ][1], clip_ymin, clip_ymax);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    }

    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);
    if(c->sub_flags&FLAG_QPEL)
        dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    else
        dmin = hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);

    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

    get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed

    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;
    c->flags     &= ~FLAG_DIRECT;
    c->sub_flags &= ~FLAG_DIRECT;

    return dmin;
}

/**
 * Motion estimation and candidate macroblock type selection for one
 * macroblock of a B frame.
 *
 * Three paths exist:
 *  - MPEG-4 with the co-located macroblock of next_picture marked in
 *    mbskip_table: only the direct search is run and the type is set to
 *    CANDIDATE_MB_TYPE_DIRECT0.
 *  - c->avctx->me_threshold set: check_input_motion() is consulted first;
 *    below me_threshold the macroblock is left as it is, below mb_threshold
 *    only the searches matching the already stored type are rerun.
 *  - otherwise direct (MPEG-4 only), forward, backward, bidirectional and,
 *    with CODEC_FLAG_INTERLACED_ME, the two interlaced searches are run and
 *    the cheapest one decides the type. With mb_decision above
 *    FF_MB_DECISION_SIMPLE a mask of all usable types is stored instead.
 *
 * Results are written to s->mb_type, s->current_picture.mc_mb_var and
 * c->mc_mb_var_sum_temp (plus the vector tables filled by the searches).
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_b_frame_motion(MpegEncContext * s,
                             int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    /* sampled on entry, i.e. before ff_estimate_motion_b() recomputes
     * c->mb_penalty_factor during this call */
    const int penalty_factor= c->mb_penalty_factor;
    int fmin, bmin, dmin, fbmin, bimin, fimin;
    int type=0;
    const int xy = mb_y*s->mb_stride + mb_x;
    init_ref(c, s->new_picture.data, s->last_picture.data, s->next_picture.data, 16*mb_x, 16*mb_y, 2);

    get_limits(s, 16*mb_x, 16*mb_y);

    c->skip=0;

    if(s->codec_id == CODEC_ID_MPEG4 && s->next_picture.mbskip_table[xy]){
        int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0

        /* squared in unsigned arithmetic: direct_search() can return
         * 256*256*256*64, whose square overflows a signed int */
        score= ((unsigned)(score*(unsigned)score + 128*256))>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
        s->mb_type[xy]= CANDIDATE_MB_TYPE_DIRECT0;

        return;
    }

    if(c->avctx->me_threshold){
        int vard= check_input_motion(s, mb_x, mb_y, 0);

        if((vard+128)>>8 < c->avctx->me_threshold){
            /* accepted as is: only the variance statistics are updated */
            s->current_picture.mc_mb_var[xy] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            return;
        }
        if((vard+128)>>8 < c->avctx->mb_threshold){
            /* keep the stored type, redo only the searches it needs */
            type= s->mb_type[xy];
            if(type == CANDIDATE_MB_TYPE_DIRECT){
                direct_search(s, mb_x, mb_y);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
                interlaced_search(s, 0,
                                        s->b_field_mv_table[0], s->b_field_select_table[0],
                                        s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 1);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
                interlaced_search(s, 2,
                                        s->b_field_mv_table[1], s->b_field_select_table[1],
                                        s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 1);
            }
            return;
        }
    }

    if (s->codec_id == CODEC_ID_MPEG4)
        dmin= direct_search(s, mb_x, mb_y);
    else
        dmin= INT_MAX;
//FIXME penalty stuff for non mpeg4
    c->skip=0;
    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) + 3*penalty_factor;

    c->skip=0;
    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) + 2*penalty_factor;

    c->skip=0;
    fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;

    if(s->flags & CODEC_FLAG_INTERLACED_ME){
//FIXME mb type penalty
        c->skip=0;
        c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
        fimin= interlaced_search(s, 0,
                                 s->b_field_mv_table[0], s->b_field_select_table[0],
                                 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
        c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
        bimin= interlaced_search(s, 2,
                                 s->b_field_mv_table[1], s->b_field_select_table[1],
                                 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
    }else
        fimin= bimin= INT_MAX;

    {
        /* pick the cheapest mode; direct wins a tie against forward */
        int score= fmin;
        type = CANDIDATE_MB_TYPE_FORWARD;

        if (dmin <= score){
            score = dmin;
            type = CANDIDATE_MB_TYPE_DIRECT;
        }
        if(bmin<score){
            score=bmin;
            type= CANDIDATE_MB_TYPE_BACKWARD;
        }
        if(fbmin<score){
            score=fbmin;
            type= CANDIDATE_MB_TYPE_BIDIR;
        }
        if(fimin<score){
            score=fimin;
            type= CANDIDATE_MB_TYPE_FORWARD_I;
        }
        if(bimin<score){
            score=bimin;
            type= CANDIDATE_MB_TYPE_BACKWARD_I;
        }

        /* unsigned multiplication, a large score must not overflow int */
        score= ((unsigned)(score*(unsigned)score + 128*256))>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
    }

    if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* leave the final choice to a later stage: offer every usable type */
        type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
        if(fimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_FORWARD_I;
        if(bimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_BACKWARD_I;
        if(fimin < INT_MAX && bimin < INT_MAX){
            type |= CANDIDATE_MB_TYPE_BIDIR_I;
        }
         //FIXME something smarter
        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //do not try direct mode if it is invalid for this MB
        /* offer DIRECT0 too when the direct vector found is not zero; the
         * components are tested one by one rather than through a uint32_t
         * cast of the int16_t pair */
        if(s->codec_id == CODEC_ID_MPEG4 && type&CANDIDATE_MB_TYPE_DIRECT && s->flags&CODEC_FLAG_MV0
           && (s->b_direct_mv_table[xy][0] | s->b_direct_mv_table[xy][1]))
            type |= CANDIDATE_MB_TYPE_DIRECT0;
    }

    s->mb_type[xy]= type;
}

/**
 * Find the best f_code for motion estimation methods which do unlimited
 * searches.
 *
 * Every f_code i starts with the score mb_num*(8-i). For each macroblock
 * whose s->mb_type matches type and whose vector lies inside the allowed
 * range, 170 is subtracted from every f_code smaller than the one the vector
 * needs according to s->fcode_tab (for non-B pictures only if mc_mb_var is
 * below mb_var). The f_code in 1..7 with the highest score is returned.
 *
 * @param s        encoder context
 * @param mv_table vector table to analyse, indexed by y*mb_stride + x
 * @param type     mask of candidate macroblock types to take into account
 * @return the selected f_code, or 1 when s->me_method is below ME_EPZS
 */
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
{
    if(s->me_method>=ME_EPZS){
        int score[8];
        int i, y, range= s->avctx->me_range ? s->avctx->me_range : (INT_MAX/2);
        uint8_t * fcode_tab= s->fcode_tab;
        int best_fcode=-1;
        int best_score=-10000000;

        if(s->msmpeg4_version)
            range= FFMIN(range, 16);
        else if(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL)
            range= FFMIN(range, 256);

        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);

        for(y=0; y<s->mb_height; y++){
            int x;
            int xy= y*s->mb_stride;
            /* xy is advanced in the loop header so that the "continue" below
             * cannot skip the increment; with the increment at the end of the
             * body one out-of-range vector made the rest of the row re-test
             * the same macroblock */
            for(x=0; x<s->mb_width; x++, xy++){
                if(s->mb_type[xy] & type){
                    int mx= mv_table[xy][0];
                    int my= mv_table[xy][1];
                    int fcode;
                    int j;

                    /* vectors outside the range are not counted */
                    if(mx >= range || mx < -range ||
                       my >= range || my < -range)
                        continue;

                    fcode= FFMAX(fcode_tab[mx + MAX_MV],
                                 fcode_tab[my + MAX_MV]);

                    for(j=0; j<fcode && j<8; j++){
                        if(s->pict_type==B_TYPE || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy])
                            score[j]-= 170;
                    }
                }
            }
        }

        /* f_code 0 is never selected */
        for(i=1; i<8; i++){
            if(score[i] > best_score){
                best_score= score[i];
                best_fcode= i;
            }
        }

        return best_fcode;
    }else{
        return 1;
    }
}

2056 2057
/**
 * Turn 4MV macroblocks of a P frame whose vectors are too long into intra
 * candidates.
 *
 * The allowed range is 8<<f_code for FMT_MPEG1 / msmpeg4 and 16<<f_code
 * otherwise, further limited by avctx->me_range when that is set. Nothing is
 * done unless CODEC_FLAG_4MV is set. A macroblock with
 * CANDIDATE_MB_TYPE_INTER4V that has at least one of its four 8x8 vectors
 * outside [-range, range) loses that flag, gains CANDIDATE_MB_TYPE_INTRA in
 * s->mb_type and gets CANDIDATE_MB_TYPE_INTRA in current_picture.mb_type.
 *
 * @param s encoder context
 */
void ff_fix_long_p_mvs(MpegEncContext * s)
{
    MotionEstContext * const c= &s->me;
    const int f_code= s->f_code;
    int mb_row, limit;
    int stride8;

    assert(s->pict_type==P_TYPE);

    if(s->out_format == FMT_MPEG1 || s->msmpeg4_version)
        limit= 8 << f_code;
    else
        limit= 16 << f_code;

    assert(limit <= 16 || !s->msmpeg4_version);
    assert(limit <=256 || !(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));

    if(c->avctx->me_range && limit > c->avctx->me_range)
        limit= c->avctx->me_range;

    if(!(s->flags&CODEC_FLAG_4MV))
        return;

    stride8= s->b8_stride;

    /* clip / convert to intra 8x8 type MVs */
    for(mb_row=0; mb_row<s->mb_height; mb_row++){
        int b8_pos= mb_row*2*stride8;       /* index of the first 8x8 block of the macroblock */
        int mb_pos= mb_row*s->mb_stride;    /* macroblock index */
        int mb_col;

        for(mb_col=0; mb_col<s->mb_width; mb_col++, b8_pos+=2, mb_pos++){
            int blk;

            if(!(s->mb_type[mb_pos]&CANDIDATE_MB_TYPE_INTER4V))
                continue;

            for(blk=0; blk<4; blk++){
                const int offset= (blk& 1) + (blk>>1)*stride8;
                const int mx= s->current_picture.motion_val[0][ b8_pos + offset ][0];
                const int my= s->current_picture.motion_val[0][ b8_pos + offset ][1];

                if(mx < limit && mx >= -limit && my < limit && my >= -limit)
                    continue;

                s->mb_type[mb_pos] &= ~CANDIDATE_MB_TYPE_INTER4V;
                s->mb_type[mb_pos] |= CANDIDATE_MB_TYPE_INTRA;
                s->current_picture.mb_type[mb_pos]= CANDIDATE_MB_TYPE_INTRA;
            }
        }
    }
}

2103 2104 2105 2106
/**
 *
 * @param truncate 1 for truncation, 0 for using intra
 */
2107
void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
2108
                     int16_t (*mv_table)[2], int f_code, int type, int truncate)
2109
{
2110
    MotionEstContext * const c= &s->me;
2111
    int y, h_range, v_range;
2112

2113
    // RAL: 8 in MPEG-1, 16 in MPEG-4
2114
    int range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
2115

2116
    if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
2117

2118 2119 2120
    h_range= range;
    v_range= field_select_table ? range>>1 : range;

2121 2122 2123
    /* clip / convert to intra 16x16 type MVs */
    for(y=0; y<s->mb_height; y++){
        int x;
2124
        int xy= y*s->mb_stride;
2125
        for(x=0; x<s->mb_width; x++){
2126
            if (s->mb_type[xy] & type){    // RAL: "type" test added...
2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141
                if(field_select_table==NULL || field_select_table[xy] == field_select){
                    if(   mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
                       || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){

                        if(truncate){
                            if     (mv_table[xy][0] > h_range-1) mv_table[xy][0]=  h_range-1;
                            else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
                            if     (mv_table[xy][1] > v_range-1) mv_table[xy][1]=  v_range-1;
                            else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
                        }else{
                            s->mb_type[xy] &= ~type;
                            s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
                            mv_table[xy][0]=
                            mv_table[xy][1]= 0;
                        }
2142
                    }
2143
                }
2144 2145 2146 2147 2148
            }
            xy++;
        }
    }
}