/*
 * Motion estimation
 * Copyright (c) 2000,2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 * new motion estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Motion estimation.
 */
29

Fabrice Bellard's avatar
Fabrice Bellard committed
30 31
#include <stdlib.h>
#include <stdio.h>
32
#include <limits.h>
33
#include "libavutil/intmath.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
34 35
#include "avcodec.h"
#include "dsputil.h"
36
#include "mathops.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
37 38
#include "mpegvideo.h"

39 40
#undef NDEBUG
#include <assert.h>
Michael Niedermayer's avatar
Michael Niedermayer committed
41

42
/* Square of a. The argument is expanded twice, so it must not have side effects. */
#define SQ(a) ((a)*(a))
43

44 45 46 47 48 49
/*
 * Named slots of a local predictor array called `P` (declared as
 * int P[10][2] by the search functions; [0] is x, [1] is y).
 * In h263_mv4_search() P_LEFT/P_TOP/P_TOPRIGHT are loaded from the
 * neighbouring motion_val entries, P_MEDIAN is their mid_pred() and
 * P_MV1 is the (mx, my) vector handed in by the caller.
 */
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]

Michael Niedermayer's avatar
Michael Niedermayer committed
50
/*
 * Forward declaration of the SAD-only half-pel refinement.
 * It is referenced (as a sub_motion_search callback) before its
 * definition further down in this file.
 */
static inline int sad_hpel_motion_search(MpegEncContext *s,
                                         int *mx_ptr, int *my_ptr,
                                         int dmin,
                                         int src_index, int ref_index,
                                         int size, int h);
Michael Niedermayer's avatar
Michael Niedermayer committed
54

55
/**
 * Advance the map generation counter by one step and return the new value.
 *
 * One step is 1<<(ME_MAP_MV_BITS*2). When the counter reaches 0 after the
 * increment, the whole map is zeroed and the counter restarts at one step,
 * so 0 is never returned.
 * NOTE(review): presumably map_generation is unsigned and 0 means it
 * wrapped around -- confirm against the MotionEstContext declaration.
 */
static inline int update_map_generation(MotionEstContext *c)
{
    const int generation_step = 1 << (ME_MAP_MV_BITS * 2);

    c->map_generation += generation_step;
    if (!c->map_generation) {
        c->map_generation = generation_step;
        memset(c->map, 0, sizeof(uint32_t) * ME_MAP_SIZE);
    }
    return c->map_generation;
}

65 66 67 68 69 70
/* shape adaptive search stuff */

/** One search minimum; `height` is the key minima_cmp() orders by. */
typedef struct Minima{
    int height;
    int x, y;
    int checked;
}Minima;

/**
 * qsort()-style comparator ordering Minima by ascending height.
 *
 * @param a pointer to a Minima
 * @param b pointer to a Minima
 * @return  negative, zero or positive when a's height is smaller than,
 *          equal to or larger than b's height
 *
 * The heights are compared instead of subtracted: a plain difference
 * overflows int (undefined behaviour, and in practice the wrong sign)
 * when the two values are far apart, e.g. INT_MAX and -1.
 */
static int minima_cmp(const void *a, const void *b){
    const int height_a = ((const Minima *) a)->height;
    const int height_b = ((const Minima *) b)->height;

    return (height_a > height_b) - (height_a < height_b);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
78

79 80 81
/*
 * Bits of the `flags` word built by get_flags() and tested by the cmp_*()
 * dispatchers below.
 * FLAG_QPEL must be 1: cmp_internal() passes flags&FLAG_QPEL straight
 * through as the `qpel` argument, which is then used as a shift amount
 * (x<<(1+qpel)).
 */
#define FLAG_QPEL   1 //must be 1
#define FLAG_CHROMA 2
#define FLAG_DIRECT 4
Michael Niedermayer's avatar
Michael Niedermayer committed
82

83
/**
 * Point the source and reference plane pointers of the context at (x, y).
 *
 * c->src[0][i] and c->ref[0][i] are set to src[i] / ref[i] plus a per-plane
 * offset; if ref_index is non-zero, c->ref[ref_index][i] is set the same way
 * from ref2 (ref2 is not read when ref_index is 0).
 * The offset is y*stride + x for plane 0 and (y*uvstride + x)>>1 for
 * planes 1 and 2.
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
    const int luma_off   =  y*c->  stride + x;
    const int chroma_off = (y*c->uvstride + x)>>1;
    int plane;

    for(plane=0; plane<3; plane++){
        const int off= plane ? chroma_off : luma_off;

        c->src[0][plane]= src[plane] + off;
        c->ref[0][plane]= ref[plane] + off;
    }

    if(!ref_index)
        return;

    for(plane=0; plane<3; plane++){
        const int off= plane ? chroma_off : luma_off;

        c->ref[ref_index][plane]= ref2[plane] + off;
    }
}

101 102
/**
 * Build the FLAG_* word for a comparison.
 *
 * FLAG_QPEL is taken from the codec context (CODEC_FLAG_QPEL),
 * FLAG_DIRECT and FLAG_CHROMA are set when `direct` / `chroma` are non-zero.
 */
static int get_flags(MotionEstContext *c, int direct, int chroma){
    int flags= 0;

    if(c->avctx->flags&CODEC_FLAG_QPEL)
        flags |= FLAG_QPEL;
    if(direct)
        flags |= FLAG_DIRECT;
    if(chroma)
        flags |= FLAG_CHROMA;

    return flags;
}

107
/**
 * Score a "direct" (two-reference) candidate.
 *
 * A 16x16 prediction is assembled in c->temp by a put from ref[0] followed
 * by an avg from ref[8], then compared against src[0] with cmp_func.
 * With MV_TYPE_8X8 each 8x8 quadrant uses its own direct_basis_mv /
 * co_located_mv entry; otherwise entry 0 is used for the whole block.
 *
 * @param x,y        full-pel part of the candidate
 * @param subx,suby  sub-pel part (half-pel units, quarter-pel when qpel)
 * @param qpel       0 or 1; also used as a shift amount
 * @return cmp_func score, or 256*256*256*32 if the candidate lies outside
 *         the [xmin,xmax] x [ymin,ymax] window (only reachable when the
 *         assert is compiled out)
 *
 * size, h and chroma_cmp_func are accepted for signature parity but unused.
 * NOTE(review): ref[8] indexes past the first plane pointers of this
 * reference row; presumably it lands on the row holding the backward
 * reference -- confirm against the layout of MotionEstContext.ref.
 */
static av_always_inline int cmp_direct_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int hx= subx + (x<<(1+qpel));
    const int hy= suby + (y<<(1+qpel));
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    const int time_pp= s->pp_time;
    const int time_pb= s->pb_time;
    const int mask= 2*qpel+1;
    int quad;

    //FIXME check chroma 4mv, (no crashes ...)
    assert(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1));
    if(!(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)))
        return 256*256*256*32;

    if(s->mv_type==MV_TYPE_8X8){
        /* one forward/backward vector pair per 8x8 quadrant */
        for(quad=0; quad<4; quad++){
            const int fx = c->direct_basis_mv[quad][0] + hx;
            const int fy = c->direct_basis_mv[quad][1] + hy;
            const int bx = hx ? fx - c->co_located_mv[quad][0]
                              : c->co_located_mv[quad][0]*(time_pb - time_pp)/time_pp + ((quad &1)<<(qpel+4));
            const int by = hy ? fy - c->co_located_mv[quad][1]
                              : c->co_located_mv[quad][1]*(time_pb - time_pp)/time_pp + ((quad>>1)<<(qpel+4));
            const int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
            const int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
            uint8_t * const dst= c->temp + 8*(quad&1) + 8*stride*(quad>>1);

            if(qpel){
                c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
                c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride);
            }else{
                c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
                c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
            }
        }
    }else{
        /* a single vector pair for the whole 16x16 block */
        const int fx = c->direct_basis_mv[0][0] + hx;
        const int fy = c->direct_basis_mv[0][1] + hy;
        const int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
        const int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
        const int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
        const int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

        if(qpel){
            uint8_t * const fwd= ref[0] + (fx>>2) + (fy>>2)*stride;
            uint8_t * const bwd= ref[8] + (bx>>2) + (by>>2)*stride;

            /* the 8x8 qpel functions are applied to the four quadrants:
             * first all puts, then all averages */
            for(quad=0; quad<4; quad++){
                const int off= 8*(quad&1) + 8*stride*(quad>>1);
                c->qpel_put[1][fxy](c->temp + off, fwd + off, stride);
            }
            for(quad=0; quad<4; quad++){
                const int off= 8*(quad&1) + 8*stride*(quad>>1);
                c->qpel_avg[1][bxy](c->temp + off, bwd + off, stride);
            }
        }else{
            assert((fx>>1) + 16*s->mb_x >= -16);
            assert((fy>>1) + 16*s->mb_y >= -16);
            assert((fx>>1) + 16*s->mb_x <= s->width);
            assert((fy>>1) + 16*s->mb_y <= s->height);
            assert((bx>>1) + 16*s->mb_x >= -16);
            assert((by>>1) + 16*s->mb_y >= -16);
            assert((bx>>1) + 16*s->mb_x <= s->width);
            assert((by>>1) + 16*s->mb_y <= s->height);

            c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
            c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
        }
    }

    return cmp_func(s, c->temp, src[0], stride, 16);
}

/**
 * Score a single-reference candidate.
 *
 * With a zero sub-pel part the source is compared directly against
 * ref[0] + x + y*stride. Otherwise the prediction is first interpolated
 * into c->temp (qpel_put when qpel is set, hpel_put otherwise) and that is
 * compared. When `chroma` is non-zero both chroma planes are interpolated
 * with hpel_put[size+1] into c->temp + 16*stride and their chroma_cmp_func
 * scores are added.
 *
 * @param x,y        full-pel part of the candidate
 * @param subx,suby  sub-pel part
 * @param size,h     table index and height handed to the put/cmp functions
 * @param qpel       0 or 1; also used as a shift amount
 * @return accumulated comparison score
 */
static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel, int chroma){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int uvstride= c->uvstride;
    const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    uint8_t * const luma_ref= ref[0] + x + y*stride;
    int uvdxy= 0;   /* always overwritten before use when chroma is set */
    int d;

    //FIXME check chroma 4mv, (no crashes ...)
    if(!dxy){
        /* full-pel position: no interpolation needed */
        d = cmp_func(s, src[0], luma_ref, stride, h);
        if(chroma)
            uvdxy= (x&1) + 2*(y&1);
    }else if(qpel){
        c->qpel_put[size][dxy](c->temp, luma_ref, stride); //FIXME prototype (add h)
        if(chroma){
            const int hx= subx + (x<<(1+qpel));
            const int hy= suby + (y<<(1+qpel));
            int cx= hx/2;
            int cy= hy/2;

            cx= (cx>>1)|(cx&1);
            cy= (cy>>1)|(cy&1);
            uvdxy= (cx&1) + 2*(cy&1);
            //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
        }
        d = cmp_func(s, c->temp, src[0], stride, h);
    }else{
        c->hpel_put[size][dxy](c->temp, luma_ref, stride, h);
        if(chroma)
            uvdxy= dxy | (x&1) | (2*(y&1));
        d = cmp_func(s, c->temp, src[0], stride, h);
    }

    if(chroma){
        uint8_t * const uvtemp= c->temp + 16*stride;
        const int uvoff= (x>>1) + (y>>1)*uvstride;

        c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + uvoff, uvstride, h>>1);
        c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + uvoff, uvstride, h>>1);
        d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
        d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
    }

    return d;
}

/**
 * Cheapest comparison variant: full-pel position, size 0, height 16,
 * no qpel and no chroma. Forwards to cmp_inline() with those constants.
 */
static int cmp_simple(MpegEncContext *s, const int x, const int y,
                      int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func)
{
    const int no_subpel = 0, size = 0, h = 16, no_qpel = 0, no_chroma = 0;

    return cmp_inline(s, x, y, no_subpel, no_subpel, size, h,
                      ref_index, src_index, cmp_func, chroma_cmp_func,
                      no_qpel, no_chroma);
}

/**
 * Full-pel comparison (sub-pel part fixed to 0).
 *
 * FLAG_DIRECT selects cmp_direct_inline() (qpel taken from the flags);
 * otherwise cmp_inline() is used with qpel forced to 0 and chroma taken
 * from the flags.
 */
static int cmp_fpel_internal(MpegEncContext *s, const int x, const int y,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
{
    if (!(flags & FLAG_DIRECT))
        return cmp_inline(s, x, y, 0, 0, size, h, ref_index, src_index,
                          cmp_func, chroma_cmp_func, 0, flags & FLAG_CHROMA);

    return cmp_direct_inline(s, x, y, 0, 0, size, h, ref_index, src_index,
                             cmp_func, chroma_cmp_func, flags & FLAG_QPEL);
}

/**
 * General comparison: sub-pel part, qpel and chroma all come from the
 * arguments / flags. FLAG_DIRECT selects cmp_direct_inline(), otherwise
 * cmp_inline() is used.
 */
static int cmp_internal(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
{
    if (!(flags & FLAG_DIRECT))
        return cmp_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                          cmp_func, chroma_cmp_func, flags & FLAG_QPEL, flags & FLAG_CHROMA);

    return cmp_direct_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                             cmp_func, chroma_cmp_func, flags & FLAG_QPEL);
}
250

251 252 253
/*! \brief compares a block (either a full macroblock or a partition thereof)
    against a proposed motion-compensated prediction of that block
 */
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(av_builtin_constant_p(flags) && av_builtin_constant_p(h) && av_builtin_constant_p(size)
       && av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
       && flags==0 && h==16 && size==0 && subx==0 && suby==0){
        return cmp_simple(s,x,y,ref_index,src_index, cmp_func, chroma_cmp_func);
    }else if(av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
       && subx==0 && suby==0){
        return cmp_fpel_internal(s,x,y,size,h,ref_index,src_index, cmp_func, chroma_cmp_func,flags);
    }else{
        return cmp_internal(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags);
    }
}

/**
 * Comparison with qpel forced off (half-pel precision).
 * FLAG_DIRECT selects cmp_direct_inline(), otherwise cmp_inline() with
 * chroma taken from the flags.
 */
static int cmp_hpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
{
    if (!(flags & FLAG_DIRECT))
        return cmp_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                          cmp_func, chroma_cmp_func, 0, flags & FLAG_CHROMA);

    return cmp_direct_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                             cmp_func, chroma_cmp_func, 0);
}

/**
 * Comparison with qpel forced on (quarter-pel precision).
 * FLAG_DIRECT selects cmp_direct_inline(), otherwise cmp_inline() with
 * chroma taken from the flags.
 */
static int cmp_qpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
{
    if (!(flags & FLAG_DIRECT))
        return cmp_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                          cmp_func, chroma_cmp_func, 1, flags & FLAG_CHROMA);

    return cmp_direct_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                             cmp_func, chroma_cmp_func, 1);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
288 289 290

#include "motion_est_template.c"

Michael Niedermayer's avatar
Michael Niedermayer committed
291 292 293 294 295 296 297
/**
 * Comparison stub: ignores all arguments and reports a score of 0.
 * Installed by ff_init_me() in comparison slots that must not contribute.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;

    return 0;
}

/**
 * Pixel-copy stub: does nothing and leaves the destination untouched.
 * Installed by ff_init_me() in the hpel_put[2][] slots.
 */
static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h)
{
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
}

298
/**
 * Initialise the motion estimation context s->me from the codec settings.
 *
 * Validates dia_size / pre_dia_size and me_method, installs the comparison
 * functions, derives the FLAG_* words, selects the sub-pel refinement
 * routine and the pixel put/avg tables, and sets the strides.
 *
 * @return 0 on success, -1 (after logging an error) when the diamond size
 *         or the me_method is not acceptable
 */
int ff_init_me(MpegEncContext *s){
    MotionEstContext * const c= &s->me;
    const int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
    const int dia_size= FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255);
    int i;

    if(FFMIN(s->avctx->dia_size, s->avctx->pre_dia_size) < -ME_MAP_SIZE){
        av_log(s->avctx, AV_LOG_ERROR, "ME_MAP size is too small for SAB diamond\n");
        return -1;
    }

    //special case of snow is needed because snow uses its own iterative ME code
    if(!(   s->me_method == ME_ZERO
         || s->me_method == ME_EPZS
         || s->me_method == ME_X1
         || s->avctx->codec_id == CODEC_ID_SNOW)){
        av_log(s->avctx, AV_LOG_ERROR, "me_method is only allowed to be set to zero and epzs; for hex,umh,full and others see dia_size\n");
        return -1;
    }

    c->avctx= s->avctx;

    /* c->stride is still 0 here on the first call, so this is logged once */
    if(!c->stride && cache_size < 2*dia_size)
        av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n");

    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_cmp,     c->avctx->me_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
    ff_set_cmp(&s->dsp, s->dsp.mb_cmp,     c->avctx->mb_cmp);

    c->flags    = get_flags(c, 0, c->avctx->me_cmp    &FF_CMP_CHROMA);
    c->sub_flags= get_flags(c, 0, c->avctx->me_sub_cmp&FF_CMP_CHROMA);
    c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);

/*FIXME s->no_rounding b_type*/
    if(s->flags&CODEC_FLAG_QPEL){
        c->sub_motion_search= qpel_motion_search;
        c->qpel_avg= s->dsp.avg_qpel_pixels_tab;
        c->qpel_put= s->no_rounding ? s->dsp.put_no_rnd_qpel_pixels_tab
                                    : s->dsp.put_qpel_pixels_tab;
    }else{
        const int uses_chroma= c->avctx->me_sub_cmp&FF_CMP_CHROMA;
        const int all_sad=    c->avctx->me_sub_cmp == FF_CMP_SAD
                           && c->avctx->    me_cmp == FF_CMP_SAD
                           && c->avctx->    mb_cmp == FF_CMP_SAD;

        /* the SAD-only refinement is the faster one (2050 vs. 2450 cycles) */
        if(!uses_chroma && all_sad){
            c->sub_motion_search= sad_hpel_motion_search;
        }else{
            c->sub_motion_search= hpel_motion_search;
        }
    }

    c->hpel_avg= s->dsp.avg_pixels_tab;
    c->hpel_put= s->no_rounding ? s->dsp.put_no_rnd_pixels_tab
                                : s->dsp.put_pixels_tab;

    if(s->linesize){
        c->stride  = s->linesize;
        c->uvstride= s->uvlinesize;
    }else{
        c->stride  = 16*s->mb_width + 32;
        c->uvstride=  8*s->mb_width + 16;
    }

    /* 8x8 fullpel search would need a 4x4 chroma compare, which we do
     * not have yet, and even if we had, the motion estimation code
     * does not expect it. */
    if(s->codec_id != CODEC_ID_SNOW){
        if(c->avctx->me_cmp&FF_CMP_CHROMA/* && !s->dsp.me_cmp[2]*/)
            s->dsp.me_cmp[2]= zero_cmp;
        if((c->avctx->me_sub_cmp&FF_CMP_CHROMA) && !s->dsp.me_sub_cmp[2])
            s->dsp.me_sub_cmp[2]= zero_cmp;
        for(i=0; i<4; i++)
            c->hpel_put[2][i]= zero_hpel;
    }

    if(s->codec_id == CODEC_ID_H261)
        c->sub_motion_search= no_sub_motion_search;

    return 0;
}
376

377
#if 0
/* Sum of absolute differences between a 16x16 block and `mean`.
 * Disabled code, kept for reference. */
static int pix_dev(uint8_t * pix, int line_size, int mean)
{
    int row, col;
    int sum = 0;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += FFABS(pix[col]-mean);
        pix += line_size;
    }
    return sum;
}
#endif
400

401
/**
 * "No search" variant: the result is simply 16 times the macroblock
 * coordinates (s->mb_x, s->mb_y), stored through mx_ptr / my_ptr.
 */
static inline void no_motion_search(MpegEncContext *s,
                                    int *mx_ptr, int *my_ptr)
{
    const int mb_x = s->mb_x;
    const int mb_y = s->mb_y;

    *mx_ptr = 16 * mb_x;
    *my_ptr = 16 * mb_y;
}

408 409
/* Threshold constant; not referenced in the part of the file shown here. */
#define Z_THRESHOLD 256

Michael Niedermayer's avatar
Michael Niedermayer committed
410
/*
 * Evaluate the half-pel candidate at offset (x, y) inside
 * sad_hpel_motion_search(): d = pix_abs score plus the motion vector
 * penalty, then COPY3_IF_LT() records (d, x, y) in (dminh, dx, dy).
 * Relies on these locals of the caller: s, size, h, pix, ptr, stride,
 * mv_penalty, pen_x, pen_y, penalty_factor, d, dminh, dx, dy.
 * `suffix` is unused. x and y are expanded more than once.
 */
#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
    d  = s->dsp.pix_abs[size][((x) ? 1 : 0) + ((y) ? 2 : 0)](NULL, pix, ptr + ((x) >> 1), stride, h);\
    d += (mv_penalty[pen_x + (x)] + mv_penalty[pen_y + (y)]) * penalty_factor;\
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
416

Michael Niedermayer's avatar
Michael Niedermayer committed
417
/**
 * Half-pel refinement around a full-pel vector using pix_abs (SAD) only.
 *
 * @param mx_ptr,my_ptr in: full-pel vector; out: vector in half-pel units
 *                      (both set to 0 when c->skip is set)
 * @param dmin          score of the full-pel vector
 * @param src_index     row of c->src to compare against
 * @param ref_index     row of c->ref to predict from
 * @param size,h        pix_abs table index and block height
 * @return best score found (dmin if no half-pel candidate improved on it,
 *         if c->skip is set, or if the vector lies on the search border)
 *
 * The locals below are consumed by LOAD_COMMON and CHECK_SAD_HALF_MV, so
 * their names and the order of the `ptr` adjustments must be preserved.
 * The scores of the four full-pel neighbours (t, l, r, b) taken from
 * score_map decide which half-pel candidates are evaluated.
 */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->sub_penalty_factor;
    int mx, my, dminh;
    uint8_t *pix, *ptr;
    int stride= c->stride;
    const int flags= c->sub_flags;
    LOAD_COMMON

    /* this routine handles neither qpel, chroma nor direct mode */
    assert(flags == 0);

    if(c->skip){
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    pix = c->src[src_index][0];

    mx = *mx_ptr;
    my = *my_ptr;
    ptr = c->ref[ref_index][0] + (my * stride) + mx;

    dminh = dmin;

    /* refine only when the vector is strictly inside the search window */
    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        const int index= (my<<ME_MAP_SHIFT) + mx;
        /* cached scores of the top / left / right / bottom full-pel neighbours */
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        /* switch to half-pel units */
        mx<<=1;
        my<<=1;

        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /* candidates with y == -1 are read starting one row above; ptr is
         * moved back down before any y >= 0 candidate is checked */
        ptr-= stride;
        if(t<=b){
            CHECK_SAD_HALF_MV(y2 , 0, -1)
            if(l<=r){
                CHECK_SAD_HALF_MV(xy2, -1, -1)
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
            }else{
                CHECK_SAD_HALF_MV(xy2, +1, -1)
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
            }
        }else{
            if(l<=r){
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
                CHECK_SAD_HALF_MV(xy2, -1, +1)
            }else{
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
                CHECK_SAD_HALF_MV(xy2, +1, +1)
            }
            CHECK_SAD_HALF_MV(y2 ,  0, +1)
        }
        mx+=dx;
        my+=dy;

    }else{
        /* on the border: keep the full-pel vector, expressed in half-pel units */
        mx<<=1;
        my<<=1;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}

525
/**
 * Store the vector (mx, my) for the current macroblock.
 *
 * s->p_mv_table is always written; when mv4 is non-zero the four motion_val
 * entries of the macroblock (two per row, rows b8_stride apart, starting at
 * block_index[0]) are written as well.
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    const int xy= s->mb_x + s->mb_y*s->mb_stride;
    int row, col;

    s->p_mv_table[xy][0] = mx;
    s->p_mv_table[xy][1] = my;

    /* has already been set to the 4 MV if 4MV is done */
    if(!mv4)
        return;

    for(row=0; row<2; row++){
        const int row_xy= s->block_index[0] + row*s->b8_stride;

        for(col=0; col<2; col++){
            s->current_picture.motion_val[0][row_xy + col][0]= mx;
            s->current_picture.motion_val[0][row_xy + col][1]= my;
        }
    }
}

549 550 551
/**
 * get fullpel ME search limits.
 */
552
static inline void get_limits(MpegEncContext *s, int x, int y)
Fabrice Bellard's avatar
Fabrice Bellard committed
553
{
554
    MotionEstContext * const c= &s->me;
555
    int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
556
/*
557
    if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
558
    else                   c->range= 16;
559
*/
Fabrice Bellard's avatar
Fabrice Bellard committed
560
    if (s->unrestricted_mv) {
561 562 563 564
        c->xmin = - x - 16;
        c->ymin = - y - 16;
        c->xmax = - x + s->mb_width *16;
        c->ymax = - y + s->mb_height*16;
565 566 567 568
    } else if (s->out_format == FMT_H261){
        // Search range of H261 is different from other codec standards
        c->xmin = (x > 15) ? - 15 : 0;
        c->ymin = (y > 15) ? - 15 : 0;
569
        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
570
        c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
Fabrice Bellard's avatar
Fabrice Bellard committed
571
    } else {
572 573 574 575
        c->xmin = - x;
        c->ymin = - y;
        c->xmax = - x + s->mb_width *16 - 16;
        c->ymax = - y + s->mb_height*16 - 16;
Fabrice Bellard's avatar
Fabrice Bellard committed
576
    }
577 578 579 580 581 582
    if(range){
        c->xmin = FFMAX(c->xmin,-range);
        c->xmax = FFMIN(c->xmax, range);
        c->ymin = FFMAX(c->ymin,-range);
        c->ymax = FFMIN(c->ymax, range);
    }
583 584
}

585 586
/**
 * Derive the plane-0 pointers for rows 1..3 of c->ref and c->src from
 * row 0: row 1 is 8 pixels to the right, row 2 is 8 lines down and row 3
 * is both (the four 8x8 quadrants of a 16x16 block).
 */
static inline void init_mv4_ref(MotionEstContext *c){
    const int stride= c->stride;
    int blk;

    for(blk=1; blk<4; blk++){
        const int off= 8*(blk&1) + 8*stride*(blk>>1);

        c->ref[blk][0] = c->ref[0][0] + off;
        c->src[blk][0] = c->src[0][0] + off;
    }
}

596
/**
 * Search one motion vector for each of the four 8x8 blocks of the current
 * macroblock and return the combined score.
 *
 * @param mx,my  vector found for the whole macroblock; used as the P_MV1
 *               candidate and to detect the "all four equal" case
 * @param shift  scale applied to the full-pel limits when clipping the
 *               predictors (c->xmax<<shift etc.)
 * @return INT_MAX when all four block vectors equal (mx, my); otherwise the
 *         summed score, plus 11*mb_penalty_factor unless mb_cmp is FF_CMP_RD
 *
 * Side effects: writes the four motion_val entries of the macroblock,
 * c->pred_x / c->pred_y, the plane-0 pointers of c->ref[1..3] / c->src[1..3]
 * and c->scratchpad.
 */
static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
{
    MotionEstContext * const c= &s->me;
    const int size= 1;
    const int h=8;
    int block;
    int P[10][2];
    int dmin_sum=0, mx4_sum=0, my4_sum=0;
    int same=1;
    const int stride= c->stride;
    uint8_t *mv_penalty= c->current_mv_penalty;

    init_mv4_ref(c);

    for(block=0; block<4; block++){
        int mx4, my4;
        int pred_x4, pred_y4;
        int dmin4;
        /* offset from the "top" entry to the "top right" one, per block */
        static const int off[4]= {2, 1, 1, -1};
        const int mot_stride = s->b8_stride;
        const int mot_xy = s->block_index[block];

        P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
        P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];

        if(P_LEFT[0]       > (c->xmax<<shift)) P_LEFT[0]       = (c->xmax<<shift);

        /* special case for first line */
        if (s->first_slice_line && block<2) {
            c->pred_x= pred_x4= P_LEFT[0];
            c->pred_y= pred_y4= P_LEFT[1];
        } else {
            P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][0];
            P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][1];
            P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
            P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
            if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
            if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
            if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
            if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            c->pred_x= pred_x4 = P_MEDIAN[0];
            c->pred_y= pred_y4 = P_MEDIAN[1];
        }
        P_MV1[0]= mx;
        P_MV1[1]= my;

        dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);

        dmin4= c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);

        if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
            /* mb_cmp differs from the search metric: build the prediction of
             * this block in the scratchpad so the macroblock can be
             * re-scored with mb_cmp after the loop; only the vector cost is
             * accumulated here */
            int dxy;
            const int offset= ((block&1) + (block>>1)*stride)*8;
            uint8_t *dest_y = c->scratchpad + offset;
            if(s->quarter_sample){
                uint8_t *ref= c->ref[block][0] + (mx4>>2) + (my4>>2)*stride;
                dxy = ((my4 & 3) << 2) | (mx4 & 3);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y   , ref    , stride);
                else
                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y   , ref    , stride);
            }else{
                uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
                dxy = ((my4 & 1) << 1) | (mx4 & 1);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y    , ref    , stride, h);
                else
                    s->dsp.put_pixels_tab       [1][dxy](dest_y    , ref    , stride, h);
            }
            dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*c->mb_penalty_factor;
        }else
            dmin_sum+= dmin4;

        if(s->quarter_sample){
            mx4_sum+= mx4/2;
            my4_sum+= my4/2;
        }else{
            mx4_sum+= mx4;
            my4_sum+= my4;
        }

        s->current_picture.motion_val[0][ s->block_index[block] ][0]= mx4;
        s->current_picture.motion_val[0][ s->block_index[block] ][1]= my4;

        if(mx4 != mx || my4 != my) same=0;
    }

    /* four identical vectors bring nothing over the 16x16 mode */
    if(same)
        return INT_MAX;

    if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
        dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, c->scratchpad, stride, 16);
    }

    if(c->avctx->mb_cmp&FF_CMP_CHROMA){
        int dxy;
        /* chroma vector derived from the four luma vectors; named so that it
         * cannot be confused with the mx/my parameters used below */
        int chroma_mx, chroma_my;
        int offset;

        chroma_mx= ff_h263_round_chroma(mx4_sum);
        chroma_my= ff_h263_round_chroma(my4_sum);
        dxy = ((chroma_my & 1) << 1) | (chroma_mx & 1);

        offset= (s->mb_x*8 + (chroma_mx>>1)) + (s->mb_y*8 + (chroma_my>>1))*s->uvlinesize;

        if(s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }else{
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }

        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad  , s->uvlinesize, 8);
        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad+8, s->uvlinesize, 8);
    }

    /* restore the predictor to the macroblock vector passed in by the caller */
    c->pred_x= mx;
    c->pred_y= my;

    switch(c->avctx->mb_cmp&0xFF){
    /*case FF_CMP_SSE:
        return dmin_sum+ 32*s->qscale*s->qscale;*/
    case FF_CMP_RD:
        return dmin_sum;
    default:
        return dmin_sum+ 11*c->mb_penalty_factor;
    }
}

732 733 734 735 736 737 738 739 740 741 742 743 744
/**
 * Set up the plane pointers with index 1 (source) and ref_index+1 (reference)
 * so that they point exactly one line below the pointers with index 0 and
 * ref_index respectively.
 *
 * Luma pointers are always set; chroma pointers (planes 1 and 2) are only
 * set when FLAG_CHROMA is present in the motion estimation flags.
 *
 * @param s         encoder context; s->me holds the pointer tables
 * @param ref_index index of the reference entry the new entry is derived from
 */
static inline void init_interlaced_ref(MpegEncContext *s, int ref_index)
{
    MotionEstContext *const me = &s->me;
    const int base = ref_index;
    const int next = ref_index + 1;
    int plane;

    /* luma: one luma line further down */
    me->ref[next][0] = me->ref[base][0] + s->linesize;
    me->src[1][0]    = me->src[0][0]    + s->linesize;

    if (!(me->flags & FLAG_CHROMA))
        return;

    /* both chroma planes: one chroma line further down */
    for (plane = 1; plane <= 2; plane++) {
        me->ref[next][plane] = me->ref[base][plane] + s->uvlinesize;
        me->src[1][plane]    = me->src[0][plane]    + s->uvlinesize;
    }
}

745
/**
 * Search motion vectors for the two 16x8 field blocks of the current
 * macroblock and pick, for each block, the reference field with the lowest
 * score.
 *
 * While the search runs, the vertical limits in the context are halved and
 * the strides are doubled; both are restored before returning.
 *
 * @param s                   encoder context
 * @param ref_index           base index into c->ref[] (field_select is added to it)
 * @param mv_tables           [block][field_select] tables receiving the found vectors
 * @param field_select_tables [block] tables receiving (or, with
 *                            user_field_select, providing) the chosen field
 * @param mx, my              frame motion vector, used as an extra predictor
 * @param user_field_select   if non-zero only the field already stored in
 *                            field_select_tables is searched
 * @return INT_MAX if the result is equivalent to the frame vector (mx,my)
 *         with same-parity fields, otherwise the summed score of both blocks
 *         (plus 11*mb_penalty_factor unless mb_cmp is FF_CMP_RD)
 */
static int interlaced_search(MpegEncContext *s, int ref_index,
                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
{
    MotionEstContext * const c = &s->me;
    const int size = 0;
    const int h    = 8;
    int block;
    int P[10][2];
    uint8_t * const mv_penalty = c->current_mv_penalty;
    int same = 1;
    const int stride     = 2 * s->linesize;
    int dmin_sum = 0;
    const int mot_stride = s->mb_stride;
    const int xy         = s->mb_x + s->mb_y * mot_stride;

    /* switch the context to field geometry */
    c->ymin     >>= 1;
    c->ymax     >>= 1;
    c->stride   <<= 1;
    c->uvstride <<= 1;
    init_interlaced_ref(s, ref_index);

    for (block = 0; block < 2; block++) {
        int field_select;
        int best_dmin  = INT_MAX;
        int best_field = -1;

        for (field_select = 0; field_select < 2; field_select++) {
            int dmin, mx_i, my_i;
            int16_t (*mv_table)[2] = mv_tables[block][field_select];

            if (user_field_select) {
                assert(field_select == 0 || field_select == 1);
                assert(field_select_tables[block][xy] == 0 || field_select_tables[block][xy] == 1);
                if (field_select_tables[block][xy] != field_select)
                    continue;
            }

            /* The limits may be negative, so scale them by multiplication:
             * left-shifting a negative value is undefined behaviour. */
            P_LEFT[0] = mv_table[xy - 1][0];
            P_LEFT[1] = mv_table[xy - 1][1];
            if (P_LEFT[0] > c->xmax * 2)
                P_LEFT[0] = c->xmax * 2;

            c->pred_x = P_LEFT[0];
            c->pred_y = P_LEFT[1];

            if (!s->first_slice_line) {
                P_TOP[0]      = mv_table[xy - mot_stride][0];
                P_TOP[1]      = mv_table[xy - mot_stride][1];
                P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
                P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
                if (P_TOP[1]      > c->ymax * 2) P_TOP[1]      = c->ymax * 2;
                if (P_TOPRIGHT[0] < c->xmin * 2) P_TOPRIGHT[0] = c->xmin * 2;
                if (P_TOPRIGHT[0] > c->xmax * 2) P_TOPRIGHT[0] = c->xmax * 2;
                if (P_TOPRIGHT[1] > c->ymax * 2) P_TOPRIGHT[1] = c->ymax * 2;

                P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            P_MV1[0] = mx; //FIXME not correct if block != field_select
            P_MV1[1] = my / 2;

            dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select + ref_index, mv_table, (1 << 16) >> 1);

            dmin = c->sub_motion_search(s, &mx_i, &my_i, dmin, block, field_select + ref_index, size, h);

            mv_table[xy][0] = mx_i;
            mv_table[xy][1] = my_i;

            if (s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]) {
                /* the sub-pel search used a different metric than the
                 * macroblock decision: recompute the score with mb_cmp */
                int dxy;

                //FIXME chroma ME
                uint8_t *ref = c->ref[field_select + ref_index][0] + (mx_i >> 1) + (my_i >> 1) * stride;
                dxy = ((my_i & 1) << 1) | (mx_i & 1);

                if (s->no_rounding) {
                    s->dsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref, stride, h);
                } else {
                    s->dsp.put_pixels_tab       [size][dxy](c->scratchpad, ref, stride, h);
                }
                dmin  = s->dsp.mb_cmp[size](s, c->src[block][0], c->scratchpad, stride, h);
                dmin += (mv_penalty[mx_i - c->pred_x] + mv_penalty[my_i - c->pred_y] + 1) * c->mb_penalty_factor;
            } else
                dmin += c->mb_penalty_factor; //field_select bits

            dmin += field_select != block; //slightly prefer same field

            if (dmin < best_dmin) {
                best_dmin  = dmin;
                best_field = field_select;
            }
        }
        {
            int16_t (*mv_table)[2] = mv_tables[block][best_field];

            if (mv_table[xy][0] != mx) same = 0; //FIXME check if these checks work and are any good at all
            if (mv_table[xy][1] & 1) same = 0;
            if (mv_table[xy][1] * 2 != my) same = 0;
            if (best_field != block) same = 0;
        }

        field_select_tables[block][xy] = best_field;
        dmin_sum += best_dmin;
    }

    /* restore frame geometry; ymin may be negative, hence no left shift */
    c->ymin     *= 2;
    c->ymax     *= 2;
    c->stride   >>= 1;
    c->uvstride >>= 1;

    if (same)
        return INT_MAX;

    switch (c->avctx->mb_cmp & 0xFF) {
    /*case FF_CMP_SSE:
        return dmin_sum+ 32*s->qscale*s->qscale;*/
    case FF_CMP_RD:
        return dmin_sum;
    default:
        return dmin_sum + 11 * c->mb_penalty_factor;
    }
}

867 868 869
/**
 * Clamp a motion vector in place to the current motion estimation limits.
 *
 * @param s          encoder context; limits are read from s->me
 * @param mv         vector to clamp, mv[0] is x and mv[1] is y
 * @param interlaced shift applied to the vertical limits before clamping
 *                   (callers in this file pass 0 or 1)
 */
static void clip_input_mv(MpegEncContext *s, int16_t *mv, int interlaced)
{
    const int y_lo = s->me.ymin >> interlaced;
    const int y_hi = s->me.ymax >> interlaced;

    /* horizontal component: lower bound first, then upper bound */
    if (mv[0] < s->me.xmin)
        mv[0] = s->me.xmin;
    if (mv[0] > s->me.xmax)
        mv[0] = s->me.xmax;

    /* vertical component, same order */
    if (mv[1] < y_lo)
        mv[1] = y_lo;
    if (mv[1] > y_hi)
        mv[1] = y_hi;
}

877 878 879 880 881 882 883 884 885
/**
 * Check and score the motion information already stored in the current
 * picture for one macroblock.
 *
 * The stored vectors are clipped to the current limits, copied into the
 * encoder's motion vector / field select tables that match the macroblock
 * type, and s->mb_type[] is set to the corresponding candidate type.
 *
 * @param s      encoder context
 * @param mb_x   macroblock column
 * @param mb_y   macroblock row
 * @param p_type non-zero when called for a P frame
 * @return the SSE based score of the stored vectors (0 if no list is used),
 *         or INT_MAX/2 if the stored data is not usable with the current
 *         settings (an error is logged in that case)
 */
static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int p_type){
    MotionEstContext * const c = &s->me;
    Picture *p    = s->current_picture_ptr;
    int mb_xy     = mb_x + mb_y * s->mb_stride;
    int xy        = 2 * mb_x + 2 * mb_y * s->b8_stride;
    int mb_type   = s->current_picture.mb_type[mb_xy];
    int flags     = c->flags;
    int shift     = (flags & FLAG_QPEL) + 1;
    int mask      = (1 << shift) - 1;
    int x, y, i;
    int d = 0;
    me_cmp_func cmpf        = s->dsp.sse[0];
    me_cmp_func chroma_cmpf = s->dsp.sse[1];

    if (p_type && USES_LIST(mb_type, 1)) {
        av_log(c->avctx, AV_LOG_ERROR, "backward motion vector in P frame\n");
        return INT_MAX/2;
    }
    assert(IS_INTRA(mb_type) || USES_LIST(mb_type,0) || USES_LIST(mb_type,1));

    /* clip the stored vectors of all four blocks, for both lists */
    for (i = 0; i < 4; i++) {
        const int blk_xy = s->block_index[i];
        clip_input_mv(s, p->motion_val[0][blk_xy], !!IS_INTERLACED(mb_type));
        clip_input_mv(s, p->motion_val[1][blk_xy], !!IS_INTERLACED(mb_type));
    }

    if (IS_INTERLACED(mb_type)) {
        int xy2 = xy + s->b8_stride;
        s->mb_type[mb_xy] = CANDIDATE_MB_TYPE_INTRA;

        /* Reject the macroblock BEFORE the strides are modified, so that the
         * error return does not leave c->stride / c->uvstride doubled. */
        if (!(s->flags & CODEC_FLAG_INTERLACED_ME)) {
            av_log(c->avctx, AV_LOG_ERROR, "Interlaced macroblock selected but interlaced motion estimation disabled\n");
            return INT_MAX/2;
        }

        c->stride   <<= 1;
        c->uvstride <<= 1;

        if (USES_LIST(mb_type, 0)) {
            int field_select0 = p->ref_index[0][4*mb_xy  ];
            int field_select1 = p->ref_index[0][4*mb_xy+2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 0);

            if (p_type) {
                s->p_field_select_table[0][mb_xy] = field_select0;
                s->p_field_select_table[1][mb_xy] = field_select1;
                *(uint32_t*)s->p_field_mv_table[0][field_select0][mb_xy] = *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->p_field_mv_table[1][field_select1][mb_xy] = *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy] = CANDIDATE_MB_TYPE_INTER_I;
            } else {
                s->b_field_select_table[0][0][mb_xy] = field_select0;
                s->b_field_select_table[0][1][mb_xy] = field_select1;
                *(uint32_t*)s->b_field_mv_table[0][0][field_select0][mb_xy] = *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->b_field_mv_table[0][1][field_select1][mb_xy] = *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy] = CANDIDATE_MB_TYPE_FORWARD_I;
            }

            x = p->motion_val[0][xy ][0];
            y = p->motion_val[0][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0, 0, cmpf, chroma_cmpf, flags);
            x = p->motion_val[0][xy2][0];
            y = p->motion_val[0][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1, 1, cmpf, chroma_cmpf, flags);
        }
        if (USES_LIST(mb_type, 1)) {
            int field_select0 = p->ref_index[1][4*mb_xy  ];
            int field_select1 = p->ref_index[1][4*mb_xy+2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 2);

            s->b_field_select_table[1][0][mb_xy] = field_select0;
            s->b_field_select_table[1][1][mb_xy] = field_select1;
            *(uint32_t*)s->b_field_mv_table[1][0][field_select0][mb_xy] = *(uint32_t*)p->motion_val[1][xy ];
            *(uint32_t*)s->b_field_mv_table[1][1][field_select1][mb_xy] = *(uint32_t*)p->motion_val[1][xy2];
            if (USES_LIST(mb_type, 0)) {
                s->mb_type[mb_xy] = CANDIDATE_MB_TYPE_BIDIR_I;
            } else {
                s->mb_type[mb_xy] = CANDIDATE_MB_TYPE_BACKWARD_I;
            }

            /* note: this overwrites a forward score computed above */
            x = p->motion_val[1][xy ][0];
            y = p->motion_val[1][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0+2, 0, cmpf, chroma_cmpf, flags);
            x = p->motion_val[1][xy2][0];
            y = p->motion_val[1][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1+2, 1, cmpf, chroma_cmpf, flags);
            //FIXME bidir scores
        }
        c->stride   >>= 1;
        c->uvstride >>= 1;
    } else if (IS_8X8(mb_type)) {
        if (!(s->flags & CODEC_FLAG_4MV)) {
            av_log(c->avctx, AV_LOG_ERROR, "4MV macroblock selected but 4MV encoding disabled\n");
            return INT_MAX/2;
        }
        cmpf        = s->dsp.sse[1];
        chroma_cmpf = s->dsp.sse[1];
        init_mv4_ref(c);
        for (i = 0; i < 4; i++) {
            const int blk_xy = s->block_index[i];
            x = p->motion_val[0][blk_xy][0];
            y = p->motion_val[0][blk_xy][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 1, 8, i, i, cmpf, chroma_cmpf, flags);
        }
        s->mb_type[mb_xy] = CANDIDATE_MB_TYPE_INTER4V;
    } else {
        if (USES_LIST(mb_type, 0)) {
            if (p_type) {
                *(uint32_t*)s->p_mv_table[mb_xy] = *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy] = CANDIDATE_MB_TYPE_INTER;
            } else if (USES_LIST(mb_type, 1)) {
                *(uint32_t*)s->b_bidir_forw_mv_table[mb_xy] = *(uint32_t*)p->motion_val[0][xy];
                *(uint32_t*)s->b_bidir_back_mv_table[mb_xy] = *(uint32_t*)p->motion_val[1][xy];
                s->mb_type[mb_xy] = CANDIDATE_MB_TYPE_BIDIR;
            } else {
                *(uint32_t*)s->b_forw_mv_table[mb_xy] = *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy] = CANDIDATE_MB_TYPE_FORWARD;
            }
            x = p->motion_val[0][xy][0];
            y = p->motion_val[0][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 0, 0, cmpf, chroma_cmpf, flags);
        } else if (USES_LIST(mb_type, 1)) {
            *(uint32_t*)s->b_back_mv_table[mb_xy] = *(uint32_t*)p->motion_val[1][xy];
            s->mb_type[mb_xy] = CANDIDATE_MB_TYPE_BACKWARD;

            x = p->motion_val[1][xy][0];
            y = p->motion_val[1][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 2, 0, cmpf, chroma_cmpf, flags);
        } else
            s->mb_type[mb_xy] = CANDIDATE_MB_TYPE_INTRA;
    }
    return d;
}

1013 1014 1015
/**
 * Add the contribution of one macroblock to the scene change score.
 *
 * @param s    encoder context; the score is accumulated in s->me
 * @param vard sum of squared differences against the motion compensated block
 * @param varc variance measure of the source block (including the +500 bias)
 */
static inline void p_frame_add_scene_change_score(MpegEncContext *s, int vard, int varc)
{
    MotionEstContext * const c = &s->me;
    int p_score = FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
    int i_score = varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;

    c->scene_change_score += ff_sqrt(p_score) - ff_sqrt(i_score);
}

/**
 * Estimate the motion of one macroblock of a P frame and select its
 * candidate macroblock type(s).
 *
 * Besides the search itself this fills the per-macroblock statistics of the
 * current picture (mb_mean, mb_var, mc_mb_var), accumulates the variance
 * sums and the scene change score in the motion estimation context, and
 * stores the resulting candidate type in s->mb_type[].
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_p_frame_motion(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    MotionEstContext * const c = &s->me;
    uint8_t *pix, *ppix;
    int sum, mx, my, dmin;
    int varc;            ///< the variance of the block (sum of squared (p[y][x]-average))
    int vard;            ///< sum of squared differences with the estimated motion vector
    int P[10][2];
    const int shift   = 1 + s->quarter_sample;
    /* Signed vectors/limits are scaled by multiplication; left-shifting a
     * negative value is undefined behaviour. */
    const int mv_unit = 1 << shift;
    const int mb_xy   = s->mb_stride * mb_y + mb_x;
    int mb_type = 0;
    Picture * const pic = &s->current_picture;

    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);
    assert(s->linesize == c->stride);
    assert(s->uvlinesize == c->uvstride);

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip = 0;

    /* intra / predictive decision */
    pix = c->src[0][0];
    sum = s->dsp.pix_sum(pix, s->linesize);
    /* square in unsigned arithmetic: sum*sum may exceed INT_MAX */
    varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500;

    pic->mb_mean[mb_xy] = (sum+128)>>8;
    pic->mb_var [mb_xy] = (varc+128)>>8;
    c->mb_var_sum_temp += (varc+128)>>8;

    if (c->avctx->me_threshold) {
        vard = check_input_motion(s, mb_x, mb_y, 1);

        if ((vard+128)>>8 < c->avctx->me_threshold) {
            /* the supplied motion is good enough: keep it and stop here */
            pic->mc_mb_var[mb_xy] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            p_frame_add_scene_change_score(s, vard, varc);
            return;
        }
        if ((vard+128)>>8 < c->avctx->mb_threshold)
            mb_type = s->mb_type[mb_xy];
    }

    switch (s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        mx -= mb_x*16;
        my -= mb_y*16;
        dmin = 0;
        break;
    case ME_X1:
    case ME_EPZS:
       {
            const int mot_stride = s->b8_stride;
            const int mot_xy     = s->block_index[0];

            P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
            P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];

            if (P_LEFT[0] > c->xmax * mv_unit) P_LEFT[0] = c->xmax * mv_unit;

            if (!s->first_slice_line) {
                P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
                P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
                P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
                P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
                if (P_TOP[1]      > c->ymax * mv_unit) P_TOP[1]      = c->ymax * mv_unit;
                if (P_TOPRIGHT[0] < c->xmin * mv_unit) P_TOPRIGHT[0] = c->xmin * mv_unit;
                if (P_TOPRIGHT[1] > c->ymax * mv_unit) P_TOPRIGHT[1] = c->ymax * mv_unit;

                P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

                if (s->out_format == FMT_H263) {
                    c->pred_x = P_MEDIAN[0];
                    c->pred_y = P_MEDIAN[1];
                } else { /* mpeg1 at least */
                    c->pred_x = P_LEFT[0];
                    c->pred_y = P_LEFT[1];
                }
            } else {
                c->pred_x = P_LEFT[0];
                c->pred_y = P_LEFT[1];
            }

        }
        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

        break;
    }

    /* At this point (mx,my) are full-pel and the relative displacement */
    ppix = c->ref[0][0] + (my * s->linesize) + mx;

    vard = s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16);

    pic->mc_mb_var[mb_xy] = (vard+128)>>8;
//    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
    c->mc_mb_var_sum_temp += (vard+128)>>8;

#if 0
    printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
           varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
#endif
    if (mb_type) {
        /* the type was taken from the supplied motion data: only refine it */
        p_frame_add_scene_change_score(s, vard, varc);

        if (mb_type == CANDIDATE_MB_TYPE_INTER) {
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            set_p_mv_tables(s, mx, my, 1);
        } else {
            mx *= mv_unit;
            my *= mv_unit;
        }
        if (mb_type == CANDIDATE_MB_TYPE_INTER4V) {
            h263_mv4_search(s, mx, my, shift);

            set_p_mv_tables(s, mx, my, 0);
        }
        if (mb_type == CANDIDATE_MB_TYPE_INTER_I) {
            interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 1);
        }
    } else if (c->avctx->mb_decision > FF_MB_DECISION_SIMPLE) {
        /* collect every plausible candidate type; the final choice is made
         * outside this function */
        p_frame_add_scene_change_score(s, vard, varc);

        if (vard*2 + 200*256 > varc)
            mb_type |= CANDIDATE_MB_TYPE_INTRA;
        if (varc*2 + 200*256 > vard || s->qscale > 24) {
//        if (varc*2 + 200*256 + 50*(s->lambda2>>FF_LAMBDA_SHIFT) > vard){
            mb_type |= CANDIDATE_MB_TYPE_INTER;
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            if (s->flags & CODEC_FLAG_MV0)
                if (mx || my)
                    mb_type |= CANDIDATE_MB_TYPE_SKIPPED; //FIXME check difference
        } else {
            mx *= mv_unit;
            my *= mv_unit;
        }
        if ((s->flags & CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8) {
            if (h263_mv4_search(s, mx, my, shift) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER4V;

            set_p_mv_tables(s, mx, my, 0);
        } else
            set_p_mv_tables(s, mx, my, 1);
        if ((s->flags & CODEC_FLAG_INTERLACED_ME)
           && !c->skip) { //FIXME varc/d checks
            if (interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER_I;
        }
    } else {
        /* simple decision: pick the single best type by score right here */
        int intra_score, i;
        mb_type = CANDIDATE_MB_TYPE_INTER;

        dmin = c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
        if (c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
            dmin = ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

        if ((s->flags & CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8) {
            int dmin4 = h263_mv4_search(s, mx, my, shift);
            if (dmin4 < dmin) {
                mb_type = CANDIDATE_MB_TYPE_INTER4V;
                dmin = dmin4;
            }
        }
        if ((s->flags & CODEC_FLAG_INTERLACED_ME)
           && !c->skip) { //FIXME varc/d checks
            int dmin_i = interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);
            if (dmin_i < dmin) {
                mb_type = CANDIDATE_MB_TYPE_INTER_I;
                dmin = dmin_i;
            }
        }

//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
        set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);

        /* get intra luma score */
        if ((c->avctx->mb_cmp&0xFF)==FF_CMP_SSE) {
            intra_score = varc - 500;
        } else {
            const int mean = (sum+128)>>8;

            /* Fill a 16x16 block with the mean value. memset avoids the
             * signed overflow of mean*0x01010101 for mean >= 128 and the
             * type-punned 32-bit stores. */
            for (i = 0; i < 16; i++)
                memset(&c->scratchpad[i*s->linesize], mean, 16);

            intra_score = s->dsp.mb_cmp[0](s, c->scratchpad, pix, s->linesize, 16);
        }
#if 0 //FIXME
        /* get chroma score */
        if(c->avctx->mb_cmp&FF_CMP_CHROMA){
            for(i=1; i<3; i++){
                uint8_t *dest_c;
                int mean;

                if(s->out_format == FMT_H263){
                    mean= (s->dc_val[i][mb_x + mb_y*s->b8_stride] + 4)>>3; //FIXME not exact but simple ;)
                }else{
                    mean= (s->last_dc[i] + 4)>>3;
                }
                dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;

                mean*= 0x01010101;
                for(i=0; i<8; i++){
                    *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 0]) = mean;
                    *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 4]) = mean;
                }

                intra_score+= s->dsp.mb_cmp[1](s, c->scratchpad, dest_c, s->uvlinesize);
            }
        }
#endif
        intra_score += c->mb_penalty_factor*16;

        if (intra_score < dmin) {
            mb_type = CANDIDATE_MB_TYPE_INTRA;
            pic->mb_type[mb_xy] = CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
        } else
            pic->mb_type[mb_xy] = 0;

        p_frame_add_scene_change_score(s, vard, varc);
    }

    s->mb_type[mb_xy] = mb_type;
}

1263 1264 1265
/**
 * Run the pre-pass motion search for one macroblock of a P frame and store
 * the resulting vector in s->p_mv_table.
 *
 * The predictors are read from the entries at xy+1 and at xy+mb_stride
 * (and xy+mb_stride-1), i.e. from the right and the row below rather than
 * from the left and the row above.
 * NOTE(review): presumably the caller walks the macroblocks in reverse
 * order during this pass; confirm against the caller.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the score returned by the EPZS search
 */
int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
                                    int mb_x, int mb_y)
{
    MotionEstContext * const c = &s->me;
    int mx, my, dmin;
    int P[10][2];
    const int shift   = 1 + s->quarter_sample;
    /* Limits and vectors may be negative, so they are scaled by
     * multiplication; left-shifting a negative value is undefined. */
    const int mv_unit = 1 << shift;
    const int xy      = mb_x + mb_y * s->mb_stride;
    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);

    c->pre_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
    c->current_mv_penalty = c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip = 0;

    P_LEFT[0] = s->p_mv_table[xy + 1][0];
    P_LEFT[1] = s->p_mv_table[xy + 1][1];

    if (P_LEFT[0] < c->xmin * mv_unit) P_LEFT[0] = c->xmin * mv_unit;

    /* special case for first line */
    if (s->first_slice_line) {
        c->pred_x = P_LEFT[0];
        c->pred_y = P_LEFT[1];
        P_TOP[0] = P_TOPRIGHT[0] = P_MEDIAN[0] =
        P_TOP[1] = P_TOPRIGHT[1] = P_MEDIAN[1] = 0; //FIXME
    } else {
        P_TOP[0]      = s->p_mv_table[xy + s->mb_stride    ][0];
        P_TOP[1]      = s->p_mv_table[xy + s->mb_stride    ][1];
        P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0];
        P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1];
        if (P_TOP[1]      < c->ymin * mv_unit) P_TOP[1]      = c->ymin * mv_unit;
        if (P_TOPRIGHT[0] > c->xmax * mv_unit) P_TOPRIGHT[0] = c->xmax * mv_unit;
        if (P_TOPRIGHT[1] < c->ymin * mv_unit) P_TOPRIGHT[1] = c->ymin * mv_unit;

        P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

        c->pred_x = P_MEDIAN[0];
        c->pred_y = P_MEDIAN[1];
    }

    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

    /* the search result is full-pel; store it in sub-pel units */
    s->p_mv_table[xy][0] = mx * mv_unit;
    s->p_mv_table[xy][1] = my * mv_unit;

    return dmin;
}

1316
/**
 * Estimate one motion vector (one prediction direction) for a macroblock
 * of a B frame and store it in mv_table.
 *
 * @param s         encoder context
 * @param mb_x      macroblock column
 * @param mb_y      macroblock row
 * @param mv_table  table providing the neighbouring predictors and receiving
 *                  the result; it is compared against s->b_forw_mv_table to
 *                  select how the temporal scale factor is computed
 * @param ref_index index of the reference to search in
 * @param f_code    selects the motion vector penalty table
 * @return the score of the selected vector
 */
static int ff_estimate_motion_b(MpegEncContext * s,
                       int mb_x, int mb_y, int16_t (*mv_table)[2], int ref_index, int f_code)
{
    MotionEstContext * const c = &s->me;
    int mx, my, dmin;
    int P[10][2];
    const int shift      = 1 + s->quarter_sample;
    /* Limits may be negative, so they are scaled by multiplication;
     * left-shifting a negative value is undefined behaviour. */
    const int mv_unit    = 1 << shift;
    const int mot_stride = s->mb_stride;
    const int mot_xy     = mb_y * mot_stride + mb_x;
    uint8_t * const mv_penalty = c->mv_penalty[f_code] + MAX_MV;
    int mv_scale;

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= mv_penalty;

    get_limits(s, 16*mb_x, 16*mb_y);

    switch (s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        dmin = 0;
        mx -= mb_x*16;
        my -= mb_y*16;
        break;
    case ME_X1:
    case ME_EPZS:
       {
            P_LEFT[0] = mv_table[mot_xy - 1][0];
            P_LEFT[1] = mv_table[mot_xy - 1][1];

            if (P_LEFT[0] > c->xmax * mv_unit) P_LEFT[0] = c->xmax * mv_unit;

            /* special case for first line */
            if (!s->first_slice_line) {
                P_TOP[0]      = mv_table[mot_xy - mot_stride    ][0];
                P_TOP[1]      = mv_table[mot_xy - mot_stride    ][1];
                P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1][0];
                P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1][1];
                if (P_TOP[1]      > c->ymax * mv_unit) P_TOP[1]      = c->ymax * mv_unit;
                if (P_TOPRIGHT[0] < c->xmin * mv_unit) P_TOPRIGHT[0] = c->xmin * mv_unit;
                if (P_TOPRIGHT[1] > c->ymax * mv_unit) P_TOPRIGHT[1] = c->ymax * mv_unit;

                P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            c->pred_x = P_LEFT[0];
            c->pred_y = P_LEFT[1];
        }

        /* 16.16 fixed point factor applied to the vectors of s->p_mv_table;
         * the non-forward difference (pb_time - pp_time) can be negative,
         * so it is multiplied instead of shifted */
        if (mv_table == s->b_forw_mv_table) {
            mv_scale = (s->pb_time * (1 << 16)) / (s->pp_time << shift);
        } else {
            mv_scale = ((s->pb_time - s->pp_time) * (1 << 16)) / (s->pp_time << shift);
        }

        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);

        break;
    }

    dmin = c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);

    /* re-score with the macroblock metric if it differs from the sub-pel one */
    if (c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin = ff_get_mb_score(s, mx, my, 0, ref_index, 0, 16, 1);

//printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
//    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
    mv_table[mot_xy][0] = mx;
    mv_table[mot_xy][1] = my;

    return dmin;
}

1392
/**
 * Compute the score of a bidirectional prediction.
 *
 * The forward prediction is written to the scratchpad, the backward
 * prediction is averaged into it, and the result is compared against the
 * source block with the mb_cmp function. The rate penalties of both vectors
 * relative to their predictors are added. Chroma is not taken into account.
 *
 * @param s                    encoder context
 * @param motion_fx, motion_fy forward vector (half- or quarter-pel units,
 *                             depending on s->quarter_sample)
 * @param motion_bx, motion_by backward vector, same units
 * @param pred_fx, pred_fy     predictor of the forward vector
 * @param pred_bx, pred_by     predictor of the backward vector
 * @param size                 index into the pixel and compare function tables
 * @param h                    block height passed to the pixel/compare functions
 * @return the score (lower is better)
 */
static inline int check_bidir_mv(MpegEncContext * s,
                   int motion_fx, int motion_fy,
                   int motion_bx, int motion_by,
                   int pred_fx, int pred_fy,
                   int pred_bx, int pred_by,
                   int size, int h)
{
    //FIXME optimize?
    //FIXME better f_code prediction (max mv & distance)
    //FIXME pointers
    MotionEstContext * const c = &s->me;
    uint8_t * const mv_penalty_f = c->mv_penalty[s->f_code] + MAX_MV; // penalty table selected by f_code
    uint8_t * const mv_penalty_b = c->mv_penalty[s->b_code] + MAX_MV; // penalty table selected by b_code
    int stride = c->stride;
    uint8_t *dest_y = c->scratchpad;
    uint8_t *ptr;
    int dxy;
    int src_x, src_y;
    int fbmin;
    uint8_t **src_data  = c->src[0];
    uint8_t **ref_data  = c->ref[0];
    uint8_t **ref2_data = c->ref[2];

    if (s->quarter_sample) {
        /* forward prediction; indexed by size like every other call here */
        dxy   = ((motion_fy & 3) << 2) | (motion_fx & 3);
        src_x = motion_fx >> 2;
        src_y = motion_fy >> 2;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        s->dsp.put_qpel_pixels_tab[size][dxy](dest_y, ptr, stride);

        /* backward prediction, averaged into the forward one */
        dxy   = ((motion_by & 3) << 2) | (motion_bx & 3);
        src_x = motion_bx >> 2;
        src_y = motion_by >> 2;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y, ptr, stride);
    } else {
        /* forward prediction */
        dxy   = ((motion_fy & 1) << 1) | (motion_fx & 1);
        src_x = motion_fx >> 1;
        src_y = motion_fy >> 1;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        s->dsp.put_pixels_tab[size][dxy](dest_y, ptr, stride, h);

        /* backward prediction, averaged into the forward one */
        dxy   = ((motion_by & 1) << 1) | (motion_bx & 1);
        src_x = motion_bx >> 1;
        src_y = motion_by >> 1;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_pixels_tab[size][dxy](dest_y, ptr, stride, h);
    }

    fbmin = (mv_penalty_f[motion_fx-pred_fx] + mv_penalty_f[motion_fy-pred_fy])*c->mb_penalty_factor
           +(mv_penalty_b[motion_bx-pred_bx] + mv_penalty_b[motion_by-pred_by])*c->mb_penalty_factor
           + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic

    //FIXME CHROMA !!! (nothing is added when FF_CMP_CHROMA is set in mb_cmp)

    return fbmin;
}
1455

1456
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1457
static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
1458
{
1459
    MotionEstContext * const c= &s->me;
1460 1461
    const int mot_stride = s->mb_stride;
    const int xy = mb_y *mot_stride + mb_x;
1462 1463 1464 1465 1466 1467 1468 1469 1470
    int fbmin;
    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1471 1472 1473 1474 1475 1476 1477
    const int flags= c->sub_flags;
    const int qpel= flags&FLAG_QPEL;
    const int shift= 1+qpel;
    const int xmin= c->xmin<<shift;
    const int ymin= c->ymin<<shift;
    const int xmax= c->xmax<<shift;
    const int ymax= c->ymax<<shift;
1478 1479
#define HASH(fx,fy,bx,by) ((fx)+17*(fy)+63*(bx)+117*(by))
    int hashidx= HASH(motion_fx,motion_fy, motion_bx, motion_by);
1480
    uint8_t map[256];
1481 1482

    memset(map,0,sizeof(map));
1483
    map[hashidx&255] = 1;
1484

1485
    fbmin= check_bidir_mv(s, motion_fx, motion_fy,
1486 1487
                          motion_bx, motion_by,
                          pred_fx, pred_fy,
1488 1489
                          pred_bx, pred_by,
                          0, 16);
1490

1491
    if(s->avctx->bidir_refine){
1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529
        int end;
        static const uint8_t limittab[5]={0,8,32,64,80};
        const int limit= limittab[s->avctx->bidir_refine];
        static const int8_t vect[][4]={
{ 0, 0, 0, 1}, { 0, 0, 0,-1}, { 0, 0, 1, 0}, { 0, 0,-1, 0}, { 0, 1, 0, 0}, { 0,-1, 0, 0}, { 1, 0, 0, 0}, {-1, 0, 0, 0},

{ 0, 0, 1, 1}, { 0, 0,-1,-1}, { 0, 1, 1, 0}, { 0,-1,-1, 0}, { 1, 1, 0, 0}, {-1,-1, 0, 0}, { 1, 0, 0, 1}, {-1, 0, 0,-1},
{ 0, 1, 0, 1}, { 0,-1, 0,-1}, { 1, 0, 1, 0}, {-1, 0,-1, 0},
{ 0, 0,-1, 1}, { 0, 0, 1,-1}, { 0,-1, 1, 0}, { 0, 1,-1, 0}, {-1, 1, 0, 0}, { 1,-1, 0, 0}, { 1, 0, 0,-1}, {-1, 0, 0, 1},
{ 0,-1, 0, 1}, { 0, 1, 0,-1}, {-1, 0, 1, 0}, { 1, 0,-1, 0},

{ 0, 1, 1, 1}, { 0,-1,-1,-1}, { 1, 1, 1, 0}, {-1,-1,-1, 0}, { 1, 1, 0, 1}, {-1,-1, 0,-1}, { 1, 0, 1, 1}, {-1, 0,-1,-1},
{ 0,-1, 1, 1}, { 0, 1,-1,-1}, {-1, 1, 1, 0}, { 1,-1,-1, 0}, { 1, 1, 0,-1}, {-1,-1, 0, 1}, { 1, 0,-1, 1}, {-1, 0, 1,-1},
{ 0, 1,-1, 1}, { 0,-1, 1,-1}, { 1,-1, 1, 0}, {-1, 1,-1, 0}, {-1, 1, 0, 1}, { 1,-1, 0,-1}, { 1, 0, 1,-1}, {-1, 0,-1, 1},
{ 0, 1, 1,-1}, { 0,-1,-1, 1}, { 1, 1,-1, 0}, {-1,-1, 1, 0}, { 1,-1, 0, 1}, {-1, 1, 0,-1}, {-1, 0, 1, 1}, { 1, 0,-1,-1},

{ 1, 1, 1, 1}, {-1,-1,-1,-1},
{ 1, 1, 1,-1}, {-1,-1,-1, 1}, { 1, 1,-1, 1}, {-1,-1, 1,-1}, { 1,-1, 1, 1}, {-1, 1,-1,-1}, {-1, 1, 1, 1}, { 1,-1,-1,-1},
{ 1, 1,-1,-1}, {-1,-1, 1, 1}, { 1,-1,-1, 1}, {-1, 1, 1,-1}, { 1,-1, 1,-1}, {-1, 1,-1, 1},
        };
        static const uint8_t hash[]={
HASH( 0, 0, 0, 1), HASH( 0, 0, 0,-1), HASH( 0, 0, 1, 0), HASH( 0, 0,-1, 0), HASH( 0, 1, 0, 0), HASH( 0,-1, 0, 0), HASH( 1, 0, 0, 0), HASH(-1, 0, 0, 0),

HASH( 0, 0, 1, 1), HASH( 0, 0,-1,-1), HASH( 0, 1, 1, 0), HASH( 0,-1,-1, 0), HASH( 1, 1, 0, 0), HASH(-1,-1, 0, 0), HASH( 1, 0, 0, 1), HASH(-1, 0, 0,-1),
HASH( 0, 1, 0, 1), HASH( 0,-1, 0,-1), HASH( 1, 0, 1, 0), HASH(-1, 0,-1, 0),
HASH( 0, 0,-1, 1), HASH( 0, 0, 1,-1), HASH( 0,-1, 1, 0), HASH( 0, 1,-1, 0), HASH(-1, 1, 0, 0), HASH( 1,-1, 0, 0), HASH( 1, 0, 0,-1), HASH(-1, 0, 0, 1),
HASH( 0,-1, 0, 1), HASH( 0, 1, 0,-1), HASH(-1, 0, 1, 0), HASH( 1, 0,-1, 0),

HASH( 0, 1, 1, 1), HASH( 0,-1,-1,-1), HASH( 1, 1, 1, 0), HASH(-1,-1,-1, 0), HASH( 1, 1, 0, 1), HASH(-1,-1, 0,-1), HASH( 1, 0, 1, 1), HASH(-1, 0,-1,-1),
HASH( 0,-1, 1, 1), HASH( 0, 1,-1,-1), HASH(-1, 1, 1, 0), HASH( 1,-1,-1, 0), HASH( 1, 1, 0,-1), HASH(-1,-1, 0, 1), HASH( 1, 0,-1, 1), HASH(-1, 0, 1,-1),
HASH( 0, 1,-1, 1), HASH( 0,-1, 1,-1), HASH( 1,-1, 1, 0), HASH(-1, 1,-1, 0), HASH(-1, 1, 0, 1), HASH( 1,-1, 0,-1), HASH( 1, 0, 1,-1), HASH(-1, 0,-1, 1),
HASH( 0, 1, 1,-1), HASH( 0,-1,-1, 1), HASH( 1, 1,-1, 0), HASH(-1,-1, 1, 0), HASH( 1,-1, 0, 1), HASH(-1, 1, 0,-1), HASH(-1, 0, 1, 1), HASH( 1, 0,-1,-1),

HASH( 1, 1, 1, 1), HASH(-1,-1,-1,-1),
HASH( 1, 1, 1,-1), HASH(-1,-1,-1, 1), HASH( 1, 1,-1, 1), HASH(-1,-1, 1,-1), HASH( 1,-1, 1, 1), HASH(-1, 1,-1,-1), HASH(-1, 1, 1, 1), HASH( 1,-1,-1,-1),
HASH( 1, 1,-1,-1), HASH(-1,-1, 1, 1), HASH( 1,-1,-1, 1), HASH(-1, 1, 1,-1), HASH( 1,-1, 1,-1), HASH(-1, 1,-1, 1),
};

1530
#define CHECK_BIDIR(fx,fy,bx,by)\
1531
    if( !map[(hashidx+HASH(fx,fy,bx,by))&255]\
1532
       &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
Michael Niedermayer's avatar
Michael Niedermayer committed
1533
       &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
1534
        int score;\
1535
        map[(hashidx+HASH(fx,fy,bx,by))&255] = 1;\
Michael Niedermayer's avatar
Michael Niedermayer committed
1536 1537
        score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
        if(score < fbmin){\
1538
            hashidx += HASH(fx,fy,bx,by);\
Michael Niedermayer's avatar
Michael Niedermayer committed
1539 1540 1541 1542 1543 1544 1545
            fbmin= score;\
            motion_fx+=fx;\
            motion_fy+=fy;\
            motion_bx+=bx;\
            motion_by+=by;\
            end=0;\
        }\
1546 1547 1548
    }
#define CHECK_BIDIR2(a,b,c,d)\
CHECK_BIDIR(a,b,c,d)\
1549
CHECK_BIDIR(-(a),-(b),-(c),-(d))
1550 1551

        do{
1552 1553
            int i;
            int borderdist=0;
1554 1555
            end=1;

1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589
            CHECK_BIDIR2(0,0,0,1)
            CHECK_BIDIR2(0,0,1,0)
            CHECK_BIDIR2(0,1,0,0)
            CHECK_BIDIR2(1,0,0,0)

            for(i=8; i<limit; i++){
                int fx= motion_fx+vect[i][0];
                int fy= motion_fy+vect[i][1];
                int bx= motion_bx+vect[i][2];
                int by= motion_by+vect[i][3];
                if(borderdist<=0){
                    int a= (xmax - FFMAX(fx,bx))|(FFMIN(fx,bx) - xmin);
                    int b= (ymax - FFMAX(fy,by))|(FFMIN(fy,by) - ymin);
                    if((a|b) < 0)
                        map[(hashidx+hash[i])&255] = 1;
                }
                if(!map[(hashidx+hash[i])&255]){
                    int score;
                    map[(hashidx+hash[i])&255] = 1;
                    score= check_bidir_mv(s, fx, fy, bx, by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);
                    if(score < fbmin){
                        hashidx += hash[i];
                        fbmin= score;
                        motion_fx=fx;
                        motion_fy=fy;
                        motion_bx=bx;
                        motion_by=by;
                        end=0;
                        borderdist--;
                        if(borderdist<=0){
                            int a= FFMIN(xmax - FFMAX(fx,bx), FFMIN(fx,bx) - xmin);
                            int b= FFMIN(ymax - FFMAX(fy,by), FFMIN(fy,by) - ymin);
                            borderdist= FFMIN(a,b);
                        }
1590 1591 1592 1593 1594 1595
                    }
                }
            }
        }while(!end);
    }

1596 1597 1598 1599 1600
    s->b_bidir_forw_mv_table[xy][0]= motion_fx;
    s->b_bidir_forw_mv_table[xy][1]= motion_fy;
    s->b_bidir_back_mv_table[xy][0]= motion_bx;
    s->b_bidir_back_mv_table[xy][1]= motion_by;

1601
    return fbmin;
1602 1603
}

1604
/**
 * Motion search for the direct-mode candidate of one B-frame macroblock.
 *
 * The co-located vectors of the next picture are copied to
 * c->co_located_mv[] and scaled by pb_time/pp_time into
 * c->direct_basis_mv[]. The search window for the delta vector starts at
 * [-32>>shift, 31>>shift] and is narrowed per block using the picture
 * dimensions. If the window does not contain 0 the direct vector is stored
 * as (0,0) and a large constant score is returned instead of searching.
 *
 * Side effects: sets s->mv_type, overwrites c->xmin/xmax/ymin/ymax during the
 * search (restored through get_limits() afterwards), and stores the result in
 * s->b_direct_mv_table[mot_xy].
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return score of the best delta vector, or 256*256*256*64 if direct mode
 *         is not usable for this macroblock
 */
static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int P[10][2]; /* keep this name: it is what the P_LEFT/P_TOP/... accessors used below refer to */
    const int mot_stride = s->mb_stride;
    const int mot_xy     = mb_y*mot_stride + mb_x;
    const int shift      = 1+s->quarter_sample;
    const int time_pp    = s->pp_time;
    const int time_pb    = s->pb_time;
    int16_t (*mv_table)[2]= s->b_direct_mv_table;
    int xmin, xmax, ymin, ymax;
    int mx, my, dmin;
    int blk, nb_blocks;

    c->current_mv_penalty= c->mv_penalty[1] + MAX_MV;
    xmin= ymin= (-32)>>shift;
    xmax= ymax=   31 >>shift;

    s->mv_type= IS_8X8(s->next_picture.mb_type[mot_xy]) ? MV_TYPE_8X8 : MV_TYPE_16X16;
    /* a 16x16 co-located macroblock only needs its first block examined */
    nb_blocks= s->mv_type == MV_TYPE_16X16 ? 1 : 4;

    for(blk=0; blk<nb_blocks; blk++){
        const int index= s->block_index[blk];
        int lo, hi;

        c->co_located_mv[blk][0]= s->next_picture.motion_val[0][index][0];
        c->co_located_mv[blk][1]= s->next_picture.motion_val[0][index][1];
        c->direct_basis_mv[blk][0]= c->co_located_mv[blk][0]*time_pb/time_pp + ((blk& 1)<<(shift+3));
        c->direct_basis_mv[blk][1]= c->co_located_mv[blk][1]*time_pb/time_pp + ((blk>>1)<<(shift+3));

        /* horizontal limits; the +-1 is for the simpler rounding */
        hi= (FFMAX(c->direct_basis_mv[blk][0], c->direct_basis_mv[blk][0] - c->co_located_mv[blk][0])>>shift)
            + 16*mb_x + 1;
        lo= (FFMIN(c->direct_basis_mv[blk][0], c->direct_basis_mv[blk][0] - c->co_located_mv[blk][0])>>shift)
            + 16*mb_x - 1;
        xmax= FFMIN(xmax, s->width - hi);
        xmin= FFMAX(xmin, - 16     - lo);

        /* vertical limits; the +-1 is for the simpler rounding */
        hi= (FFMAX(c->direct_basis_mv[blk][1], c->direct_basis_mv[blk][1] - c->co_located_mv[blk][1])>>shift)
            + 16*mb_y + 1;
        lo= (FFMIN(c->direct_basis_mv[blk][1], c->direct_basis_mv[blk][1] - c->co_located_mv[blk][1])>>shift)
            + 16*mb_y - 1;
        ymax= FFMIN(ymax, s->height - hi);
        ymin= FFMAX(ymin, - 16      - lo);
    }

    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);

    /* the zero delta vector is outside the window: direct mode is unusable */
    if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
        mv_table[mot_xy][0]= 0;
        mv_table[mot_xy][1]= 0;

        return 256*256*256*64;
    }

    c->xmin= xmin;
    c->ymin= ymin;
    c->xmax= xmax;
    c->ymax= ymax;
    c->flags     |= FLAG_DIRECT;
    c->sub_flags |= FLAG_DIRECT;
    c->pred_x= 0;
    c->pred_y= 0;

    P_LEFT[0]= av_clip(mv_table[mot_xy - 1][0], xmin<<shift, xmax<<shift);
    P_LEFT[1]= av_clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift);

    /* the row above is only used as a predictor when this is not the first line of a slice */
    if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as it is clipped
        const int top= mot_xy - mot_stride;

        P_TOP[0]      = av_clip(mv_table[top    ][0], xmin<<shift, xmax<<shift);
        P_TOP[1]      = av_clip(mv_table[top    ][1], ymin<<shift, ymax<<shift);
        P_TOPRIGHT[0] = av_clip(mv_table[top + 1][0], xmin<<shift, xmax<<shift);
        P_TOPRIGHT[1] = av_clip(mv_table[top + 1][1], ymin<<shift, ymax<<shift);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    }

    /* full-pel search followed by sub-pel refinement */
    dmin= ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);
    if(c->sub_flags&FLAG_QPEL){
        dmin= qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    }else{
        dmin= hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    }

    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip){
        dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);
    }

    get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed

    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;
    c->flags     &= ~FLAG_DIRECT;
    c->sub_flags &= ~FLAG_DIRECT;

    return dmin;
}

/**
 * Estimate motion for one macroblock of a B-frame and pick its candidate
 * macroblock type(s).
 *
 * The function runs the direct (MPEG-4 only), forward, backward,
 * bidirectional and - with CODEC_FLAG_INTERLACED_ME - field based searches,
 * stores the resulting vectors in the corresponding tables of @p s and
 * writes the chosen CANDIDATE_MB_TYPE_* mask to s->mb_type[]. The squared
 * and rescaled best score is stored in s->current_picture.mc_mb_var[] and
 * accumulated into c->mc_mb_var_sum_temp.
 *
 * Two shortcuts exist:
 *  - MPEG-4 and the co-located macroblock of the next picture was skipped:
 *    only the direct search is run and the type is CANDIDATE_MB_TYPE_DIRECT0.
 *  - avctx->me_threshold is set and the value returned by
 *    check_input_motion() is below me_threshold / mb_threshold: the
 *    existing s->mb_type[] entry is kept and at most the searches it names
 *    are re-run.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_b_frame_motion(MpegEncContext * s,
                             int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->mb_penalty_factor;
    int fmin, bmin, dmin, fbmin, bimin, fimin;
    int type=0;
    const int xy = mb_y*s->mb_stride + mb_x;
    init_ref(c, s->new_picture.data, s->last_picture.data, s->next_picture.data, 16*mb_x, 16*mb_y, 2);

    get_limits(s, 16*mb_x, 16*mb_y);

    c->skip=0;

    if(s->codec_id == CODEC_ID_MPEG4 && s->next_picture.mbskip_table[xy]){
        int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0

        /* direct_search() may return 2^30, so the square has to be formed in
         * unsigned arithmetic; a signed multiplication would overflow */
        score= ((unsigned)score*score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
        s->mb_type[xy]= CANDIDATE_MB_TYPE_DIRECT0;

        return;
    }

    if(c->avctx->me_threshold){
        int vard= check_input_motion(s, mb_x, mb_y, 0);

        if((vard+128)>>8 < c->avctx->me_threshold){
            /* good enough: keep the existing type and vectors untouched */
            s->current_picture.mc_mb_var[xy] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            return;
        }
        if((vard+128)>>8 < c->avctx->mb_threshold){
            /* keep the existing type but redo the searches it requires */
            type= s->mb_type[xy];
            if(type == CANDIDATE_MB_TYPE_DIRECT){
                direct_search(s, mb_x, mb_y);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
                interlaced_search(s, 0,
                                        s->b_field_mv_table[0], s->b_field_select_table[0],
                                        s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 1);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
                interlaced_search(s, 2,
                                        s->b_field_mv_table[1], s->b_field_select_table[1],
                                        s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 1);
            }
            return;
        }
    }

    if (s->codec_id == CODEC_ID_MPEG4)
        dmin= direct_search(s, mb_x, mb_y);
    else
        dmin= INT_MAX;
//FIXME penalty stuff for non mpeg4
    /* each mode is charged a different multiple of penalty_factor */
    c->skip=0;
    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) + 3*penalty_factor;

    c->skip=0;
    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) + 2*penalty_factor;

    c->skip=0;
    fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;

    if(s->flags & CODEC_FLAG_INTERLACED_ME){
//FIXME mb type penalty
        c->skip=0;
        c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
        fimin= interlaced_search(s, 0,
                                 s->b_field_mv_table[0], s->b_field_select_table[0],
                                 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
        c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
        bimin= interlaced_search(s, 2,
                                 s->b_field_mv_table[1], s->b_field_select_table[1],
                                 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
    }else
        fimin= bimin= INT_MAX;

    {
        /* pick the cheapest mode; direct wins ties against forward */
        int score= fmin;
        type = CANDIDATE_MB_TYPE_FORWARD;

        if (dmin <= score){
            score = dmin;
            type = CANDIDATE_MB_TYPE_DIRECT;
        }
        if(bmin<score){
            score=bmin;
            type= CANDIDATE_MB_TYPE_BACKWARD;
        }
        if(fbmin<score){
            score=fbmin;
            type= CANDIDATE_MB_TYPE_BIDIR;
        }
        if(fimin<score){
            score=fimin;
            type= CANDIDATE_MB_TYPE_FORWARD_I;
        }
        if(bimin<score){
            score=bimin;
            type= CANDIDATE_MB_TYPE_BACKWARD_I;
        }

        /* unsigned multiplication: score*score does not fit in a signed int
         * for large scores */
        score= ((unsigned)score*score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
    }

    if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* the final decision is made later: offer every usable mode */
        type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
        if(fimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_FORWARD_I;
        if(bimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_BACKWARD_I;
        if(fimin < INT_MAX && bimin < INT_MAX){
            type |= CANDIDATE_MB_TYPE_BIDIR_I;
        }
         //FIXME something smarter
        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //do not try direct mode if it is invalid for this MB
        /* also offer the zero-delta direct mode when the found direct vector
         * is non-zero (components tested individually rather than through a
         * type-punned 32-bit load) */
        if(   s->codec_id == CODEC_ID_MPEG4
           && (type & CANDIDATE_MB_TYPE_DIRECT)
           && (s->flags & CODEC_FLAG_MV0)
           && (s->b_direct_mv_table[xy][0] | s->b_direct_mv_table[xy][1]))
            type |= CANDIDATE_MB_TYPE_DIRECT0;
#if 0
        if(s->out_format == FMT_MPEG1)
            type |= CANDIDATE_MB_TYPE_INTRA;
#endif
    }

    s->mb_type[xy]= type;
}

/**
 * Find the best f_code for motion estimation methods which do unlimited
 * searches.
 *
 * Every f_code i starts with a score of mb_num*(8-i). Each macroblock whose
 * s->mb_type[] entry matches @p type and whose vector lies inside the allowed
 * range then subtracts 170 from all f_codes below the one its vector needs
 * according to s->fcode_tab (for non-B pictures only if mc_mb_var < mb_var).
 * The f_code in 1..7 with the highest score wins.
 *
 * @param s        encoder context
 * @param mv_table motion vectors, indexed by macroblock position (mb_stride)
 * @param type     mask of CANDIDATE_MB_TYPE_* values to take into account
 * @return the selected f_code; always 1 when s->me_method is below ME_EPZS
 */
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
{
    int score[8];
    int i, y, range;
    uint8_t * fcode_tab= s->fcode_tab;
    int best_fcode=-1;
    int best_score=-10000000;

    if(s->me_method < ME_EPZS)
        return 1;

    range= s->avctx->me_range ? s->avctx->me_range : (INT_MAX/2);

    if(s->msmpeg4_version)
        range= FFMIN(range, 16);
    else if(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL)
        range= FFMIN(range, 256);

    for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);

    for(y=0; y<s->mb_height; y++){
        int x;
        int xy= y*s->mb_stride;
        /* xy is advanced in the loop header so that the "continue"
         * statements below can not leave it behind x */
        for(x=0; x<s->mb_width; x++, xy++){
            int mx, my, fcode, j;

            if(!(s->mb_type[xy] & type))
                continue;

            mx= mv_table[xy][0];
            my= mv_table[xy][1];

            /* vectors outside the range are ignored; test this before using
             * them as an index into fcode_tab */
            if(mx >= range || mx < -range ||
               my >= range || my < -range)
                continue;

            if(!(s->pict_type==FF_B_TYPE || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy]))
                continue;

            fcode= FFMAX(fcode_tab[mx + MAX_MV],
                         fcode_tab[my + MAX_MV]);

            /* penalize every f_code that is too small for this vector */
            for(j=0; j<fcode && j<8; j++)
                score[j]-= 170;
        }
    }

    /* f_code 0 is never selected */
    for(i=1; i<8; i++){
        if(score[i] > best_score){
            best_score= score[i];
            best_fcode= i;
        }
    }

    return best_fcode;
}

1924 1925
/**
 * Convert 4MV macroblocks of a P-frame whose vectors exceed the range
 * representable with the current f_code into intra macroblocks.
 *
 * The range is (8 or 16) << f_code, further limited by avctx->me_range when
 * that is set. Nothing is done unless CODEC_FLAG_4MV is enabled. For every
 * macroblock flagged CANDIDATE_MB_TYPE_INTER4V with at least one of its four
 * 8x8 vectors outside [-range, range), the INTER4V flag is replaced by
 * CANDIDATE_MB_TYPE_INTRA in s->mb_type[] and s->current_picture.mb_type[]
 * is set to CANDIDATE_MB_TYPE_INTRA.
 *
 * @param s encoder context; s->pict_type must be FF_P_TYPE
 */
void ff_fix_long_p_mvs(MpegEncContext * s)
{
    MotionEstContext * const c= &s->me;
    const int f_code= s->f_code;
    int range, wrap, mb_y;

    assert(s->pict_type==FF_P_TYPE);

    range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);

    assert(range <= 16 || !s->msmpeg4_version);
    assert(range <=256 || !(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));

    if(c->avctx->me_range && range > c->avctx->me_range)
        range= c->avctx->me_range;

    /* only 8x8 vectors are handled here */
    if(!(s->flags&CODEC_FLAG_4MV))
        return;

    wrap= s->b8_stride;

    /* clip / convert to intra 8x8 type MVs */
    for(mb_y=0; mb_y<s->mb_height; mb_y++){
        int mb_x;

        for(mb_x=0; mb_x<s->mb_width; mb_x++){
            const int mb_idx= mb_y*s->mb_stride + mb_x; /* index into the macroblock tables */
            const int b8_idx= mb_y*2*wrap + 2*mb_x;     /* top left 8x8 block of this macroblock */
            int block;

            if(!(s->mb_type[mb_idx]&CANDIDATE_MB_TYPE_INTER4V))
                continue;

            for(block=0; block<4; block++){
                const int off= (block& 1) + (block>>1)*wrap;
                const int mx= s->current_picture.motion_val[0][ b8_idx + off ][0];
                const int my= s->current_picture.motion_val[0][ b8_idx + off ][1];
                const int in_range=    mx < range && mx >= -range
                                    && my < range && my >= -range;

                if(!in_range){
                    s->mb_type[mb_idx] &= ~CANDIDATE_MB_TYPE_INTER4V;
                    s->mb_type[mb_idx] |= CANDIDATE_MB_TYPE_INTRA;
                    s->current_picture.mb_type[mb_idx]= CANDIDATE_MB_TYPE_INTRA;
                }
            }
        }
    }
}

1971 1972 1973 1974
/**
 *
 * @param truncate 1 for truncation, 0 for using intra
 */
1975
void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
1976
                     int16_t (*mv_table)[2], int f_code, int type, int truncate)
1977
{
1978
    MotionEstContext * const c= &s->me;
1979
    int y, h_range, v_range;
1980

1981
    // RAL: 8 in MPEG-1, 16 in MPEG-4
1982
    int range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
1983

1984
    if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
1985

1986 1987 1988
    h_range= range;
    v_range= field_select_table ? range>>1 : range;

1989 1990 1991
    /* clip / convert to intra 16x16 type MVs */
    for(y=0; y<s->mb_height; y++){
        int x;
1992
        int xy= y*s->mb_stride;
1993
        for(x=0; x<s->mb_width; x++){
1994
            if (s->mb_type[xy] & type){    // RAL: "type" test added...
1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009
                if(field_select_table==NULL || field_select_table[xy] == field_select){
                    if(   mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
                       || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){

                        if(truncate){
                            if     (mv_table[xy][0] > h_range-1) mv_table[xy][0]=  h_range-1;
                            else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
                            if     (mv_table[xy][1] > v_range-1) mv_table[xy][1]=  v_range-1;
                            else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
                        }else{
                            s->mb_type[xy] &= ~type;
                            s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
                            mv_table[xy][0]=
                            mv_table[xy][1]= 0;
                        }
2010
                    }
2011
                }
2012 2013 2014 2015 2016
            }
            xy++;
        }
    }
}