/*
 * Motion estimation
 * Copyright (c) 2000,2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 * new motion estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
24

Michael Niedermayer's avatar
Michael Niedermayer committed
25
/**
 * @file
 * Motion estimation.
 */
29

Fabrice Bellard's avatar
Fabrice Bellard committed
30 31
#include <stdlib.h>
#include <stdio.h>
32
#include <limits.h>
33
#include "libavutil/intmath.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
34 35
#include "avcodec.h"
#include "dsputil.h"
36
#include "mathops.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
37 38
#include "mpegvideo.h"

39 40
#undef NDEBUG
#include <assert.h>
Michael Niedermayer's avatar
Michael Niedermayer committed
41

42
/* Square of a. The argument is expanded twice, so it must not have side effects. */
#define SQ(a) ((a)*(a))
43

44 45 46 47 48 49
/* Named rows of a local predictor array "P" (each row is an {x, y} pair);
 * a function using these macros must declare such an array itself. */
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]

Michael Niedermayer's avatar
Michael Niedermayer committed
50
/* Forward declaration; the definition appears further down in this file. */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                         int *mx_ptr, int *my_ptr,
                                         int dmin,
                                         int src_index, int ref_index,
                                         int size, int h);
Michael Niedermayer's avatar
Michael Niedermayer committed
54

55
/**
 * Advance the generation tag of the motion-estimation map.
 *
 * The tag grows in steps of 1 << (ME_MAP_MV_BITS*2). When the counter
 * reaches zero after the increment, the map is cleared and the counter
 * restarts at one step.
 *
 * @param c motion estimation context whose map_generation is advanced
 * @return the updated generation value
 */
static inline int update_map_generation(MotionEstContext *c)
{
    const int step = 1 << (ME_MAP_MV_BITS * 2);

    c->map_generation += step;

    if (!c->map_generation) {
        /* counter wrapped to zero: wipe the map and start from the first tag */
        memset(c->map, 0, sizeof(uint32_t) * ME_MAP_SIZE);
        c->map_generation = step;
    }

    return c->map_generation;
}

65 66 67 68 69 70
/* shape adaptive search stuff */
typedef struct Minima{
    int height;     /* sort key used by minima_cmp() */
    int x, y;
    int checked;
}Minima;

/**
 * Comparator with the qsort() callback signature, ordering Minima entries
 * by ascending height.
 *
 * @param a pointer to the first Minima
 * @param b pointer to the second Minima
 * @return negative, zero or positive if a's height is less than, equal to
 *         or greater than b's height
 */
static int minima_cmp(const void *a, const void *b){
    const Minima *da = (const Minima *) a;
    const Minima *db = (const Minima *) b;

    /* Compare rather than subtract: the difference of two ints can overflow,
     * which is undefined and would flip the sign of the result. */
    return (da->height > db->height) - (da->height < db->height);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
78

79 80 81
/* Bit flags selecting the comparison variant. (flags & FLAG_QPEL) is passed
 * on directly as the 0/1 "qpel" argument of the compare helpers, where it is
 * used in shift counts, hence the value requirement. */
#define FLAG_QPEL   1 //must be 1
#define FLAG_CHROMA 2
#define FLAG_DIRECT 4
Michael Niedermayer's avatar
Michael Niedermayer committed
82

83
/**
 * Set the context's source and reference plane pointers for the block at (x, y).
 *
 * Plane 0 is offset by y*stride + x, planes 1 and 2 by (y*uvstride + x)>>1.
 * Slot 0 of c->src and c->ref is always filled; when ref_index is non-zero,
 * c->ref[ref_index] is additionally filled from ref2 with the same offsets.
 *
 * @param c         motion estimation context to update
 * @param src       base pointers of the three source planes
 * @param ref       base pointers of the three reference planes
 * @param ref2      base pointers of the second reference (read only if ref_index != 0)
 * @param x         horizontal position
 * @param y         vertical position
 * @param ref_index slot of c->ref that receives ref2, or 0 for none
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
    const int luma_off  =  y*c->  stride + x;
    const int chroma_off= (y*c->uvstride + x)>>1;
    const int offset[3] = { luma_off, chroma_off, chroma_off };
    int plane;

    for(plane=0; plane<3; plane++){
        c->src[0][plane]= src[plane] + offset[plane];
        c->ref[0][plane]= ref[plane] + offset[plane];
    }

    if(!ref_index)
        return;

    for(plane=0; plane<3; plane++)
        c->ref[ref_index][plane]= ref2[plane] + offset[plane];
}

101 102
/**
 * Build the FLAG_* combination for a comparison.
 *
 * @param c      context; FLAG_QPEL is included when c->avctx->flags has CODEC_FLAG_QPEL
 * @param direct non-zero to include FLAG_DIRECT
 * @param chroma non-zero to include FLAG_CHROMA
 * @return sum of the selected FLAG_* values
 */
static int get_flags(MotionEstContext *c, int direct, int chroma){
    int flags= 0;

    if(c->avctx->flags&CODEC_FLAG_QPEL)
        flags += FLAG_QPEL;
    if(direct)
        flags += FLAG_DIRECT;
    if(chroma)
        flags += FLAG_CHROMA;

    return flags;
}

107
/**
 * Score a direct-mode candidate.
 *
 * A prediction is assembled in c->temp by first writing a block fetched from
 * ref[0] with the "put" table and then merging a block fetched from ref[8]
 * with the "avg" table; the result is compared against src[0] with cmp_func
 * over 16 rows.
 * NOTE(review): ref[8] indexes beyond the first few plane pointers of
 * c->ref[ref_index]; presumably it lands on the luma plane of another
 * reference slot -- confirm against the MotionEstContext layout.
 *
 * @param s               encoder context (s->me, pp_time, pb_time, mv_type are read)
 * @param x, y            full-pel candidate offset
 * @param subx, suby      sub-pel part of the candidate
 * @param size, h         unused here
 * @param ref_index       slot of c->ref to read
 * @param src_index       slot of c->src to compare against
 * @param cmp_func        luma comparison function
 * @param chroma_cmp_func unused here
 * @param qpel            1 for quarter-pel, 0 for half-pel vectors
 * @return the cmp_func score, or 256*256*256*32 if the candidate is outside
 *         the search limits (only reachable when the assert is compiled out)
 */
static av_always_inline int cmp_direct_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    /* Scale by multiplication, not "<<": x and y may be negative and
     * left-shifting a negative int is undefined behaviour. */
    const int hx= subx + x*(1<<(1+qpel));
    const int hy= suby + y*(1<<(1+qpel));
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    int d;
    //FIXME check chroma 4mv, (no crashes ...)
        assert(x >= c->xmin && hx <= c->xmax*(1<<(qpel+1)) && y >= c->ymin && hy <= c->ymax*(1<<(qpel+1)));
        if(x >= c->xmin && hx <= c->xmax*(1<<(qpel+1)) && y >= c->ymin && hy <= c->ymax*(1<<(qpel+1))){
            const int time_pp= s->pp_time;
            const int time_pb= s->pb_time;
            const int mask= 2*qpel+1;   /* selects the sub-pel bits of a vector */
            if(s->mv_type==MV_TYPE_8X8){
                int i;
                /* one vector pair per 8x8 quadrant */
                for(i=0; i<4; i++){
                    int fx = c->direct_basis_mv[i][0] + hx;
                    int fy = c->direct_basis_mv[i][1] + hy;
                    int bx = hx ? fx - c->co_located_mv[i][0] : c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(qpel+4));
                    int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
                    int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                    int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

                    uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1);
                    if(qpel){
                        c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
                        c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride);
                    }else{
                        c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
                        c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
                    }
                }
            }else{
                int fx = c->direct_basis_mv[0][0] + hx;
                int fy = c->direct_basis_mv[0][1] + hy;
                int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
                int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
                int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

                if(qpel){
                    /* the 16x16 block is processed as four 8x8 quadrants */
                    c->qpel_put[1][fxy](c->temp               , ref[0] + (fx>>2) + (fy>>2)*stride               , stride);
                    c->qpel_put[1][fxy](c->temp + 8           , ref[0] + (fx>>2) + (fy>>2)*stride + 8           , stride);
                    c->qpel_put[1][fxy](c->temp     + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride     + 8*stride, stride);
                    c->qpel_put[1][fxy](c->temp + 8 + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride + 8 + 8*stride, stride);
                    c->qpel_avg[1][bxy](c->temp               , ref[8] + (bx>>2) + (by>>2)*stride               , stride);
                    c->qpel_avg[1][bxy](c->temp + 8           , ref[8] + (bx>>2) + (by>>2)*stride + 8           , stride);
                    c->qpel_avg[1][bxy](c->temp     + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride     + 8*stride, stride);
                    c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
                }else{
                    assert((fx>>1) + 16*s->mb_x >= -16);
                    assert((fy>>1) + 16*s->mb_y >= -16);
                    assert((fx>>1) + 16*s->mb_x <= s->width);
                    assert((fy>>1) + 16*s->mb_y <= s->height);
                    assert((bx>>1) + 16*s->mb_x >= -16);
                    assert((by>>1) + 16*s->mb_y >= -16);
                    assert((bx>>1) + 16*s->mb_x <= s->width);
                    assert((by>>1) + 16*s->mb_y <= s->height);

                    c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
                    c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
                }
            }
            d = cmp_func(s, c->temp, src[0], stride, 16);
        }else
            d= 256*256*256*32;
    return d;
}

/**
 * Score a (non-direct) candidate vector.
 *
 * With a non-zero sub-pel part the reference block is first interpolated
 * into c->temp (quarter-pel or half-pel table, depending on qpel) and that
 * is compared with src[0]; otherwise src[0] is compared with the reference
 * directly. When chroma is set, both chroma planes are interpolated into the
 * area following the 16 luma rows of c->temp and their scores are added.
 *
 * @param s               encoder context (s->me is used)
 * @param x, y            full-pel candidate offset
 * @param subx, suby      sub-pel part of the candidate
 * @param size            first index into the put tables (size+1 is used for chroma)
 * @param h               number of luma rows (h>>1 rows are used for chroma)
 * @param ref_index       slot of c->ref to read
 * @param src_index       slot of c->src to compare against
 * @param cmp_func        luma comparison function
 * @param chroma_cmp_func chroma comparison function (used only if chroma)
 * @param qpel            1 for quarter-pel, 0 for half-pel sub-pel units
 * @param chroma          non-zero to include both chroma planes in the score
 * @return accumulated comparison score
 */
static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel, int chroma){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int uvstride= c->uvstride;
    const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
    /* Scale by multiplication, not "<<": x and y may be negative and
     * left-shifting a negative int is undefined behaviour. */
    const int hx= subx + x*(1<<(1+qpel));
    const int hy= suby + y*(1<<(1+qpel));
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    int d;
    //FIXME check chroma 4mv, (no crashes ...)
        int uvdxy;              /* no, it might not be used uninitialized */
        if(dxy){
            if(qpel){
                c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
                if(chroma){
                    int cx= hx/2;
                    int cy= hy/2;
                    cx= (cx>>1)|(cx&1);
                    cy= (cy>>1)|(cy&1);
                    uvdxy= (cx&1) + 2*(cy&1);
                    //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
                }
            }else{
                c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
                if(chroma)
                    uvdxy= dxy | (x&1) | (2*(y&1));
            }
            d = cmp_func(s, c->temp, src[0], stride, h);
        }else{
            /* no sub-pel part: compare against the reference in place */
            d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h);
            if(chroma)
                uvdxy= (x&1) + 2*(y&1);
        }
        if(chroma){
            uint8_t * const uvtemp= c->temp + 16*stride;
            c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
            c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
            d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
            d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
        }
    return d;
}

/**
 * Compare with all optional features off: forwards to cmp_inline() with
 * zero sub-pel offsets, size 0, h 16, no qpel and no chroma.
 */
static int cmp_simple(MpegEncContext *s, const int x, const int y,
                      int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func){
    return cmp_inline(s, x, y,
                      0, 0,         /* subx, suby */
                      0, 16,        /* size, h    */
                      ref_index, src_index,
                      cmp_func, chroma_cmp_func,
                      0, 0);        /* qpel, chroma */
}

/**
 * Full-pel compare (sub-pel offsets fixed to 0).
 *
 * Dispatches on flags: FLAG_DIRECT selects cmp_direct_inline() (with qpel
 * taken from FLAG_QPEL), otherwise cmp_inline() is called with qpel 0 and
 * chroma taken from FLAG_CHROMA.
 */
static int cmp_fpel_internal(MpegEncContext *s, const int x, const int y,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(flags&FLAG_DIRECT)
        return cmp_direct_inline(s, x, y, 0, 0, size, h, ref_index, src_index,
                                 cmp_func, chroma_cmp_func, flags&FLAG_QPEL);

    return cmp_inline(s, x, y, 0, 0, size, h, ref_index, src_index,
                      cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);
}

/**
 * General compare with run-time flags.
 *
 * FLAG_DIRECT selects cmp_direct_inline(), otherwise cmp_inline() is used;
 * qpel and chroma are taken from FLAG_QPEL and FLAG_CHROMA.
 */
static int cmp_internal(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(flags&FLAG_DIRECT)
        return cmp_direct_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                                 cmp_func, chroma_cmp_func, flags&FLAG_QPEL);

    return cmp_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                      cmp_func, chroma_cmp_func, flags&FLAG_QPEL, flags&FLAG_CHROMA);
}
250

251 252 253
/*! \brief compares a block (either a full macroblock or a partition thereof)
    against a proposed motion-compensated prediction of that block
 */
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(av_builtin_constant_p(flags) && av_builtin_constant_p(h) && av_builtin_constant_p(size)
       && av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
       && flags==0 && h==16 && size==0 && subx==0 && suby==0){
        return cmp_simple(s,x,y,ref_index,src_index, cmp_func, chroma_cmp_func);
    }else if(av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
       && subx==0 && suby==0){
        return cmp_fpel_internal(s,x,y,size,h,ref_index,src_index, cmp_func, chroma_cmp_func,flags);
    }else{
        return cmp_internal(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags);
    }
}

/**
 * Compare with qpel forced to 0: FLAG_DIRECT selects cmp_direct_inline(),
 * otherwise cmp_inline() is used with chroma taken from FLAG_CHROMA.
 */
static int cmp_hpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(flags&FLAG_DIRECT)
        return cmp_direct_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                                 cmp_func, chroma_cmp_func, 0);

    return cmp_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                      cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);
}

/**
 * Compare with qpel forced to 1: FLAG_DIRECT selects cmp_direct_inline(),
 * otherwise cmp_inline() is used with chroma taken from FLAG_CHROMA.
 */
static int cmp_qpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(flags&FLAG_DIRECT)
        return cmp_direct_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                                 cmp_func, chroma_cmp_func, 1);

    return cmp_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                      cmp_func, chroma_cmp_func, 1, flags&FLAG_CHROMA);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
288 289 290

#include "motion_est_template.c"

Michael Niedermayer's avatar
Michael Niedermayer committed
291 292 293 294 295 296 297
/* Comparison stub: ignores all of its arguments and always reports a score of 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;

    return 0;
}

/* Pixel-copy stub: ignores all of its arguments and writes nothing. */
static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
}

298
/**
 * Initialise the motion estimation context embedded in s.
 *
 * Validates dia_size/pre_dia_size and me_method, installs the comparison
 * functions selected in the codec context, derives the per-stage flag sets,
 * picks the sub-pel refinement routine and the pixel put/avg tables, and
 * sets the strides (from s->linesize when it is non-zero, otherwise from the
 * macroblock width).
 *
 * @param s encoder context
 * @return 0 on success, -1 if the diamond size or me_method is rejected
 */
int ff_init_me(MpegEncContext *s){
    MotionEstContext * const c= &s->me;
    const int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
    const int dia_size  = FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255);

    if(FFMIN(s->avctx->dia_size, s->avctx->pre_dia_size) < -ME_MAP_SIZE){
        av_log(s->avctx, AV_LOG_ERROR, "ME_MAP size is too small for SAB diamond\n");
        return -1;
    }

    //special case of snow is needed because snow uses its own iterative ME code
    if(   s->avctx->codec_id != CODEC_ID_SNOW
       && s->me_method != ME_ZERO
       && s->me_method != ME_EPZS
       && s->me_method != ME_X1){
        av_log(s->avctx, AV_LOG_ERROR, "me_method is only allowed to be set to zero and epzs; for hex,umh,full and others see dia_size\n");
        return -1;
    }

    c->avctx= s->avctx;

    if(!c->stride && cache_size < 2*dia_size)
        av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n");

    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_cmp,     c->avctx->me_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
    ff_set_cmp(&s->dsp, s->dsp.mb_cmp,     c->avctx->mb_cmp);

    c->flags    = get_flags(c, 0, c->avctx->me_cmp    &FF_CMP_CHROMA);
    c->sub_flags= get_flags(c, 0, c->avctx->me_sub_cmp&FF_CMP_CHROMA);
    c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);

/*FIXME s->no_rounding b_type*/
    if(s->flags&CODEC_FLAG_QPEL){
        c->sub_motion_search= qpel_motion_search;
        c->qpel_avg= s->dsp.avg_qpel_pixels_tab;
        if(s->no_rounding)
            c->qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab;
        else
            c->qpel_put= s->dsp.put_qpel_pixels_tab;
    }else{
        /* the SAD-only refinement is used only when every stage compares
         * with plain SAD and the sub-pel stage has no chroma */
        const int sad_only=    !(c->avctx->me_sub_cmp&FF_CMP_CHROMA)
                            && c->avctx->me_sub_cmp == FF_CMP_SAD
                            && c->avctx->    me_cmp == FF_CMP_SAD
                            && c->avctx->    mb_cmp == FF_CMP_SAD;

        if(sad_only)
            c->sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
        else
            c->sub_motion_search= hpel_motion_search;
    }

    c->hpel_avg= s->dsp.avg_pixels_tab;
    if(s->no_rounding)
        c->hpel_put= s->dsp.put_no_rnd_pixels_tab;
    else
        c->hpel_put= s->dsp.put_pixels_tab;

    if(s->linesize){
        c->stride  = s->linesize;
        c->uvstride= s->uvlinesize;
    }else{
        c->stride  = 16*s->mb_width + 32;
        c->uvstride=  8*s->mb_width + 16;
    }

    /* 8x8 fullpel search would need a 4x4 chroma compare, which we do
     * not have yet, and even if we had, the motion estimation code
     * does not expect it. */
    if(s->codec_id != CODEC_ID_SNOW){
        int i;

        if(c->avctx->me_cmp&FF_CMP_CHROMA /* && !s->dsp.me_cmp[2]*/)
            s->dsp.me_cmp[2]= zero_cmp;
        if((c->avctx->me_sub_cmp&FF_CMP_CHROMA) && !s->dsp.me_sub_cmp[2])
            s->dsp.me_sub_cmp[2]= zero_cmp;

        for(i=0; i<4; i++)
            c->hpel_put[2][i]= zero_hpel;
    }

    if(s->codec_id == CODEC_ID_H261)
        c->sub_motion_search= no_sub_motion_search;

    return 0;
}
376

377
#if 0
/* Sum of absolute differences between each pixel of a 16x16 block and mean.
 * line_size is the distance between the starts of consecutive rows. */
static int pix_dev(uint8_t * pix, int line_size, int mean)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += FFABS(pix[col]-mean);
        pix += line_size;
    }
    return sum;
}
#endif
400

401
/**
 * "Search" that performs no estimation: stores 16 times the current
 * macroblock coordinates in the output vector.
 *
 * @param s      encoder context (mb_x, mb_y are read)
 * @param mx_ptr receives 16 * s->mb_x
 * @param my_ptr receives 16 * s->mb_y
 */
static inline void no_motion_search(MpegEncContext * s,
                                    int *mx_ptr, int *my_ptr)
{
    const int mb_size = 16;

    *mx_ptr = mb_size * s->mb_x;
    *my_ptr = mb_size * s->mb_y;
}

408 409
/* NOTE(review): not referenced in the part of the file visible here; confirm
 * whether it is still used before relying on or removing it. */
#define Z_THRESHOLD 256

Michael Niedermayer's avatar
Michael Niedermayer committed
410
/* Score one half-pel neighbour (x, y in {-1, 0, +1}) and keep it if it is the
 * best so far. Expands to a braced block, so call sites use no trailing
 * semicolon. Relies on these names in the enclosing scope: s, size, pix, ptr,
 * stride, h, d, mv_penalty, pen_x, pen_y, penalty_factor, dminh, dx, dy.
 * The "suffix" argument is not used by the expansion. */
#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
    d  = s->dsp.pix_abs[size][((x)?1:0)+((y)?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
    d += (mv_penalty[pen_x + (x)] + mv_penalty[pen_y + (y)])*penalty_factor;\
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
416

Michael Niedermayer's avatar
Michael Niedermayer committed
417
/**
 * Half-pel refinement around a full-pel vector, using the pix_abs functions.
 *
 * On entry *mx_ptr / *my_ptr hold the full-pel vector; on return they hold
 * the vector in half-pel units (or 0,0 when c->skip is set). When the
 * full-pel vector lies strictly inside the search limits, the scores of its
 * four full-pel neighbours in score_map decide which half-pel positions are
 * probed and in which order. "ptr" is moved one row up before the probes
 * with y == -1 and back down before those with y >= 0, so the order of the
 * statements below matters.
 *
 * LOAD_COMMON is defined outside this block; it is expected to provide
 * score_map, xmin/xmax/ymin/ymax, pred_x/pred_y and mv_penalty.
 *
 * @param s         encoder context (s->me is used)
 * @param mx_ptr    in: full-pel x, out: half-pel x
 * @param my_ptr    in: full-pel y, out: half-pel y
 * @param dmin      score of the full-pel vector
 * @param src_index slot of c->src to compare against
 * @param ref_index slot of c->ref to read
 * @param size      first index into s->dsp.pix_abs
 * @param h         number of rows to compare
 * @return best score found; dmin itself if nothing better was found or
 *         c->skip is set
 */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->sub_penalty_factor;
    int mx, my, dminh;
    uint8_t *pix, *ptr;
    int stride= c->stride;
    const int flags= c->sub_flags;
    LOAD_COMMON

    assert(flags == 0);

    if(c->skip){
//    printf("S");
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }
//    printf("N");

    pix = c->src[src_index][0];

    mx = *mx_ptr;
    my = *my_ptr;
    ptr = c->ref[ref_index][0] + (my * stride) + mx;

    dminh = dmin;

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        /* my and mx may be negative: scale by multiplication / addition
         * because left-shifting a negative int is undefined behaviour */
        const int index= my*(1<<ME_MAP_SHIFT) + mx;
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        mx+= mx;
        my+= my;

        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        ptr-= stride;
        if(t<=b){
            CHECK_SAD_HALF_MV(y2 , 0, -1)
            if(l<=r){
                CHECK_SAD_HALF_MV(xy2, -1, -1)
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
            }else{
                CHECK_SAD_HALF_MV(xy2, +1, -1)
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
            }
        }else{
            if(l<=r){
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
                CHECK_SAD_HALF_MV(xy2, -1, +1)
            }else{
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
                CHECK_SAD_HALF_MV(xy2, +1, +1)
            }
            CHECK_SAD_HALF_MV(y2 ,  0, +1)
        }
        mx+=dx;
        my+=dy;

    }else{
        /* on the border of the search area: only convert to half-pel units */
        mx+= mx;
        my+= my;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}

525
/**
 * Store a macroblock vector in the P-frame tables.
 *
 * Always writes (mx, my) to s->p_mv_table for the current macroblock; when
 * mv4 is non-zero the same vector is also written to the 2x2 group of
 * motion_val[0] entries starting at s->block_index[0].
 *
 * @param s   encoder context
 * @param mx  horizontal vector component
 * @param my  vertical vector component
 * @param mv4 non-zero to also fill the four motion_val entries
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    s->p_mv_table[mb_xy][0] = mx;
    s->p_mv_table[mb_xy][1] = my;

    /* has already been set to the 4 MV if 4MV is done */
    if(mv4){
        int mot_xy= s->block_index[0];
        int row, col;

        for(row=0; row<2; row++){
            for(col=0; col<2; col++){
                s->current_picture.motion_val[0][mot_xy+col][0]= mx;
                s->current_picture.motion_val[0][mot_xy+col][1]= my;
            }
            mot_xy += s->b8_stride;
        }
    }
}

549 550 551
/**
 * get fullpel ME search limits.
 */
552
static inline void get_limits(MpegEncContext *s, int x, int y)
Fabrice Bellard's avatar
Fabrice Bellard committed
553
{
554
    MotionEstContext * const c= &s->me;
555
    int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
556
/*
557
    if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
558
    else                   c->range= 16;
559
*/
Fabrice Bellard's avatar
Fabrice Bellard committed
560
    if (s->unrestricted_mv) {
561 562 563 564
        c->xmin = - x - 16;
        c->ymin = - y - 16;
        c->xmax = - x + s->mb_width *16;
        c->ymax = - y + s->mb_height*16;
565 566 567 568
    } else if (s->out_format == FMT_H261){
        // Search range of H261 is different from other codec standards
        c->xmin = (x > 15) ? - 15 : 0;
        c->ymin = (y > 15) ? - 15 : 0;
569
        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
570
        c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
Fabrice Bellard's avatar
Fabrice Bellard committed
571
    } else {
572 573 574 575
        c->xmin = - x;
        c->ymin = - y;
        c->xmax = - x + s->mb_width *16 - 16;
        c->ymax = - y + s->mb_height*16 - 16;
Fabrice Bellard's avatar
Fabrice Bellard committed
576
    }
577 578 579 580 581 582
    if(range){
        c->xmin = FFMAX(c->xmin,-range);
        c->xmax = FFMIN(c->xmax, range);
        c->ymin = FFMAX(c->ymin,-range);
        c->ymax = FFMIN(c->ymax, range);
    }
583 584
}

585 586
/**
 * Derive the plane-0 pointers of slots 1..3 of c->ref and c->src from slot 0:
 * slot 1 is 8 bytes to the right, slot 2 is 8 rows down, slot 3 is both.
 *
 * @param c motion estimation context to update
 */
static inline void init_mv4_ref(MotionEstContext *c){
    const int stride= c->stride;
    uint8_t * const ref0= c->ref[0][0];
    uint8_t * const src0= c->src[0][0];
    int blk;

    for(blk=1; blk<4; blk++){
        const int off= 8*(blk&1) + 8*stride*(blk>>1);

        c->ref[blk][0] = ref0 + off;
        c->src[blk][0] = src0 + off;
    }
}

596
/**
 * Search one vector for each of the four 8x8 blocks of the current macroblock.
 *
 * For every block the predictors (left, and on all but the first slice line
 * for the upper two blocks also top, top-right and their median) are read
 * from motion_val[0], clipped against the search limits scaled by
 * 1 << shift, and handed to epzs_motion_search4() followed by
 * c->sub_motion_search(). The resulting vectors are stored back into
 * motion_val[0]. When me_sub_cmp[0] and mb_cmp[0] differ, the prediction is
 * rebuilt in c->scratchpad and scored again with mb_cmp[0]; with
 * FF_CMP_CHROMA in mb_cmp the chroma planes are scored as well.
 *
 * @param s     encoder context
 * @param mx    16x16 vector x, used as an extra predictor and for the "same" test
 * @param my    16x16 vector y
 * @param shift scale between full-pel limits and the stored vectors
 * @return INT_MAX if all four vectors equal (mx, my); otherwise the summed
 *         score, plus 11*mb_penalty_factor unless mb_cmp is FF_CMP_RD
 */
static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
{
    MotionEstContext * const c= &s->me;
    const int size= 1;
    const int h=8;
    int block;
    int P[10][2];
    int dmin_sum=0, mx4_sum=0, my4_sum=0;
    int same=1;
    const int stride= c->stride;
    uint8_t *mv_penalty= c->current_mv_penalty;
    /* The limits are scaled by multiplication rather than "<< shift":
     * c->xmin is normally negative and left-shifting a negative int is
     * undefined behaviour. */
    const int sub_scale= 1<<shift;

    init_mv4_ref(c);

    for(block=0; block<4; block++){
        int mx4, my4;
        int pred_x4, pred_y4;
        int dmin4;
        static const int off[4]= {2, 1, 1, -1};
        const int mot_stride = s->b8_stride;
        const int mot_xy = s->block_index[block];

        P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
        P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];

        if(P_LEFT[0]       > (c->xmax*sub_scale)) P_LEFT[0]       = (c->xmax*sub_scale);

        /* special case for first line */
        if (s->first_slice_line && block<2) {
            c->pred_x= pred_x4= P_LEFT[0];
            c->pred_y= pred_y4= P_LEFT[1];
        } else {
            P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][0];
            P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][1];
            P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
            P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
            if(P_TOP[1]      > (c->ymax*sub_scale)) P_TOP[1]     = (c->ymax*sub_scale);
            if(P_TOPRIGHT[0] < (c->xmin*sub_scale)) P_TOPRIGHT[0]= (c->xmin*sub_scale);
            if(P_TOPRIGHT[0] > (c->xmax*sub_scale)) P_TOPRIGHT[0]= (c->xmax*sub_scale);
            if(P_TOPRIGHT[1] > (c->ymax*sub_scale)) P_TOPRIGHT[1]= (c->ymax*sub_scale);

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            c->pred_x= pred_x4 = P_MEDIAN[0];
            c->pred_y= pred_y4 = P_MEDIAN[1];
        }
        P_MV1[0]= mx;
        P_MV1[1]= my;

        dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);

        dmin4= c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);

        if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
            /* the final score uses a different metric: rebuild this block's
             * prediction in the scratchpad and only add the vector cost here */
            int dxy;
            const int offset= ((block&1) + (block>>1)*stride)*8;
            uint8_t *dest_y = c->scratchpad + offset;
            if(s->quarter_sample){
                uint8_t *ref= c->ref[block][0] + (mx4>>2) + (my4>>2)*stride;
                dxy = ((my4 & 3) << 2) | (mx4 & 3);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y   , ref    , stride);
                else
                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y   , ref    , stride);
            }else{
                uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
                dxy = ((my4 & 1) << 1) | (mx4 & 1);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y    , ref    , stride, h);
                else
                    s->dsp.put_pixels_tab       [1][dxy](dest_y    , ref    , stride, h);
            }
            dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*c->mb_penalty_factor;
        }else
            dmin_sum+= dmin4;

        if(s->quarter_sample){
            mx4_sum+= mx4/2;
            my4_sum+= my4/2;
        }else{
            mx4_sum+= mx4;
            my4_sum+= my4;
        }

        s->current_picture.motion_val[0][ s->block_index[block] ][0]= mx4;
        s->current_picture.motion_val[0][ s->block_index[block] ][1]= my4;

        if(mx4 != mx || my4 != my) same=0;
    }

    if(same)
        return INT_MAX;

    if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
        dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, c->scratchpad, stride, 16);
    }

    if(c->avctx->mb_cmp&FF_CMP_CHROMA){
        int dxy;
        int mx, my;     /* chroma vector; intentionally shadows the parameters */
        int offset;

        mx= ff_h263_round_chroma(mx4_sum);
        my= ff_h263_round_chroma(my4_sum);
        dxy = ((my & 1) << 1) | (mx & 1);

        offset= (s->mb_x*8 + (mx>>1)) + (s->mb_y*8 + (my>>1))*s->uvlinesize;

        if(s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }else{
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }

        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad  , s->uvlinesize, 8);
        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad+8, s->uvlinesize, 8);
    }

    c->pred_x= mx;
    c->pred_y= my;

    switch(c->avctx->mb_cmp&0xFF){
    /*case FF_CMP_SSE:
        return dmin_sum+ 32*s->qscale*s->qscale;*/
    case FF_CMP_RD:
        return dmin_sum;
    default:
        return dmin_sum+ 11*c->mb_penalty_factor;
    }
}

732 733 734 735 736 737 738 739 740 741 742 743 744
/**
 * Set up the second-field source and reference plane pointers.
 *
 * The pointers stored in c->src[1] and c->ref[ref_index + 1] are derived from
 * c->src[0] and c->ref[ref_index] by advancing one picture line
 * (s->linesize for plane 0, s->uvlinesize for planes 1 and 2).
 * The chroma planes are only touched when FLAG_CHROMA is set in c->flags.
 *
 * @param s         encoder context
 * @param ref_index index of the first-field entry in c->ref
 */
static inline void init_interlaced_ref(MpegEncContext *s, int ref_index){
    MotionEstContext * const c= &s->me;
    uint8_t ** const first_ref = c->ref[ref_index];
    uint8_t ** const second_ref= c->ref[ref_index + 1];
    const int last_plane= (c->flags & FLAG_CHROMA) ? 2 : 0;
    int plane;

    for(plane=0; plane<=last_plane; plane++){
        /* plane 0 is addressed with linesize, the other two with uvlinesize */
        const int line_step= plane ? s->uvlinesize : s->linesize;

        second_ref[plane]= first_ref[plane] + line_step;
        c->src[1][plane] = c->src[0][plane] + line_step;
    }
}

745
/**
 * Field based motion search for the current macroblock (s->mb_x, s->mb_y).
 *
 * The two blocks c->src[0] and c->src[1] (set up one picture line apart by
 * init_interlaced_ref() and addressed with a doubled stride, h=8) are each
 * searched against the candidate reference fields with epzs_motion_search2()
 * followed by c->sub_motion_search(). The vector found for every searched
 * candidate is written to mv_tables[block][field_select][xy]; the field with
 * the lowest score is written to field_select_tables[block][xy].
 *
 * While searching, c->ymin / c->ymax are shifted right by one and
 * c->stride / c->uvstride are shifted left by one; the shifts are undone
 * before returning.
 *
 * @param s                   encoder context
 * @param ref_index           base index into c->ref; field_select is added to it
 * @param mv_tables           [block][field_select] vector tables indexed by
 *                            mb_x + mb_y * mb_stride
 * @param field_select_tables per block table receiving the chosen field; with
 *                            user_field_select it is also read to restrict the
 *                            search to the stored field
 * @param mx                  horizontal frame vector, used as P_MV1 candidate
 *                            and for the "same as frame vector" test
 * @param my                  vertical frame vector (halved for P_MV1)
 * @param user_field_select   nonzero: only search the field already stored in
 *                            field_select_tables
 * @return INT_MAX when both blocks chose their own field with vectors matching
 *         (mx, my), otherwise the summed block scores, plus
 *         11 * c->mb_penalty_factor unless mb_cmp is FF_CMP_RD
 */
static int interlaced_search(MpegEncContext *s, int ref_index,
                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
{
    MotionEstContext * const c= &s->me;
    const int size=0;
    const int h=8;
    int block;
    int P[10][2];
    uint8_t * const mv_penalty= c->current_mv_penalty;
    int same=1;
    const int stride= 2*s->linesize;
    int dmin_sum= 0;
    const int mot_stride= s->mb_stride;
    const int xy= s->mb_x + s->mb_y*mot_stride;

    /* switch the context to field geometry; undone after the block loop */
    c->ymin>>=1;
    c->ymax>>=1;
    c->stride<<=1;
    c->uvstride<<=1;
    init_interlaced_ref(s, ref_index);

    for(block=0; block<2; block++){
        int field_select;
        int best_dmin= INT_MAX;
        int best_field= -1;

        for(field_select=0; field_select<2; field_select++){
            int dmin, mx_i, my_i;
            int16_t (*mv_table)[2]= mv_tables[block][field_select];

            if(user_field_select){
                assert(field_select==0 || field_select==1);
                assert(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);
                if(field_select_tables[block][xy] != field_select)
                    continue;
            }

            P_LEFT[0] = mv_table[xy - 1][0];
            P_LEFT[1] = mv_table[xy - 1][1];
            if(P_LEFT[0]       > (c->xmax<<1)) P_LEFT[0]       = (c->xmax<<1);

            c->pred_x= P_LEFT[0];
            c->pred_y= P_LEFT[1];

            if(!s->first_slice_line){
                P_TOP[0]      = mv_table[xy - mot_stride][0];
                P_TOP[1]      = mv_table[xy - mot_stride][1];
                P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
                P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
                if(P_TOP[1]      > (c->ymax<<1)) P_TOP[1]     = (c->ymax<<1);
                if(P_TOPRIGHT[0] < (c->xmin<<1)) P_TOPRIGHT[0]= (c->xmin<<1);
                if(P_TOPRIGHT[0] > (c->xmax<<1)) P_TOPRIGHT[0]= (c->xmax<<1);
                if(P_TOPRIGHT[1] > (c->ymax<<1)) P_TOPRIGHT[1]= (c->ymax<<1);

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            P_MV1[0]= mx; //FIXME not correct if block != field_select
            P_MV1[1]= my / 2;

            dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select+ref_index, mv_table, (1<<16)>>1);

            dmin= c->sub_motion_search(s, &mx_i, &my_i, dmin, block, field_select+ref_index, size, h);

            mv_table[xy][0]= mx_i;
            mv_table[xy][1]= my_i;

            if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
                /* the sub-pel search used a different metric than mb_cmp:
                 * rebuild the prediction and score it again with mb_cmp */
                int dxy;

                //FIXME chroma ME
                uint8_t *ref= c->ref[field_select+ref_index][0] + (mx_i>>1) + (my_i>>1)*stride;
                dxy = ((my_i & 1) << 1) | (mx_i & 1);

                if(s->no_rounding){
                    s->dsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref    , stride, h);
                }else{
                    s->dsp.put_pixels_tab       [size][dxy](c->scratchpad, ref    , stride, h);
                }
                dmin= s->dsp.mb_cmp[size](s, c->src[block][0], c->scratchpad, stride, h);
                dmin+= (mv_penalty[mx_i-c->pred_x] + mv_penalty[my_i-c->pred_y] + 1)*c->mb_penalty_factor;
            }else
                dmin+= c->mb_penalty_factor; //field_select bits

            dmin += field_select != block; //slightly prefer same field

            if(dmin < best_dmin){
                best_dmin= dmin;
                best_field= field_select;
            }
        }
        {
            int16_t (*mv_table)[2]= mv_tables[block][best_field];

            if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all
            if(mv_table[xy][1]&1) same=0;
            if(mv_table[xy][1]*2 != my) same=0;
            if(best_field != block) same=0;
        }

        field_select_tables[block][xy]= best_field;
        dmin_sum += best_dmin;
    }

    /* back to frame geometry */
    c->ymin<<=1;
    c->ymax<<=1;
    c->stride>>=1;
    c->uvstride>>=1;

    if(same)
        return INT_MAX;

    switch(c->avctx->mb_cmp&0xFF){
    /*case FF_CMP_SSE:
        return dmin_sum+ 32*s->qscale*s->qscale;*/
    case FF_CMP_RD:
        return dmin_sum;
    default:
        return dmin_sum+ 11*c->mb_penalty_factor;
    }
}

867 868 869
/**
 * Clamp a motion vector in place to the current search limits.
 *
 * mv[0] is clamped to [s->me.xmin, s->me.xmax]; mv[1] is clamped to
 * [s->me.ymin >> interlaced, s->me.ymax >> interlaced].
 *
 * @param s          encoder context providing the limits in s->me
 * @param mv         vector to clamp, mv[0] horizontal and mv[1] vertical
 * @param interlaced right shift applied to the vertical limits
 *                   (callers in this file pass 0 or 1)
 */
static void clip_input_mv(MpegEncContext * s, int16_t *mv, int interlaced){
    int ymax= s->me.ymax>>interlaced;
    int ymin= s->me.ymin>>interlaced;

    if(mv[0] < s->me.xmin) mv[0] = s->me.xmin;
    if(mv[0] > s->me.xmax) mv[0] = s->me.xmax;
    if(mv[1] <       ymin) mv[1] =       ymin;
    if(mv[1] >       ymax) mv[1] =       ymax;
}

877 878 879 880 881 882 883 884 885
/**
 * Validate and score the motion already stored in s->current_picture_ptr
 * for one macroblock.
 *
 * The stored vectors of the four blocks are clamped with clip_input_mv(),
 * copied into the encoder's vector / field-select tables matching the
 * macroblock type, and s->mb_type[mb_xy] is set to the corresponding
 * CANDIDATE_MB_TYPE_*. The prediction described by the vectors is scored with
 * cmp() using the s->dsp.sse functions.
 *
 * For interlaced macroblocks c->stride and c->uvstride are doubled while
 * scoring and halved again afterwards; the "interlaced ME disabled" error is
 * detected before the strides are modified so that the error return leaves
 * the context untouched.
 *
 * @param s      encoder context
 * @param mb_x   macroblock column
 * @param mb_y   macroblock row
 * @param p_type nonzero when called for a P picture (list 1 is then an error)
 * @return the score from cmp(), 0 when no list is used, or INT_MAX/2 after
 *         logging an error for an unsupported macroblock type
 */
static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int p_type){
    MotionEstContext * const c= &s->me;
    Picture *p= s->current_picture_ptr;
    int mb_xy= mb_x + mb_y*s->mb_stride;
    int xy= 2*mb_x + 2*mb_y*s->b8_stride;
    int mb_type= s->current_picture.mb_type[mb_xy];
    int flags= c->flags;
    int shift= (flags&FLAG_QPEL) + 1;
    int mask= (1<<shift)-1;
    int x, y, i;
    int d=0;
    me_cmp_func cmpf= s->dsp.sse[0];
    me_cmp_func chroma_cmpf= s->dsp.sse[1];

    if(p_type && USES_LIST(mb_type, 1)){
        av_log(c->avctx, AV_LOG_ERROR, "backward motion vector in P frame\n");
        return INT_MAX/2;
    }
    assert(IS_INTRA(mb_type) || USES_LIST(mb_type,0) || USES_LIST(mb_type,1));

    for(i=0; i<4; i++){
        const int blk_xy= s->block_index[i];
        clip_input_mv(s, p->motion_val[0][blk_xy], !!IS_INTERLACED(mb_type));
        clip_input_mv(s, p->motion_val[1][blk_xy], !!IS_INTERLACED(mb_type));
    }

    if(IS_INTERLACED(mb_type)){
        int xy2= xy  + s->b8_stride;
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;

        /* must be checked before the strides are doubled, otherwise the
         * error return would leave c->stride / c->uvstride doubled */
        if(!(s->flags & CODEC_FLAG_INTERLACED_ME)){
            av_log(c->avctx, AV_LOG_ERROR, "Interlaced macroblock selected but interlaced motion estimation disabled\n");
            return INT_MAX/2;
        }

        c->stride<<=1;
        c->uvstride<<=1;

        if(USES_LIST(mb_type, 0)){
            int field_select0= p->ref_index[0][4*mb_xy  ];
            int field_select1= p->ref_index[0][4*mb_xy+2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 0);

            if(p_type){
                s->p_field_select_table[0][mb_xy]= field_select0;
                s->p_field_select_table[1][mb_xy]= field_select1;
                *(uint32_t*)s->p_field_mv_table[0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->p_field_mv_table[1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER_I;
            }else{
                s->b_field_select_table[0][0][mb_xy]= field_select0;
                s->b_field_select_table[0][1][mb_xy]= field_select1;
                *(uint32_t*)s->b_field_mv_table[0][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->b_field_mv_table[0][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_FORWARD_I;
            }

            x= p->motion_val[0][xy ][0];
            y= p->motion_val[0][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[0][xy2][0];
            y= p->motion_val[0][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1, 1, cmpf, chroma_cmpf, flags);
        }
        if(USES_LIST(mb_type, 1)){
            int field_select0= p->ref_index[1][4*mb_xy  ];
            int field_select1= p->ref_index[1][4*mb_xy+2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 2);

            s->b_field_select_table[1][0][mb_xy]= field_select0;
            s->b_field_select_table[1][1][mb_xy]= field_select1;
            *(uint32_t*)s->b_field_mv_table[1][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[1][xy ];
            *(uint32_t*)s->b_field_mv_table[1][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[1][xy2];
            if(USES_LIST(mb_type, 0)){
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BIDIR_I;
            }else{
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BACKWARD_I;
            }

            /* NOTE(review): this assignment replaces any list 0 score
             * computed above; see the FIXME below */
            x= p->motion_val[1][xy ][0];
            y= p->motion_val[1][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0+2, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[1][xy2][0];
            y= p->motion_val[1][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1+2, 1, cmpf, chroma_cmpf, flags);
            //FIXME bidir scores
        }
        c->stride>>=1;
        c->uvstride>>=1;
    }else if(IS_8X8(mb_type)){
        if(!(s->flags & CODEC_FLAG_4MV)){
            av_log(c->avctx, AV_LOG_ERROR, "4MV macroblock selected but 4MV encoding disabled\n");
            return INT_MAX/2;
        }
        cmpf= s->dsp.sse[1];
        chroma_cmpf= s->dsp.sse[1];
        init_mv4_ref(c);
        for(i=0; i<4; i++){
            xy= s->block_index[i];
            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 1, 8, i, i, cmpf, chroma_cmpf, flags);
        }
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER4V;
    }else{
        if(USES_LIST(mb_type, 0)){
            if(p_type){
                *(uint32_t*)s->p_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER;
            }else if(USES_LIST(mb_type, 1)){
                *(uint32_t*)s->b_bidir_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                *(uint32_t*)s->b_bidir_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BIDIR;
            }else{
                *(uint32_t*)s->b_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_FORWARD;
            }
            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 0, 0, cmpf, chroma_cmpf, flags);
        }else if(USES_LIST(mb_type, 1)){
            *(uint32_t*)s->b_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BACKWARD;

            x= p->motion_val[1][xy][0];
            y= p->motion_val[1][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 2, 0, cmpf, chroma_cmpf, flags);
        }else
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;
    }
    return d;
}

1013 1014 1015
/**
 * Estimate the motion of one macroblock of a P picture and choose its
 * candidate macroblock type(s).
 *
 * The function stores the block mean / variance in the current picture,
 * optionally reuses externally supplied motion (c->avctx->me_threshold,
 * check_input_motion()), runs the full-pel search selected by s->me_method,
 * refines it, optionally tries 4MV and interlaced prediction, and finally
 * writes the chosen candidate type bitmask to s->mb_type[]. It also
 * accumulates c->mb_var_sum_temp, c->mc_mb_var_sum_temp and
 * c->scene_change_score.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_p_frame_motion(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    uint8_t *pix, *ppix;
    int sum, mx, my, dmin;
    int varc;            ///< the variance of the block (sum of squared (p[y][x]-average))
    int vard;            ///< sum of squared differences with the estimated motion vector
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    int mb_type=0;
    Picture * const pic= &s->current_picture;

    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);
    assert(s->linesize == c->stride);
    assert(s->uvlinesize == c->uvstride);

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip=0;

    /* intra / predictive decision */
    pix = c->src[0][0];
    sum = s->dsp.pix_sum(pix, s->linesize);
    /* multiply as unsigned: sum*sum does not fit a signed int for bright
     * blocks and signed overflow is undefined */
    varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500;

    pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
    pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
    c->mb_var_sum_temp += (varc+128)>>8;

    if(c->avctx->me_threshold){
        vard= check_input_motion(s, mb_x, mb_y, 1);

        if((vard+128)>>8 < c->avctx->me_threshold){
            /* the supplied motion is good enough: keep it and skip the search */
            int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
            int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
            pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
            return;
        }
        if((vard+128)>>8 < c->avctx->mb_threshold)
            mb_type= s->mb_type[mb_x + mb_y*s->mb_stride];
    }

    switch(s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        mx-= mb_x*16;
        my-= mb_y*16;
        dmin = 0;
        break;
    case ME_X1:
    case ME_EPZS:
       {
            const int mot_stride = s->b8_stride;
            const int mot_xy = s->block_index[0];

            P_LEFT[0]       = s->current_picture.motion_val[0][mot_xy - 1][0];
            P_LEFT[1]       = s->current_picture.motion_val[0][mot_xy - 1][1];

            if(P_LEFT[0]       > (c->xmax<<shift)) P_LEFT[0]       = (c->xmax<<shift);

            if(!s->first_slice_line) {
                P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
                P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
                P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
                P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
                if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
                if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
                if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

                if(s->out_format == FMT_H263){
                    c->pred_x = P_MEDIAN[0];
                    c->pred_y = P_MEDIAN[1];
                }else { /* mpeg1 at least */
                    c->pred_x= P_LEFT[0];
                    c->pred_y= P_LEFT[1];
                }
            }else{
                c->pred_x= P_LEFT[0];
                c->pred_y= P_LEFT[1];
            }

        }
        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

        break;
    }

    /* At this point (mx,my) are full-pell and the relative displacement */
    ppix = c->ref[0][0] + (my * s->linesize) + mx;

    vard = s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16);

    pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
//    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
    c->mc_mb_var_sum_temp += (vard+128)>>8;

    /* (mx,my) is already relative to the macroblock position */
    av_dlog(s, "varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
            varc, s->avg_mb_var, sum, vard, mx, my);
    if(mb_type){
        /* macroblock type forced by the supplied motion (mb_threshold) */
        int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
        int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
        c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);

        if(mb_type == CANDIDATE_MB_TYPE_INTER){
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            set_p_mv_tables(s, mx, my, 1);
        }else{
            mx <<=shift;
            my <<=shift;
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER4V){
            h263_mv4_search(s, mx, my, shift);

            set_p_mv_tables(s, mx, my, 0);
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER_I){
            interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 1);
        }
    }else if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* collect every plausible candidate type in the bitmask */
        int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
        int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
        c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);

        if (vard*2 + 200*256 > varc)
            mb_type|= CANDIDATE_MB_TYPE_INTRA;
        if (varc*2 + 200*256 > vard || s->qscale > 24){
//        if (varc*2 + 200*256 + 50*(s->lambda2>>FF_LAMBDA_SHIFT) > vard){
            mb_type|= CANDIDATE_MB_TYPE_INTER;
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            if(s->flags&CODEC_FLAG_MV0)
                if(mx || my)
                    mb_type |= CANDIDATE_MB_TYPE_SKIPPED; //FIXME check difference
        }else{
            mx <<=shift;
            my <<=shift;
        }
        if((s->flags&CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8){
            if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
                mb_type|=CANDIDATE_MB_TYPE_INTER4V;

            set_p_mv_tables(s, mx, my, 0);
        }else
            set_p_mv_tables(s, mx, my, 1);
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !c->skip){ //FIXME varc/d checks
            if(interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER_I;
        }
    }else{
        /* simple decision: pick the single type with the lowest score */
        int intra_score, i;
        mb_type= CANDIDATE_MB_TYPE_INTER;

        dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
        if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
            dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

        if((s->flags&CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8){
            int dmin4= h263_mv4_search(s, mx, my, shift);
            if(dmin4 < dmin){
                mb_type= CANDIDATE_MB_TYPE_INTER4V;
                dmin=dmin4;
            }
        }
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !c->skip){ //FIXME varc/d checks
            int dmin_i= interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);
            if(dmin_i < dmin){
                mb_type = CANDIDATE_MB_TYPE_INTER_I;
                dmin= dmin_i;
            }
        }

//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
        set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);

        /* get intra luma score */
        if((c->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
            intra_score= varc - 500;
        }else{
            /* block mean replicated into all four bytes; computed as
             * unsigned because the product exceeds INT_MAX for mean >= 128 */
            const uint32_t mean= ((sum+128)>>8) * 0x01010101U;

            for(i=0; i<16; i++){
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 0]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 4]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 8]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+12]) = mean;
            }

            intra_score= s->dsp.mb_cmp[0](s, c->scratchpad, pix, s->linesize, 16);
        }
#if 0 //FIXME
        /* get chroma score */
        if(c->avctx->mb_cmp&FF_CMP_CHROMA){
            for(i=1; i<3; i++){
                uint8_t *dest_c;
                int mean;

                if(s->out_format == FMT_H263){
                    mean= (s->dc_val[i][mb_x + mb_y*s->b8_stride] + 4)>>3; //FIXME not exact but simple ;)
                }else{
                    mean= (s->last_dc[i] + 4)>>3;
                }
                dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;

                mean*= 0x01010101;
                for(i=0; i<8; i++){
                    *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 0]) = mean;
                    *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 4]) = mean;
                }

                intra_score+= s->dsp.mb_cmp[1](s, c->scratchpad, dest_c, s->uvlinesize);
            }
        }
#endif
        intra_score += c->mb_penalty_factor*16;

        if(intra_score < dmin){
            mb_type= CANDIDATE_MB_TYPE_INTRA;
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
        }else
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;

        {
            int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
            int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
            c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
        }
    }

    s->mb_type[mb_y*s->mb_stride + mb_x]= mb_type;
}

1261 1262 1263
/**
 * Pre-pass motion search for one macroblock of a P picture.
 *
 * The predictors are read from s->p_mv_table at xy+1, xy+mb_stride and
 * xy+mb_stride-1, i.e. from the entries after the current one, mirroring the
 * layout used by the normal pass. The resulting full-pel vector from
 * ff_epzs_motion_search() is stored, shifted left by `shift`, in
 * s->p_mv_table[xy].
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the score returned by ff_epzs_motion_search()
 */
int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
                                    int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int mx, my, dmin;
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    const int xy= mb_x + mb_y*s->mb_stride;
    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);

    c->pre_penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip=0;

    P_LEFT[0]       = s->p_mv_table[xy + 1][0];
    P_LEFT[1]       = s->p_mv_table[xy + 1][1];

    if(P_LEFT[0]       < (c->xmin<<shift)) P_LEFT[0]       = (c->xmin<<shift);

    /* special case for first line */
    if (s->first_slice_line) {
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];
        P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]=
        P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME
    } else {
        P_TOP[0]      = s->p_mv_table[xy + s->mb_stride    ][0];
        P_TOP[1]      = s->p_mv_table[xy + s->mb_stride    ][1];
        P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0];
        P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1];
        if(P_TOP[1]      < (c->ymin<<shift)) P_TOP[1]     = (c->ymin<<shift);
        if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
        if(P_TOPRIGHT[1] < (c->ymin<<shift)) P_TOPRIGHT[1]= (c->ymin<<shift);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

        c->pred_x = P_MEDIAN[0];
        c->pred_y = P_MEDIAN[1];
    }

    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

    s->p_mv_table[xy][0] = mx<<shift;
    s->p_mv_table[xy][1] = my<<shift;

    return dmin;
}

1314
/**
 * Single-direction motion search for one macroblock of a B picture.
 *
 * Runs the full-pel search selected by s->me_method against reference
 * ref_index, refines it with c->sub_motion_search() and, when the sub-pel
 * and macroblock compare functions differ, rescoring with ff_get_mb_score().
 * The resulting vector is stored in mv_table[mb_y * mb_stride + mb_x].
 *
 * @param s         encoder context
 * @param mb_x      macroblock column
 * @param mb_y      macroblock row
 * @param mv_table  vector table to read predictors from and store the result
 *                  in; compared against s->b_forw_mv_table to pick the
 *                  temporal scale applied to s->p_mv_table
 * @param ref_index reference index passed to the search functions
 * @param f_code    selects the c->mv_penalty row used for vector costs
 * @return score of the chosen vector
 */
static int ff_estimate_motion_b(MpegEncContext * s,
                       int mb_x, int mb_y, int16_t (*mv_table)[2], int ref_index, int f_code)
{
    MotionEstContext * const c= &s->me;
    int mx, my, dmin;
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    const int mot_stride = s->mb_stride;
    const int mot_xy = mb_y*mot_stride + mb_x;
    uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_MV;

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= mv_penalty;

    get_limits(s, 16*mb_x, 16*mb_y);

    switch(s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        dmin = 0;
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_X1:
    case ME_EPZS:
       {
            int mv_scale;

            P_LEFT[0]        = mv_table[mot_xy - 1][0];
            P_LEFT[1]        = mv_table[mot_xy - 1][1];

            if(P_LEFT[0]       > (c->xmax<<shift)) P_LEFT[0]       = (c->xmax<<shift);

            /* special case for first line */
            if (!s->first_slice_line) {
                P_TOP[0] = mv_table[mot_xy - mot_stride             ][0];
                P_TOP[1] = mv_table[mot_xy - mot_stride             ][1];
                P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1         ][0];
                P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1         ][1];
                if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1]= (c->ymax<<shift);
                if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
                if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            c->pred_x= P_LEFT[0];
            c->pred_y= P_LEFT[1];

            /* 16.16 fixed point factor applied to the s->p_mv_table vectors
             * handed to the search below */
            if(mv_table == s->b_forw_mv_table){
                mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
            }else{
                mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);
            }

            dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);
        }
        break;
    }

    dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);

    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= ff_get_mb_score(s, mx, my, 0, ref_index, 0, 16, 1);

    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;

    return dmin;
}

1390
/**
 * Score a bidirectional prediction built from one forward and one backward
 * motion vector.
 *
 * The forward prediction is written to c->scratchpad from c->ref[0], the
 * backward prediction from c->ref[2] is averaged into it, and the result is
 * compared against c->src[0] with s->dsp.mb_cmp[size]. The vector costs
 * relative to the given predictors (tables selected by s->f_code and
 * s->b_code) are added, weighted by c->mb_penalty_factor.
 *
 * Only the first plane is compared (see the chroma FIXME at the end).
 *
 * @param s          encoder context
 * @param motion_fx  forward vector, horizontal
 * @param motion_fy  forward vector, vertical
 * @param motion_bx  backward vector, horizontal
 * @param motion_by  backward vector, vertical
 * @param pred_fx    forward predictor, horizontal
 * @param pred_fy    forward predictor, vertical
 * @param pred_bx    backward predictor, horizontal
 * @param pred_by    backward predictor, vertical
 * @param size       index into the pixel-op and mb_cmp function tables
 * @param h          height passed to the half-pel pixel ops and to mb_cmp
 * @return combined vector cost and distortion
 */
static inline int check_bidir_mv(MpegEncContext * s,
                   int motion_fx, int motion_fy,
                   int motion_bx, int motion_by,
                   int pred_fx, int pred_fy,
                   int pred_bx, int pred_by,
                   int size, int h)
{
    //FIXME optimize?
    //FIXME better f_code prediction (max mv & distance)
    //FIXME pointers
    MotionEstContext * const c= &s->me;
    uint8_t * const mv_penalty_f= c->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
    uint8_t * const mv_penalty_b= c->mv_penalty[s->b_code] + MAX_MV; // f_code of the prev frame
    int stride= c->stride;
    uint8_t *dest_y = c->scratchpad;
    uint8_t *ptr;
    int dxy;
    int src_x, src_y;
    int fbmin;
    uint8_t **src_data= c->src[0];
    uint8_t **ref_data= c->ref[0];
    uint8_t **ref2_data= c->ref[2];

    if(s->quarter_sample){
        dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
        src_x = motion_fx >> 2;
        src_y = motion_fy >> 2;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        /* same table index as the average below, so both predictions
         * cover the same block */
        s->dsp.put_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);

        dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
        src_x = motion_bx >> 2;
        src_y = motion_by >> 2;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);
    }else{
        dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
        src_x = motion_fx >> 1;
        src_y = motion_fy >> 1;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        s->dsp.put_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);

        dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
        src_x = motion_bx >> 1;
        src_y = motion_by >> 1;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
    }

    fbmin = (mv_penalty_f[motion_fx-pred_fx] + mv_penalty_f[motion_fy-pred_fy])*c->mb_penalty_factor
           +(mv_penalty_b[motion_bx-pred_bx] + mv_penalty_b[motion_by-pred_by])*c->mb_penalty_factor
           + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic

    //FIXME CHROMA !!! (c->avctx->mb_cmp & FF_CMP_CHROMA is not honoured here)

    return fbmin;
}
1453

1454
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1455
static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
1456
{
1457
    MotionEstContext * const c= &s->me;
1458 1459
    const int mot_stride = s->mb_stride;
    const int xy = mb_y *mot_stride + mb_x;
1460 1461 1462 1463 1464 1465 1466 1467 1468
    int fbmin;
    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1469 1470 1471 1472 1473 1474 1475
    const int flags= c->sub_flags;
    const int qpel= flags&FLAG_QPEL;
    const int shift= 1+qpel;
    const int xmin= c->xmin<<shift;
    const int ymin= c->ymin<<shift;
    const int xmax= c->xmax<<shift;
    const int ymax= c->ymax<<shift;
1476
#define HASH(fx,fy,bx,by) ((fx)+17*(fy)+63*(bx)+117*(by))
1477
#define HASH8(fx,fy,bx,by) ((uint8_t)HASH(fx,fy,bx,by))
1478
    int hashidx= HASH(motion_fx,motion_fy, motion_bx, motion_by);
1479
    uint8_t map[256];
1480 1481

    memset(map,0,sizeof(map));
1482
    map[hashidx&255] = 1;
1483

1484
    fbmin= check_bidir_mv(s, motion_fx, motion_fy,
1485 1486
                          motion_bx, motion_by,
                          pred_fx, pred_fy,
1487 1488
                          pred_bx, pred_by,
                          0, 16);
1489

1490
    if(s->avctx->bidir_refine){
1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511
        int end;
        static const uint8_t limittab[5]={0,8,32,64,80};
        const int limit= limittab[s->avctx->bidir_refine];
        static const int8_t vect[][4]={
{ 0, 0, 0, 1}, { 0, 0, 0,-1}, { 0, 0, 1, 0}, { 0, 0,-1, 0}, { 0, 1, 0, 0}, { 0,-1, 0, 0}, { 1, 0, 0, 0}, {-1, 0, 0, 0},

{ 0, 0, 1, 1}, { 0, 0,-1,-1}, { 0, 1, 1, 0}, { 0,-1,-1, 0}, { 1, 1, 0, 0}, {-1,-1, 0, 0}, { 1, 0, 0, 1}, {-1, 0, 0,-1},
{ 0, 1, 0, 1}, { 0,-1, 0,-1}, { 1, 0, 1, 0}, {-1, 0,-1, 0},
{ 0, 0,-1, 1}, { 0, 0, 1,-1}, { 0,-1, 1, 0}, { 0, 1,-1, 0}, {-1, 1, 0, 0}, { 1,-1, 0, 0}, { 1, 0, 0,-1}, {-1, 0, 0, 1},
{ 0,-1, 0, 1}, { 0, 1, 0,-1}, {-1, 0, 1, 0}, { 1, 0,-1, 0},

{ 0, 1, 1, 1}, { 0,-1,-1,-1}, { 1, 1, 1, 0}, {-1,-1,-1, 0}, { 1, 1, 0, 1}, {-1,-1, 0,-1}, { 1, 0, 1, 1}, {-1, 0,-1,-1},
{ 0,-1, 1, 1}, { 0, 1,-1,-1}, {-1, 1, 1, 0}, { 1,-1,-1, 0}, { 1, 1, 0,-1}, {-1,-1, 0, 1}, { 1, 0,-1, 1}, {-1, 0, 1,-1},
{ 0, 1,-1, 1}, { 0,-1, 1,-1}, { 1,-1, 1, 0}, {-1, 1,-1, 0}, {-1, 1, 0, 1}, { 1,-1, 0,-1}, { 1, 0, 1,-1}, {-1, 0,-1, 1},
{ 0, 1, 1,-1}, { 0,-1,-1, 1}, { 1, 1,-1, 0}, {-1,-1, 1, 0}, { 1,-1, 0, 1}, {-1, 1, 0,-1}, {-1, 0, 1, 1}, { 1, 0,-1,-1},

{ 1, 1, 1, 1}, {-1,-1,-1,-1},
{ 1, 1, 1,-1}, {-1,-1,-1, 1}, { 1, 1,-1, 1}, {-1,-1, 1,-1}, { 1,-1, 1, 1}, {-1, 1,-1,-1}, {-1, 1, 1, 1}, { 1,-1,-1,-1},
{ 1, 1,-1,-1}, {-1,-1, 1, 1}, { 1,-1,-1, 1}, {-1, 1, 1,-1}, { 1,-1, 1,-1}, {-1, 1,-1, 1},
        };
        static const uint8_t hash[]={
1512
HASH8( 0, 0, 0, 1), HASH8( 0, 0, 0,-1), HASH8( 0, 0, 1, 0), HASH8( 0, 0,-1, 0), HASH8( 0, 1, 0, 0), HASH8( 0,-1, 0, 0), HASH8( 1, 0, 0, 0), HASH8(-1, 0, 0, 0),
1513

1514 1515 1516 1517
HASH8( 0, 0, 1, 1), HASH8( 0, 0,-1,-1), HASH8( 0, 1, 1, 0), HASH8( 0,-1,-1, 0), HASH8( 1, 1, 0, 0), HASH8(-1,-1, 0, 0), HASH8( 1, 0, 0, 1), HASH8(-1, 0, 0,-1),
HASH8( 0, 1, 0, 1), HASH8( 0,-1, 0,-1), HASH8( 1, 0, 1, 0), HASH8(-1, 0,-1, 0),
HASH8( 0, 0,-1, 1), HASH8( 0, 0, 1,-1), HASH8( 0,-1, 1, 0), HASH8( 0, 1,-1, 0), HASH8(-1, 1, 0, 0), HASH8( 1,-1, 0, 0), HASH8( 1, 0, 0,-1), HASH8(-1, 0, 0, 1),
HASH8( 0,-1, 0, 1), HASH8( 0, 1, 0,-1), HASH8(-1, 0, 1, 0), HASH8( 1, 0,-1, 0),
1518

1519 1520 1521 1522
HASH8( 0, 1, 1, 1), HASH8( 0,-1,-1,-1), HASH8( 1, 1, 1, 0), HASH8(-1,-1,-1, 0), HASH8( 1, 1, 0, 1), HASH8(-1,-1, 0,-1), HASH8( 1, 0, 1, 1), HASH8(-1, 0,-1,-1),
HASH8( 0,-1, 1, 1), HASH8( 0, 1,-1,-1), HASH8(-1, 1, 1, 0), HASH8( 1,-1,-1, 0), HASH8( 1, 1, 0,-1), HASH8(-1,-1, 0, 1), HASH8( 1, 0,-1, 1), HASH8(-1, 0, 1,-1),
HASH8( 0, 1,-1, 1), HASH8( 0,-1, 1,-1), HASH8( 1,-1, 1, 0), HASH8(-1, 1,-1, 0), HASH8(-1, 1, 0, 1), HASH8( 1,-1, 0,-1), HASH8( 1, 0, 1,-1), HASH8(-1, 0,-1, 1),
HASH8( 0, 1, 1,-1), HASH8( 0,-1,-1, 1), HASH8( 1, 1,-1, 0), HASH8(-1,-1, 1, 0), HASH8( 1,-1, 0, 1), HASH8(-1, 1, 0,-1), HASH8(-1, 0, 1, 1), HASH8( 1, 0,-1,-1),
1523

1524 1525 1526
HASH8( 1, 1, 1, 1), HASH8(-1,-1,-1,-1),
HASH8( 1, 1, 1,-1), HASH8(-1,-1,-1, 1), HASH8( 1, 1,-1, 1), HASH8(-1,-1, 1,-1), HASH8( 1,-1, 1, 1), HASH8(-1, 1,-1,-1), HASH8(-1, 1, 1, 1), HASH8( 1,-1,-1,-1),
HASH8( 1, 1,-1,-1), HASH8(-1,-1, 1, 1), HASH8( 1,-1,-1, 1), HASH8(-1, 1, 1,-1), HASH8( 1,-1, 1,-1), HASH8(-1, 1,-1, 1),
1527 1528
};

1529
#define CHECK_BIDIR(fx,fy,bx,by)\
1530
    if( !map[(hashidx+HASH(fx,fy,bx,by))&255]\
1531
       &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
Michael Niedermayer's avatar
Michael Niedermayer committed
1532
       &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
1533
        int score;\
1534
        map[(hashidx+HASH(fx,fy,bx,by))&255] = 1;\
Michael Niedermayer's avatar
Michael Niedermayer committed
1535 1536
        score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
        if(score < fbmin){\
1537
            hashidx += HASH(fx,fy,bx,by);\
Michael Niedermayer's avatar
Michael Niedermayer committed
1538 1539 1540 1541 1542 1543 1544
            fbmin= score;\
            motion_fx+=fx;\
            motion_fy+=fy;\
            motion_bx+=bx;\
            motion_by+=by;\
            end=0;\
        }\
1545 1546 1547
    }
#define CHECK_BIDIR2(a,b,c,d)\
CHECK_BIDIR(a,b,c,d)\
1548
CHECK_BIDIR(-(a),-(b),-(c),-(d))
1549 1550

        do{
1551 1552
            int i;
            int borderdist=0;
1553 1554
            end=1;

1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588
            CHECK_BIDIR2(0,0,0,1)
            CHECK_BIDIR2(0,0,1,0)
            CHECK_BIDIR2(0,1,0,0)
            CHECK_BIDIR2(1,0,0,0)

            for(i=8; i<limit; i++){
                int fx= motion_fx+vect[i][0];
                int fy= motion_fy+vect[i][1];
                int bx= motion_bx+vect[i][2];
                int by= motion_by+vect[i][3];
                if(borderdist<=0){
                    int a= (xmax - FFMAX(fx,bx))|(FFMIN(fx,bx) - xmin);
                    int b= (ymax - FFMAX(fy,by))|(FFMIN(fy,by) - ymin);
                    if((a|b) < 0)
                        map[(hashidx+hash[i])&255] = 1;
                }
                if(!map[(hashidx+hash[i])&255]){
                    int score;
                    map[(hashidx+hash[i])&255] = 1;
                    score= check_bidir_mv(s, fx, fy, bx, by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);
                    if(score < fbmin){
                        hashidx += hash[i];
                        fbmin= score;
                        motion_fx=fx;
                        motion_fy=fy;
                        motion_bx=bx;
                        motion_by=by;
                        end=0;
                        borderdist--;
                        if(borderdist<=0){
                            int a= FFMIN(xmax - FFMAX(fx,bx), FFMIN(fx,bx) - xmin);
                            int b= FFMIN(ymax - FFMAX(fy,by), FFMIN(fy,by) - ymin);
                            borderdist= FFMIN(a,b);
                        }
1589 1590 1591 1592 1593 1594
                    }
                }
            }
        }while(!end);
    }

1595 1596 1597 1598 1599
    s->b_bidir_forw_mv_table[xy][0]= motion_fx;
    s->b_bidir_forw_mv_table[xy][1]= motion_fy;
    s->b_bidir_back_mv_table[xy][0]= motion_bx;
    s->b_bidir_back_mv_table[xy][1]= motion_by;

1600
    return fbmin;
1601 1602
}

1603
/**
 * Search the delta vector for the direct-mode candidate of one macroblock.
 *
 * For each block (four when the co-located macroblock of next_picture is 8x8,
 * otherwise one) the co-located vector is read from
 * next_picture.motion_val[0], scaled by pb_time/pp_time and offset by the
 * block position to form direct_basis_mv. The search window for the delta
 * starts at [-32, 31] >> shift and is narrowed per block against
 * s->width / s->height. The best delta is stored in b_direct_mv_table.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return     score of the best delta, or 256*256*256*64 when the narrowed
 *             window no longer contains the zero delta (the table entry is
 *             then set to 0,0)
 */
static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int P[10][2];
    const int mot_stride = s->mb_stride;
    const int mot_xy = mb_y*mot_stride + mb_x;
    const int shift= 1+s->quarter_sample;
    int dmin, i;
    const int time_pp= s->pp_time;
    const int time_pb= s->pb_time;
    int mx, my, xmin, xmax, ymin, ymax;
    int x_lo, x_hi, y_lo, y_hi;
    int16_t (*mv_table)[2]= s->b_direct_mv_table;

    c->current_mv_penalty= c->mv_penalty[1] + MAX_MV;
    ymin= xmin=(-32)>>shift;
    ymax= xmax=   31>>shift;

    if(IS_8X8(s->next_picture.mb_type[mot_xy])){
        s->mv_type= MV_TYPE_8X8;
    }else{
        s->mv_type= MV_TYPE_16X16;
    }

    for(i=0; i<4; i++){
        int index= s->block_index[i];
        int min, max;

        c->co_located_mv[i][0]= s->next_picture.motion_val[0][index][0];
        c->co_located_mv[i][1]= s->next_picture.motion_val[0][index][1];
        c->direct_basis_mv[i][0]= c->co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
        c->direct_basis_mv[i][1]= c->co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));

        /* horizontal extent covered by the basis vector and by the basis
         * vector minus the co-located vector */
        max= FFMAX(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        min= FFMIN(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        max+= 16*mb_x + 1; // +-1 is for the simpler rounding
        min+= 16*mb_x - 1;
        xmax= FFMIN(xmax, s->width - max);
        xmin= FFMAX(xmin, - 16     - min);

        /* same for the vertical component */
        max= FFMAX(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        min= FFMIN(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        max+= 16*mb_y + 1; // +-1 is for the simpler rounding
        min+= 16*mb_y - 1;
        ymax= FFMIN(ymax, s->height - max);
        ymin= FFMAX(ymin, - 16      - min);

        /* a 16x16 co-located macroblock has a single vector */
        if(s->mv_type == MV_TYPE_16X16) break;
    }

    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);

    /* the zero delta itself is outside the window: report a huge score */
    if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
        s->b_direct_mv_table[mot_xy][0]= 0;
        s->b_direct_mv_table[mot_xy][1]= 0;

        return 256*256*256*64;
    }

    c->xmin= xmin;
    c->ymin= ymin;
    c->xmax= xmax;
    c->ymax= ymax;
    c->flags     |= FLAG_DIRECT;
    c->sub_flags |= FLAG_DIRECT;
    c->pred_x=0;
    c->pred_y=0;

    /* Clip bounds scaled by 1<<shift. The lower bounds are <= 0 here, so they
     * are scaled by multiplication: left-shifting a negative value is
     * undefined behaviour. */
    x_lo= xmin * (1<<shift);
    x_hi= xmax * (1<<shift);
    y_lo= ymin * (1<<shift);
    y_hi= ymax * (1<<shift);

    P_LEFT[0]        = av_clip(mv_table[mot_xy - 1][0], x_lo, x_hi);
    P_LEFT[1]        = av_clip(mv_table[mot_xy - 1][1], y_lo, y_hi);

    /* special case for first line */
    if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as it is clipped
        P_TOP[0]      = av_clip(mv_table[mot_xy - mot_stride             ][0], x_lo, x_hi);
        P_TOP[1]      = av_clip(mv_table[mot_xy - mot_stride             ][1], y_lo, y_hi);
        P_TOPRIGHT[0] = av_clip(mv_table[mot_xy - mot_stride + 1         ][0], x_lo, x_hi);
        P_TOPRIGHT[1] = av_clip(mv_table[mot_xy - mot_stride + 1         ][1], y_lo, y_hi);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    }

    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);
    if(c->sub_flags&FLAG_QPEL)
        dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    else
        dmin = hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);

    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

    get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed

    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;
    c->flags     &= ~FLAG_DIRECT;
    c->sub_flags &= ~FLAG_DIRECT;

    return dmin;
}

/**
 * Run motion estimation for one macroblock of a B-frame and record the
 * candidate macroblock type(s) in s->mb_type.
 *
 * Forward, backward, bidirectional, (MPEG-4 only) direct and, when
 * CODEC_FLAG_INTERLACED_ME is set, interlaced searches are run; the lowest
 * score selects the type. With mb_decision > FF_MB_DECISION_SIMPLE several
 * candidate types are stored instead of only the best one. The chosen score
 * is also folded into mc_mb_var / mc_mb_var_sum_temp.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_b_frame_motion(MpegEncContext * s,
                             int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->mb_penalty_factor;
    int fmin, bmin, dmin, fbmin, bimin, fimin;
    int type=0;
    const int xy = mb_y*s->mb_stride + mb_x;
    init_ref(c, s->new_picture.data, s->last_picture.data, s->next_picture.data, 16*mb_x, 16*mb_y, 2);

    get_limits(s, 16*mb_x, 16*mb_y);

    c->skip=0;

    /* co-located macroblock is flagged in next_picture.mbskip_table: only direct mode is evaluated */
    if(s->codec_id == CODEC_ID_MPEG4 && s->next_picture.mbskip_table[xy]){
        int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0

        /* Square in unsigned arithmetic: direct_search() can return 2^30 and
         * score*score in int overflows for any score above 46340. */
        score= ((unsigned)score*score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
        s->mb_type[xy]= CANDIDATE_MB_TYPE_DIRECT0;

        return;
    }

    if(c->avctx->me_threshold){
        int vard= check_input_motion(s, mb_x, mb_y, 0);

        if((vard+128)>>8 < c->avctx->me_threshold){
            /* Only mc_mb_var is recorded on this path; the mb_var, mb_mean
             * and scene_change_score updates were commented out in the
             * original code and remain disabled. */
            s->current_picture.mc_mb_var[xy] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            return;
        }
        if((vard+128)>>8 < c->avctx->mb_threshold){
            /* keep the macroblock type already stored and only redo the searches it needs */
            type= s->mb_type[xy];
            if(type == CANDIDATE_MB_TYPE_DIRECT){
                direct_search(s, mb_x, mb_y);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
                interlaced_search(s, 0,
                                        s->b_field_mv_table[0], s->b_field_select_table[0],
                                        s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 1);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
                interlaced_search(s, 2,
                                        s->b_field_mv_table[1], s->b_field_select_table[1],
                                        s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 1);
            }
            return;
        }
    }

    if (s->codec_id == CODEC_ID_MPEG4)
        dmin= direct_search(s, mb_x, mb_y);
    else
        dmin= INT_MAX;
//FIXME penalty stuff for non mpeg4
    c->skip=0;
    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) + 3*penalty_factor;

    c->skip=0;
    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) + 2*penalty_factor;

    c->skip=0;
    fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;

    if(s->flags & CODEC_FLAG_INTERLACED_ME){
//FIXME mb type penalty
        c->skip=0;
        c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
        fimin= interlaced_search(s, 0,
                                 s->b_field_mv_table[0], s->b_field_select_table[0],
                                 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
        c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
        bimin= interlaced_search(s, 2,
                                 s->b_field_mv_table[1], s->b_field_select_table[1],
                                 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
    }else
        fimin= bimin= INT_MAX;

    {
        /* pick the lowest score; direct wins ties against forward */
        int score= fmin;
        type = CANDIDATE_MB_TYPE_FORWARD;

        if (dmin <= score){
            score = dmin;
            type = CANDIDATE_MB_TYPE_DIRECT;
        }
        if(bmin<score){
            score=bmin;
            type= CANDIDATE_MB_TYPE_BACKWARD;
        }
        if(fbmin<score){
            score=fbmin;
            type= CANDIDATE_MB_TYPE_BIDIR;
        }
        if(fimin<score){
            score=fimin;
            type= CANDIDATE_MB_TYPE_FORWARD_I;
        }
        if(bimin<score){
            score=bimin;
            type= CANDIDATE_MB_TYPE_BACKWARD_I;
        }

        /* unsigned multiply: avoids signed overflow for scores above 46340 */
        score= ((unsigned)score*score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
    }

    if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
        if(fimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_FORWARD_I;
        if(bimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_BACKWARD_I;
        if(fimin < INT_MAX && bimin < INT_MAX){
            type |= CANDIDATE_MB_TYPE_BIDIR_I;
        }
         //FIXME something smarter
        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //do not try direct mode if it is invalid for this MB
        /* The direct vector is tested component-wise for being non-zero;
         * reading the int16_t pair through a uint32_t pointer would violate
         * the aliasing rules. */
        if(s->codec_id == CODEC_ID_MPEG4 && type&CANDIDATE_MB_TYPE_DIRECT && s->flags&CODEC_FLAG_MV0
           && (s->b_direct_mv_table[xy][0] || s->b_direct_mv_table[xy][1]))
            type |= CANDIDATE_MB_TYPE_DIRECT0;
    }

    s->mb_type[xy]= type;
}

/**
 * Find the best f_code for motion estimation methods which do unlimited
 * searches.
 *
 * score[i] starts at mb_num*(8-i). Every macroblock whose type matches
 * @p type, whose vector lies within the allowed range and which passes the
 * variance test (always passed for B-pictures) subtracts 170 from score[j]
 * for each j below the fcode_tab value of its vector components. The index
 * in 1..7 with the highest score is returned.
 *
 * @param s        encoder context
 * @param mv_table motion vectors, indexed like s->mb_type
 * @param type     mask of candidate macroblock types to consider
 * @return         the selected f_code; 1 when s->me_method < ME_EPZS
 */
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
{
    if(s->me_method>=ME_EPZS){
        int score[8];
        int i, y, range= s->avctx->me_range ? s->avctx->me_range : (INT_MAX/2);
        uint8_t * fcode_tab= s->fcode_tab;
        int best_fcode=-1;
        /* INT_MIN rather than a fixed negative constant, so that an index is
         * always selected even when every score is hugely negative */
        int best_score=INT_MIN;

        if(s->msmpeg4_version)
            range= FFMIN(range, 16);
        else if(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL)
            range= FFMIN(range, 256);

        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);

        for(y=0; y<s->mb_height; y++){
            int x;
            int xy= y*s->mb_stride;
            /* xy advances in the loop header so that every 'continue' below
             * still moves on to the next macroblock */
            for(x=0; x<s->mb_width; x++, xy++){
                int mx, my, fcode, j;

                if(!(s->mb_type[xy] & type))
                    continue;

                mx= mv_table[xy][0];
                my= mv_table[xy][1];

                /* out-of-range vectors do not vote; test before indexing fcode_tab */
                if(mx >= range || mx < -range ||
                   my >= range || my < -range)
                    continue;

                if(!(s->pict_type==AV_PICTURE_TYPE_B || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy]))
                    continue;

                fcode= FFMAX(fcode_tab[mx + MAX_MV],
                             fcode_tab[my + MAX_MV]);

                for(j=0; j<fcode && j<8; j++)
                    score[j]-= 170;
            }
        }

        for(i=1; i<8; i++){
            if(score[i] > best_score){
                best_score= score[i];
                best_fcode= i;
            }
        }

        return best_fcode;
    }else{
        return 1;
    }
}

1923 1924
/**
 * Convert 4MV macroblocks of a P-picture to intra when one of their 8x8
 * vectors is outside the range given by s->f_code (further limited by
 * avctx->me_range when that is set).
 *
 * Does nothing beyond the range checks unless CODEC_FLAG_4MV is set.
 *
 * @param s encoder context; s->pict_type must be AV_PICTURE_TYPE_P
 */
void ff_fix_long_p_mvs(MpegEncContext * s)
{
    MotionEstContext * const me= &s->me;
    const int code= s->f_code;
    int limit, wrap, row;

    assert(s->pict_type==AV_PICTURE_TYPE_P);

    if(s->out_format == FMT_MPEG1 || s->msmpeg4_version)
        limit= 8 << code;
    else
        limit= 16 << code;

    assert(limit <= 16 || !s->msmpeg4_version);
    assert(limit <=256 || !(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));

    if(me->avctx->me_range && limit > me->avctx->me_range)
        limit= me->avctx->me_range;

    if(!(s->flags&CODEC_FLAG_4MV))
        return;

    wrap= s->b8_stride;

    /* clip / convert to intra 8x8 type MVs */
    for(row=0; row<s->mb_height; row++){
        int b8_xy= row*2*wrap;      /* top-left 8x8 block of the current macroblock */
        int mb_xy= row*s->mb_stride;
        int col;

        for(col=0; col<s->mb_width; col++, b8_xy+=2, mb_xy++){
            int blk;

            if(!(s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V))
                continue;

            for(blk=0; blk<4; blk++){
                const int off= (blk& 1) + (blk>>1)*wrap;
                const int16_t * const mv= s->current_picture.motion_val[0][ b8_xy + off ];
                const int mx= mv[0];
                const int my= mv[1];

                if(mx < limit && mx >= -limit && my < limit && my >= -limit)
                    continue;

                s->mb_type[mb_xy] &= ~CANDIDATE_MB_TYPE_INTER4V;
                s->mb_type[mb_xy] |= CANDIDATE_MB_TYPE_INTRA;
                s->current_picture.mb_type[mb_xy]= CANDIDATE_MB_TYPE_INTRA;
            }
        }
    }
}

1970 1971 1972 1973
/**
 *
 * @param truncate 1 for truncation, 0 for using intra
 */
1974
void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
1975
                     int16_t (*mv_table)[2], int f_code, int type, int truncate)
1976
{
1977
    MotionEstContext * const c= &s->me;
1978
    int y, h_range, v_range;
1979

1980
    // RAL: 8 in MPEG-1, 16 in MPEG-4
1981
    int range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
1982

1983
    if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
1984

1985 1986 1987
    h_range= range;
    v_range= field_select_table ? range>>1 : range;

1988 1989 1990
    /* clip / convert to intra 16x16 type MVs */
    for(y=0; y<s->mb_height; y++){
        int x;
1991
        int xy= y*s->mb_stride;
1992
        for(x=0; x<s->mb_width; x++){
1993
            if (s->mb_type[xy] & type){    // RAL: "type" test added...
1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008
                if(field_select_table==NULL || field_select_table[xy] == field_select){
                    if(   mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
                       || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){

                        if(truncate){
                            if     (mv_table[xy][0] > h_range-1) mv_table[xy][0]=  h_range-1;
                            else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
                            if     (mv_table[xy][1] > v_range-1) mv_table[xy][1]=  v_range-1;
                            else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
                        }else{
                            s->mb_type[xy] &= ~type;
                            s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
                            mv_table[xy][0]=
                            mv_table[xy][1]= 0;
                        }
2009
                    }
2010
                }
2011 2012 2013 2014 2015
            }
            xy++;
        }
    }
}