/*
 * Motion estimation
 * Copyright (c) 2000,2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 * new motion estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Motion estimation.
 */

Fabrice Bellard's avatar
Fabrice Bellard committed
30 31
#include <stdlib.h>
#include <stdio.h>
32
#include <limits.h>
33

Fabrice Bellard's avatar
Fabrice Bellard committed
34 35
#include "avcodec.h"
#include "dsputil.h"
36
#include "mathops.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
37 38
#include "mpegvideo.h"

39 40
#undef NDEBUG
#include <assert.h>
Michael Niedermayer's avatar
Michael Niedermayer committed
41

42 43 44 45 46 47
/* Shorthand names for rows of a local predictor array `P` (declared as
 * int P[10][2] in the search functions); each row is an (x, y) vector. */
/* vector taken from the entry at mot_xy - 1 (left neighbour) */
#define P_LEFT P[1]
/* vector taken from the entry one motion-stride above */
#define P_TOP P[2]
/* vector taken from the row above, shifted sideways by a per-block offset */
#define P_TOPRIGHT P[3]
/* component-wise mid_pred() of P_LEFT, P_TOP and P_TOPRIGHT */
#define P_MEDIAN P[4]
/* the (mx, my) vector handed in by the caller of the search */
#define P_MV1 P[9]

48
/* Forward declaration: ff_init_me() stores this function's address in
 * MotionEstContext.sub_motion_search before the definition appears. */
static int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h);
Michael Niedermayer's avatar
Michael Niedermayer committed
52

53
/**
 * Step the generation tag of the search map and return the new value.
 *
 * The tag advances by 1 << (ME_MAP_MV_BITS*2) per call. When the counter
 * comes back to zero the whole map is cleared and the tag restarts at one
 * step (presumably so that entries from an earlier cycle cannot be mistaken
 * for current ones -- confirm against the map users in the template).
 *
 * @param c motion estimation context owning map and map_generation
 * @return the updated c->map_generation
 */
static inline unsigned update_map_generation(MotionEstContext *c)
{
    const int step= 1<<(ME_MAP_MV_BITS*2);

    c->map_generation+= step;
    if(c->map_generation==0){
        /* counter wrapped: wipe the map and start a fresh cycle */
        c->map_generation= step;
        memset(c->map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
    }
    return c->map_generation;
}

63 64 65 66 67 68
/* shape adaptive search stuff */
typedef struct Minima{
    int height;   /* sort key used by minima_cmp() */
    int x, y;
    int checked;
}Minima;

/**
 * qsort()-style comparator ordering Minima entries by ascending height.
 *
 * The result is computed from two comparisons instead of a subtraction:
 * `a->height - b->height` overflows (undefined behaviour, and in practice
 * the wrong sign) when the two heights are far apart.
 *
 * @return -1, 0 or 1 when a's height is below, equal to or above b's
 */
static int minima_cmp(const void *a, const void *b){
    const Minima *da = (const Minima *) a;
    const Minima *db = (const Minima *) b;

    return (da->height > db->height) - (da->height < db->height);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
76

77 78 79
/* Bit flags selecting the comparison variant used by the cmp*() helpers. */
/* The value of (flags & FLAG_QPEL) is passed on as the `qpel` argument and
 * used directly in shift amounts such as (1+qpel), hence it must be 1. */
#define FLAG_QPEL   1 //must be 1
/* also score the chroma planes */
#define FLAG_CHROMA 2
/* route the comparison through cmp_direct_inline() */
#define FLAG_DIRECT 4
Michael Niedermayer's avatar
Michael Niedermayer committed
80

81
/**
 * Point the context's source/reference plane pointers at position (x, y).
 *
 * c->src[0][i] and c->ref[0][i] are set for the three planes. The luma
 * offset is y*stride + x; both chroma planes use (y*uvstride + x)>>1.
 * When ref_index is non-zero, c->ref[ref_index][i] is additionally set from
 * ref2 using the same offsets (ref2 is not read otherwise).
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
    const int luma_off  =  y*c->  stride + x;
    const int chroma_off= (y*c->uvstride + x)>>1;
    const int offset[3]= { luma_off, chroma_off, chroma_off };
    int plane;

    for(plane=0; plane<3; plane++){
        c->src[0][plane]= src[plane] + offset[plane];
        c->ref[0][plane]= ref[plane] + offset[plane];
    }
    if(!ref_index)
        return;

    for(plane=0; plane<3; plane++)
        c->ref[ref_index][plane]= ref2[plane] + offset[plane];
}

99 100
/**
 * Build the FLAG_* bitmask for one comparison stage.
 *
 * @param c      context; FLAG_QPEL is set when c->avctx->flags has CODEC_FLAG_QPEL
 * @param direct non-zero adds FLAG_DIRECT
 * @param chroma non-zero adds FLAG_CHROMA
 * @return combination of FLAG_QPEL, FLAG_DIRECT and FLAG_CHROMA
 */
static int get_flags(MotionEstContext *c, int direct, int chroma){
    int flags= 0;

    if(c->avctx->flags&CODEC_FLAG_QPEL)
        flags|= FLAG_QPEL;
    if(direct)
        flags|= FLAG_DIRECT;
    if(chroma)
        flags|= FLAG_CHROMA;

    return flags;
}

105
/**
 * Score a direct-mode candidate.
 *
 * A prediction is assembled in c->temp by first calling a "put" function on
 * ref[0] with the forward vector and then an "avg" function on ref[8] with
 * the backward vector; the result is compared against src[0] with cmp_func
 * over 16 rows. With MV_TYPE_8X8 this is done per 8x8 block using the four
 * direct_basis_mv / co_located_mv entries, otherwise entry 0 is used for the
 * whole macroblock.
 *
 * `ref` is c->ref[ref_index], so ref[8] indexes past that row.
 * NOTE(review): this presumably relies on c->ref being a contiguous 2-D array
 * so that ref[8] aliases a later row's plane 0 -- confirm against the
 * MotionEstContext declaration.
 *
 * size, h and chroma_cmp_func are accepted for signature parity with
 * cmp_inline() but are not used here.
 *
 * @param x,y        full-pel candidate position
 * @param subx,suby  sub-pel fraction added to the scaled position
 * @param qpel       1 for quarter-pel vectors, 0 for half-pel
 * @return the cmp_func score, or 256*256*256*32 when the candidate lies
 *         outside [xmin,xmax] x [ymin,ymax]
 */
static av_always_inline int cmp_direct_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    /* multiply instead of shifting: x, y and the limits may be negative and
     * left-shifting a negative value is undefined */
    const int subpel_scale= 1<<(1+qpel);
    const int hx= subx + x*subpel_scale;
    const int hy= suby + y*subpel_scale;
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    const int in_range=    x >= c->xmin && hx <= c->xmax*subpel_scale
                        && y >= c->ymin && hy <= c->ymax*subpel_scale;
    int d;
    //FIXME check chroma 4mv, (no crashes ...)
    assert(in_range);
    if(in_range){
        const int time_pp= s->pp_time;
        const int time_pb= s->pb_time;
        const int mask= 2*qpel+1;
        if(s->mv_type==MV_TYPE_8X8){
            int i;
            for(i=0; i<4; i++){
                int fx = c->direct_basis_mv[i][0] + hx;
                int fy = c->direct_basis_mv[i][1] + hy;
                int bx = hx ? fx - c->co_located_mv[i][0] : c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(qpel+4));
                int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
                int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

                /* destination of 8x8 block i inside the 16x16 temp area */
                uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1);
                if(qpel){
                    c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
                    c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride);
                }else{
                    c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
                    c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
                }
            }
        }else{
            int fx = c->direct_basis_mv[0][0] + hx;
            int fy = c->direct_basis_mv[0][1] + hy;
            int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
            int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
            int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
            int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

            if(qpel){
                /* the 8x8 qpel functions are applied to each quadrant of the macroblock */
                c->qpel_put[1][fxy](c->temp               , ref[0] + (fx>>2) + (fy>>2)*stride               , stride);
                c->qpel_put[1][fxy](c->temp + 8           , ref[0] + (fx>>2) + (fy>>2)*stride + 8           , stride);
                c->qpel_put[1][fxy](c->temp     + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride     + 8*stride, stride);
                c->qpel_put[1][fxy](c->temp + 8 + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride + 8 + 8*stride, stride);
                c->qpel_avg[1][bxy](c->temp               , ref[8] + (bx>>2) + (by>>2)*stride               , stride);
                c->qpel_avg[1][bxy](c->temp + 8           , ref[8] + (bx>>2) + (by>>2)*stride + 8           , stride);
                c->qpel_avg[1][bxy](c->temp     + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride     + 8*stride, stride);
                c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
            }else{
                av_assert2((fx>>1) + 16*s->mb_x >= -16);
                av_assert2((fy>>1) + 16*s->mb_y >= -16);
                av_assert2((fx>>1) + 16*s->mb_x <= s->width);
                av_assert2((fy>>1) + 16*s->mb_y <= s->height);
                av_assert2((bx>>1) + 16*s->mb_x >= -16);
                av_assert2((by>>1) + 16*s->mb_y >= -16);
                av_assert2((bx>>1) + 16*s->mb_x <= s->width);
                av_assert2((by>>1) + 16*s->mb_y <= s->height);

                c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
                c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
            }
        }
        d = cmp_func(s, c->temp, src[0], stride, 16);
    }else
        d= 256*256*256*32;
    return d;
}

/**
 * Score a candidate for ordinary (non-direct) prediction.
 *
 * With a non-zero sub-pel fraction the luma prediction is first produced in
 * c->temp through the qpel/hpel put tables and then compared with src[0];
 * with a zero fraction src[0] is compared with the reference in place.
 * When `chroma` is set, both chroma planes are produced through
 * hpel_put[size+1] into the area after the first 16 rows of c->temp and
 * their chroma_cmp_func scores (over h>>1 rows) are added.
 *
 * @param x,y        full-pel candidate position
 * @param subx,suby  sub-pel fraction
 * @param size       index into the put-function tables
 * @param h          number of luma rows to compare
 * @param qpel       1 for quarter-pel, 0 for half-pel
 * @param chroma     non-zero to include chroma in the score
 * @return accumulated comparison score
 */
static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel, int chroma){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int uvstride= c->uvstride;
    /* multiply instead of shifting: left-shifting a negative coordinate is undefined */
    const int subpel_scale= 1<<(1+qpel);
    const int dxy= subx + suby*subpel_scale; //FIXME log2_subpel?
    const int hx= subx + x*subpel_scale;
    const int hy= suby + y*subpel_scale;
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    int d;
    //FIXME check chroma 4mv, (no crashes ...)
    int uvdxy;              /* every path that reaches the chroma code below assigns this first */

    if(dxy){
        if(qpel){
            c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
            if(chroma){
                int cx= hx/2;
                int cy= hy/2;
                cx= (cx>>1)|(cx&1);
                cy= (cy>>1)|(cy&1);
                uvdxy= (cx&1) + 2*(cy&1);
                //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
            }
        }else{
            c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
            if(chroma)
                uvdxy= dxy | (x&1) | (2*(y&1));
        }
        d = cmp_func(s, c->temp, src[0], stride, h);
    }else{
        d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h);
        if(chroma)
            uvdxy= (x&1) + 2*(y&1);
    }

    if(chroma){
        uint8_t * const uvtemp= c->temp + 16*stride;
        c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
        c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
        d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
        d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
    }
    return d;
}

/**
 * Fixed-parameter shortcut around cmp_inline(): full-pel position,
 * size 0, 16 rows, no qpel and no chroma.
 */
static int cmp_simple(MpegEncContext *s, const int x, const int y,
                      int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func){
    const int no_subpel= 0, size= 0, h= 16, qpel= 0, chroma= 0;

    return cmp_inline(s, x, y, no_subpel, no_subpel, size, h, ref_index, src_index,
                      cmp_func, chroma_cmp_func, qpel, chroma);
}

/**
 * Full-pel comparison (sub-pel fraction fixed to 0,0).
 *
 * FLAG_DIRECT selects cmp_direct_inline(), which still receives the qpel bit
 * of `flags`; otherwise cmp_inline() is called with qpel 0 and the chroma
 * bit of `flags`.
 */
static int cmp_fpel_internal(MpegEncContext *s, const int x, const int y,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(flags&FLAG_DIRECT)
        return cmp_direct_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL);

    return cmp_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);
}

/**
 * General comparison: forwards the sub-pel fraction and takes qpel, chroma
 * and direct mode from the bits of `flags`.
 */
static int cmp_internal(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(flags&FLAG_DIRECT)
        return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL);

    return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL, flags&FLAG_CHROMA);
}
248

249
/** @brief compares a block (either a full macroblock or a partition thereof)
250 251
    against a proposed motion-compensated prediction of that block
 */
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(av_builtin_constant_p(flags) && av_builtin_constant_p(h) && av_builtin_constant_p(size)
       && av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
       && flags==0 && h==16 && size==0 && subx==0 && suby==0){
        return cmp_simple(s,x,y,ref_index,src_index, cmp_func, chroma_cmp_func);
    }else if(av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
       && subx==0 && suby==0){
        return cmp_fpel_internal(s,x,y,size,h,ref_index,src_index, cmp_func, chroma_cmp_func,flags);
    }else{
        return cmp_internal(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags);
    }
}

/**
 * Half-pel comparison: like cmp_internal() but with qpel forced to 0,
 * regardless of the FLAG_QPEL bit in `flags`.
 */
static int cmp_hpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    const int qpel= 0;

    if(!(flags&FLAG_DIRECT))
        return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, qpel, flags&FLAG_CHROMA);

    return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, qpel);
}

/**
 * Quarter-pel comparison: like cmp_internal() but with qpel forced to 1,
 * regardless of the FLAG_QPEL bit in `flags`.
 */
static int cmp_qpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    const int qpel= 1;

    if(!(flags&FLAG_DIRECT))
        return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, qpel, flags&FLAG_CHROMA);

    return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, qpel);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
286 287 288

#include "motion_est_template.c"

Michael Niedermayer's avatar
Michael Niedermayer committed
289 290 291 292 293 294 295
/**
 * Comparison stub that ignores all of its arguments and reports a score of 0.
 * ff_init_me() installs it in comparison-table slot [2].
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;

    return 0;
}

/**
 * Pixel "put" stub that does nothing: neither buffer is read or written.
 * ff_init_me() installs it in c->hpel_put[2][0..3].
 */
static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
}

296
/**
 * Validate the motion estimation settings and set up s->me.
 *
 * Installs the comparison functions, derives the per-stage flag sets,
 * chooses the sub-pel refinement routine and pixel function tables, and
 * sets the strides.
 *
 * @return 0 on success, -1 if dia_size/pre_dia_size is too negative for the
 *         map size or if me_method is not one of the accepted values
 */
int ff_init_me(MpegEncContext *s){
    MotionEstContext * const c= &s->me;
    int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
    int dia_size= FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255);

    if(FFMIN(s->avctx->dia_size, s->avctx->pre_dia_size) < -FFMIN(ME_MAP_SIZE, MAX_SAB_SIZE)){
        av_log(s->avctx, AV_LOG_ERROR, "ME_MAP size is too small for SAB diamond\n");
        return -1;
    }
    //special case of snow is needed because snow uses its own iterative ME code
    if(s->me_method!=ME_ZERO && s->me_method!=ME_EPZS && s->me_method!=ME_X1 && s->avctx->codec_id != AV_CODEC_ID_SNOW){
        av_log(s->avctx, AV_LOG_ERROR, "me_method is only allowed to be set to zero and epzs; for hex,umh,full and others see dia_size\n");
        return -1;
    }

    c->avctx= s->avctx;

    /* c->stride is assigned further down, so this note is skipped once it is
     * non-zero (presumably to log only on the first initialisation) */
    if(cache_size < 2*dia_size && !c->stride){
        av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n");
    }

    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, c->avctx->me_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
    ff_set_cmp(&s->dsp, s->dsp.mb_cmp, c->avctx->mb_cmp);

    c->flags    = get_flags(c, 0, c->avctx->me_cmp    &FF_CMP_CHROMA);
    c->sub_flags= get_flags(c, 0, c->avctx->me_sub_cmp&FF_CMP_CHROMA);
    c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);

/*FIXME s->no_rounding b_type*/
    if(s->flags&CODEC_FLAG_QPEL){
        c->sub_motion_search= qpel_motion_search;
        c->qpel_avg= s->dsp.avg_qpel_pixels_tab;
        if(s->no_rounding) c->qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab;
        else               c->qpel_put= s->dsp.put_qpel_pixels_tab;
    }else{
        /* the SAD-specialised refinement is only used when no chroma is
         * compared in the sub-pel stage and every stage uses plain SAD */
        if(   !(c->avctx->me_sub_cmp&FF_CMP_CHROMA)
           && c->avctx->me_sub_cmp == FF_CMP_SAD
           && c->avctx->    me_cmp == FF_CMP_SAD
           && c->avctx->    mb_cmp == FF_CMP_SAD)
            c->sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
        else
            c->sub_motion_search= hpel_motion_search;
    }
    c->hpel_avg= s->dsp.avg_pixels_tab;
    if(s->no_rounding) c->hpel_put= s->dsp.put_no_rnd_pixels_tab;
    else               c->hpel_put= s->dsp.put_pixels_tab;

    if(s->linesize){
        c->stride  = s->linesize;
        c->uvstride= s->uvlinesize;
    }else{
        /* no line size known yet: derive strides from the macroblock width */
        c->stride  = 16*s->mb_width + 32;
        c->uvstride=  8*s->mb_width + 16;
    }

    /* 8x8 fullpel search would need a 4x4 chroma compare, which we do
     * not have yet, and even if we had, the motion estimation code
     * does not expect it. */
    if(s->codec_id != AV_CODEC_ID_SNOW){
        if((c->avctx->me_cmp&FF_CMP_CHROMA)/* && !s->dsp.me_cmp[2]*/){
            s->dsp.me_cmp[2]= zero_cmp;
        }
        if((c->avctx->me_sub_cmp&FF_CMP_CHROMA) && !s->dsp.me_sub_cmp[2]){
            s->dsp.me_sub_cmp[2]= zero_cmp;
        }
        c->hpel_put[2][0]= c->hpel_put[2][1]=
        c->hpel_put[2][2]= c->hpel_put[2][3]= zero_hpel;
    }

    if(s->codec_id == AV_CODEC_ID_H261){
        c->sub_motion_search= no_sub_motion_search;
    }

    return 0;
}
374

Michael Niedermayer's avatar
Michael Niedermayer committed
375
/*
 * Evaluate one half-pel neighbour (x, y in {-1, 0, +1}) inside
 * sad_hpel_motion_search(): compute its SAD with the matching pix_abs
 * variant, add the vector cost, and keep it in dminh/dx/dy if it is lower.
 * Relies on the caller's locals s, size, h, pix, ptr, stride, mv_penalty,
 * pen_x, pen_y, penalty_factor, d, dminh, dx and dy.
 * `suffix` does not appear in the expansion. The macro is a bare brace
 * block because its uses are written without a trailing semicolon.
 * Note: for x == -1 the pointer step depends on (x)>>1 evaluating to -1.
 */
#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
    d= s->dsp.pix_abs[size][((x)?1:0)+((y)?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
    d += (mv_penalty[pen_x + (x)] + mv_penalty[pen_y + (y)])*penalty_factor;\
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
381

382
/**
 * Half-pel refinement of a full-pel vector using SAD only.
 *
 * On entry *mx_ptr / *my_ptr hold the full-pel vector. On return they hold
 * the vector in doubled (half-pel) units, adjusted by the best half-pel
 * offset found. If c->skip is set, (0, 0) is written instead and dmin is
 * returned unchanged.
 *
 * The full-pel scores of the top/left/right/bottom neighbours are read from
 * score_map and used to pick which half-pel positions get evaluated, so
 * only a subset of the eight surrounding positions is tested. No refinement
 * is done when the vector lies on the border of [xmin,xmax] x [ymin,ymax].
 *
 * LOAD_COMMON is defined outside this view; it is assumed to declare
 * score_map, xmin/xmax/ymin/ymax, pred_x/pred_y and mv_penalty.
 *
 * @param dmin score of the incoming full-pel vector
 * @return the lowest score found (dmin if nothing better was found)
 */
static int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->sub_penalty_factor;
    int mx, my, dminh;
    uint8_t *pix, *ptr;
    int stride= c->stride;
    const int flags= c->sub_flags;
    LOAD_COMMON

    assert(flags == 0);

    if(c->skip){
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    pix = c->src[src_index][0];

    mx = *mx_ptr;
    my = *my_ptr;
    ptr = c->ref[ref_index][0] + (my * stride) + mx;

    dminh = dmin;

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        /* multiply / add instead of shifting: my and mx may be negative and
         * left-shifting a negative value is undefined */
        const int index= my*(1<<ME_MAP_SHIFT) + mx;
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        mx+= mx;
        my+= my;

        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /* start one row up; ptr is moved back down before any check with y >= 0 */
        ptr-= stride;
        if(t<=b){
            CHECK_SAD_HALF_MV(y2 , 0, -1)
            if(l<=r){
                CHECK_SAD_HALF_MV(xy2, -1, -1)
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
            }else{
                CHECK_SAD_HALF_MV(xy2, +1, -1)
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
            }
        }else{
            if(l<=r){
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
                CHECK_SAD_HALF_MV(xy2, -1, +1)
            }else{
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
                CHECK_SAD_HALF_MV(xy2, +1, +1)
            }
            CHECK_SAD_HALF_MV(y2 ,  0, +1)
        }
        mx+=dx;
        my+=dy;

    }else{
        /* on the border: only convert to half-pel units */
        mx+= mx;
        my+= my;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}

488
/**
 * Store the chosen P-frame vector for the current macroblock.
 *
 * The vector always goes into s->p_mv_table at the macroblock index. When
 * mv4 is non-zero it is also copied into the four motion_val[0] entries of
 * the macroblock (two adjacent entries starting at block_index[0], and the
 * same two one b8_stride further on).
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    const int xy= s->mb_x + s->mb_y*s->mb_stride;

    s->p_mv_table[xy][0] = mx;
    s->p_mv_table[xy][1] = my;

    /* has already been set to the 4 MV if 4MV is done */
    if(mv4){
        int row, col;

        for(row=0; row<2; row++){
            const int mot_xy= s->block_index[0] + row*s->b8_stride;

            for(col=0; col<2; col++){
                s->current_picture.f.motion_val[0][mot_xy + col][0] = mx;
                s->current_picture.f.motion_val[0][mot_xy + col][1] = my;
            }
        }
    }
}

512 513 514
/**
 * get fullpel ME search limits.
 */
515
static inline void get_limits(MpegEncContext *s, int x, int y)
Fabrice Bellard's avatar
Fabrice Bellard committed
516
{
517
    MotionEstContext * const c= &s->me;
518
    int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
519
/*
520
    if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
521
    else                   c->range= 16;
522
*/
Fabrice Bellard's avatar
Fabrice Bellard committed
523
    if (s->unrestricted_mv) {
524 525
        c->xmin = - x - 16;
        c->ymin = - y - 16;
526 527
        c->xmax = - x + s->width;
        c->ymax = - y + s->height;
528 529 530 531
    } else if (s->out_format == FMT_H261){
        // Search range of H261 is different from other codec standards
        c->xmin = (x > 15) ? - 15 : 0;
        c->ymin = (y > 15) ? - 15 : 0;
532
        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
533
        c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
Fabrice Bellard's avatar
Fabrice Bellard committed
534
    } else {
535 536 537 538
        c->xmin = - x;
        c->ymin = - y;
        c->xmax = - x + s->mb_width *16 - 16;
        c->ymax = - y + s->mb_height*16 - 16;
Fabrice Bellard's avatar
Fabrice Bellard committed
539
    }
540 541 542 543 544 545
    if(range){
        c->xmin = FFMAX(c->xmin,-range);
        c->xmax = FFMIN(c->xmax, range);
        c->ymin = FFMAX(c->ymin,-range);
        c->ymax = FFMIN(c->ymax, range);
    }
546 547
}

548 549
/**
 * Derive the luma pointers for 8x8 blocks 1..3 from those of block 0.
 *
 * Blocks are laid out as 0 1 / 2 3: block 1 is 8 pixels to the right,
 * block 2 is 8 rows down and block 3 is both. Only plane 0 of c->ref and
 * c->src is touched.
 */
static inline void init_mv4_ref(MotionEstContext *c){
    const int right= 8;
    const int below= 8*c->stride;

    c->ref[1][0] = c->ref[0][0] + right;
    c->ref[2][0] = c->ref[0][0] + below;
    c->ref[3][0] = c->ref[2][0] + right;
    c->src[1][0] = c->src[0][0] + right;
    c->src[2][0] = c->src[0][0] + below;
    c->src[3][0] = c->src[2][0] + right;
}

559
/**
 * Search one vector per 8x8 luma block (4MV) for the current macroblock.
 *
 * For each of the four blocks the predictors are gathered from motion_val,
 * a full-pel search (epzs_motion_search4) and the configured sub-pel
 * refinement are run, and the result is stored in motion_val[0]. If the
 * sub-pel and macroblock comparison functions differ, the predicted block
 * is rendered into c->scratchpad and the macroblock is re-scored with
 * mb_cmp; with FF_CMP_CHROMA in mb_cmp a chroma score from a vector derived
 * from the four luma vectors is added.
 *
 * @param mx,my  16x16 vector, used as predictor P_MV1 and for the
 *               "all four equal" test
 * @param shift  left shift that converts the full-pel limits to the
 *               units of the stored vectors
 * @return INT_MAX if all four vectors equal (mx, my); otherwise the summed
 *         score, plus 11*mb_penalty_factor unless mb_cmp is FF_CMP_RD
 */
static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
{
    MotionEstContext * const c= &s->me;
    const int size= 1;
    const int h=8;
    int block;
    /* NOTE(review): only rows 1-4 and 9 are assigned in this function, yet
     * the safety-clipping loop below reads all ten rows. */
    int P[10][2];
    int dmin_sum=0, mx4_sum=0, my4_sum=0, i;
    int same=1;
    const int stride= c->stride;
    uint8_t *mv_penalty= c->current_mv_penalty;
    const int safety_clipping= s->unrestricted_mv && (s->width&15) && (s->height&15);
    /* scale by multiplication: the limits may be negative and left-shifting
     * a negative value is undefined */
    const int scale= 1<<shift;

    init_mv4_ref(c);

    for(block=0; block<4; block++){
        int mx4, my4;
        int pred_x4, pred_y4;
        int dmin4;
        static const int off[4]= {2, 1, 1, -1};
        const int mot_stride = s->b8_stride;
        const int mot_xy = s->block_index[block];
        int xmin_s, xmax_s, ymax_s;

        if(safety_clipping){
            c->xmax = - 16*s->mb_x + s->width  - 8*(block &1);
            c->ymax = - 16*s->mb_y + s->height - 8*(block>>1);
        }
        /* limits in vector units; c->xmin/xmax/ymax are not changed again
         * before their last use in this iteration */
        xmin_s= c->xmin*scale;
        xmax_s= c->xmax*scale;
        ymax_s= c->ymax*scale;

        P_LEFT[0] = s->current_picture.f.motion_val[0][mot_xy - 1][0];
        P_LEFT[1] = s->current_picture.f.motion_val[0][mot_xy - 1][1];

        if(P_LEFT[0]       > xmax_s) P_LEFT[0]       = xmax_s;

        /* special case for first line */
        if (s->first_slice_line && block<2) {
            c->pred_x= pred_x4= P_LEFT[0];
            c->pred_y= pred_y4= P_LEFT[1];
        } else {
            P_TOP[0]      = s->current_picture.f.motion_val[0][mot_xy - mot_stride             ][0];
            P_TOP[1]      = s->current_picture.f.motion_val[0][mot_xy - mot_stride             ][1];
            P_TOPRIGHT[0] = s->current_picture.f.motion_val[0][mot_xy - mot_stride + off[block]][0];
            P_TOPRIGHT[1] = s->current_picture.f.motion_val[0][mot_xy - mot_stride + off[block]][1];
            if(P_TOP[1]      > ymax_s) P_TOP[1]     = ymax_s;
            if(P_TOPRIGHT[0] < xmin_s) P_TOPRIGHT[0]= xmin_s;
            if(P_TOPRIGHT[0] > xmax_s) P_TOPRIGHT[0]= xmax_s;
            if(P_TOPRIGHT[1] > ymax_s) P_TOPRIGHT[1]= ymax_s;

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            c->pred_x= pred_x4 = P_MEDIAN[0];
            c->pred_y= pred_y4 = P_MEDIAN[1];
        }
        P_MV1[0]= mx;
        P_MV1[1]= my;
        if(safety_clipping)
            for(i=0; i<10; i++){
                if(P[i][0] > xmax_s) P[i][0]= xmax_s;
                if(P[i][1] > ymax_s) P[i][1]= ymax_s;
            }

        dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);

        dmin4= c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);

        if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
            /* the block will be re-scored with mb_cmp later: render the
             * prediction and keep only the vector cost for now */
            int dxy;
            const int offset= ((block&1) + (block>>1)*stride)*8;
            uint8_t *dest_y = c->scratchpad + offset;
            if(s->quarter_sample){
                uint8_t *ref= c->ref[block][0] + (mx4>>2) + (my4>>2)*stride;
                dxy = ((my4 & 3) << 2) | (mx4 & 3);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y   , ref    , stride);
                else
                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y   , ref    , stride);
            }else{
                uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
                dxy = ((my4 & 1) << 1) | (mx4 & 1);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y    , ref    , stride, h);
                else
                    s->dsp.put_pixels_tab       [1][dxy](dest_y    , ref    , stride, h);
            }
            dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*c->mb_penalty_factor;
        }else
            dmin_sum+= dmin4;

        if(s->quarter_sample){
            mx4_sum+= mx4/2;
            my4_sum+= my4/2;
        }else{
            mx4_sum+= mx4;
            my4_sum+= my4;
        }

        s->current_picture.f.motion_val[0][s->block_index[block]][0] = mx4;
        s->current_picture.f.motion_val[0][s->block_index[block]][1] = my4;

        if(mx4 != mx || my4 != my) same=0;
    }

    if(same)
        return INT_MAX;

    if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
        dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*16*stride, c->scratchpad, stride, 16);
    }

    if(c->avctx->mb_cmp&FF_CMP_CHROMA){
        /* chroma vector derived from the sum of the four luma vectors */
        const int cmx= ff_h263_round_chroma(mx4_sum);
        const int cmy= ff_h263_round_chroma(my4_sum);
        const int dxy= ((cmy & 1) << 1) | (cmx & 1);
        const int offset= (s->mb_x*8 + (cmx>>1)) + (s->mb_y*8 + (cmy>>1))*s->uvlinesize;

        if(s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad    , s->last_picture.f.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad + 8, s->last_picture.f.data[2] + offset, s->uvlinesize, 8);
        }else{
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad    , s->last_picture.f.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad + 8, s->last_picture.f.data[2] + offset, s->uvlinesize, 8);
        }

        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.f.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad  , s->uvlinesize, 8);
        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.f.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad+8, s->uvlinesize, 8);
    }

    /* restore the 16x16 vector passed in by the caller as the prediction */
    c->pred_x= mx;
    c->pred_y= my;

    switch(c->avctx->mb_cmp&0xFF){
    /*case FF_CMP_SSE:
        return dmin_sum+ 32*s->qscale*s->qscale;*/
    case FF_CMP_RD:
        return dmin_sum;
    default:
        return dmin_sum+ 11*c->mb_penalty_factor;
    }
}

706 707 708 709 710 711 712 713 714 715 716 717 718
/**
 * Derive the second set of plane pointers used by the field based search.
 *
 * Reference slot ref_index+1 and source slot 1 are pointed exactly one
 * picture line below reference slot ref_index and source slot 0
 * (presumably the other field of an interlaced frame -- the callers double
 * c->stride around this call).  The two chroma planes are only set up when
 * FLAG_CHROMA is set in c->flags.
 *
 * @param s         encoder context, s->me is updated in place
 * @param ref_index reference slot that has already been initialised
 */
static inline void init_interlaced_ref(MpegEncContext *s, int ref_index){
    MotionEstContext * const c= &s->me;
    uint8_t ** const ref_base= c->ref[ref_index];
    uint8_t ** const ref_next= c->ref[ref_index + 1];
    int plane;

    /* luma */
    ref_next[0]  = ref_base[0]  + s->linesize;
    c->src[1][0] = c->src[0][0] + s->linesize;

    if(!(c->flags & FLAG_CHROMA))
        return;

    /* both chroma planes */
    for(plane=1; plane<3; plane++){
        ref_next[plane]  = ref_base[plane]  + s->uvlinesize;
        c->src[1][plane] = c->src[0][plane] + s->uvlinesize;
    }
}

719
/**
 * Search one motion vector per 16x8 field block of the current macroblock.
 *
 * For each of the two blocks every candidate reference field is searched
 * (EPZS followed by the sub-pel refinement), the resulting vector is stored
 * in mv_tables[block][field][xy] and the cheapest field is recorded in
 * field_select_tables[block][xy].
 *
 * c->ymin/ymax are halved and c->stride/uvstride doubled for the duration of
 * the search and restored before returning.
 *
 * @param s                   encoder context
 * @param ref_index           first reference slot to use (slot ref_index+1 is
 *                            set up by init_interlaced_ref())
 * @param mv_tables           [block][field] motion vector tables
 * @param field_select_tables [block] table receiving the chosen field
 * @param mx, my              frame vector, used as an extra predictor and to
 *                            detect a result equal to the frame vector
 * @param user_field_select   if nonzero only the field already stored in
 *                            field_select_tables is searched
 * @return INT_MAX if both blocks ended up equal to the frame vector on their
 *         own field, otherwise the summed score (plus a fixed penalty unless
 *         mb_cmp is FF_CMP_RD)
 */
static int interlaced_search(MpegEncContext *s, int ref_index,
                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
{
    MotionEstContext * const c= &s->me;
    uint8_t * const mv_penalty= c->current_mv_penalty;
    const int size= 0;
    const int h= 8;
    const int field_stride= 2*s->linesize;
    const int mot_stride= s->mb_stride;
    const int xy= s->mb_x + s->mb_y*mot_stride;
    int P[10][2];               /* must be called P, the P_* macros index it */
    int identical= 1;
    int score_sum= 0;
    int block;

    /* switch the context to field geometry */
    c->ymin    >>= 1;
    c->ymax    >>= 1;
    c->stride  <<= 1;
    c->uvstride<<= 1;
    init_interlaced_ref(s, ref_index);

    for(block=0; block<2; block++){
        int best_dmin= INT_MAX;
        int best_field= -1;
        int field_select;

        for(field_select=0; field_select<2; field_select++){
            int16_t (* const mv_table)[2]= mv_tables[block][field_select];
            const int ref_slot= field_select + ref_index;
            const int16_t *neighbour;
            int dmin, mx_i, my_i;

            if(user_field_select){
                av_assert1(field_select==0 || field_select==1);
                av_assert1(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);
                if(field_select_tables[block][xy] != field_select)
                    continue;
            }

            /* left predictor, also used as the vector prediction */
            neighbour= mv_table[xy - 1];
            P_LEFT[0]= neighbour[0];
            P_LEFT[1]= neighbour[1];
            if(P_LEFT[0] > (c->xmax<<1))
                P_LEFT[0]= (c->xmax<<1);

            c->pred_x= P_LEFT[0];
            c->pred_y= P_LEFT[1];

            if(!s->first_slice_line){
                neighbour= mv_table[xy - mot_stride];
                P_TOP[0]= neighbour[0];
                P_TOP[1]= neighbour[1];
                neighbour= mv_table[xy - mot_stride + 1];
                P_TOPRIGHT[0]= neighbour[0];
                P_TOPRIGHT[1]= neighbour[1];

                if(P_TOP[1]      > (c->ymax<<1)) P_TOP[1]     = (c->ymax<<1);
                if(P_TOPRIGHT[0] < (c->xmin<<1)) P_TOPRIGHT[0]= (c->xmin<<1);
                if(P_TOPRIGHT[0] > (c->xmax<<1)) P_TOPRIGHT[0]= (c->xmax<<1);
                if(P_TOPRIGHT[1] > (c->ymax<<1)) P_TOPRIGHT[1]= (c->ymax<<1);

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            P_MV1[0]= mx; //FIXME not correct if block != field_select
            P_MV1[1]= my / 2;

            dmin= epzs_motion_search2(s, &mx_i, &my_i, P, block, ref_slot, mv_table, (1<<16)>>1);
            dmin= c->sub_motion_search(s, &mx_i, &my_i, dmin, block, ref_slot, size, h);

            mv_table[xy][0]= mx_i;
            mv_table[xy][1]= my_i;

            if(s->dsp.me_sub_cmp[0] == s->dsp.mb_cmp[0]){
                dmin+= c->mb_penalty_factor; //field_select bits
            }else{
                /* the search used another metric than the final decision:
                 * rebuild the half-pel prediction and score it with mb_cmp */
                //FIXME chroma ME
                uint8_t *ref= c->ref[ref_slot][0] + (mx_i>>1) + (my_i>>1)*field_stride;
                const int dxy= ((my_i & 1) << 1) | (mx_i & 1);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref, field_stride, h);
                else
                    s->dsp.put_pixels_tab       [size][dxy](c->scratchpad, ref, field_stride, h);

                dmin = s->dsp.mb_cmp[size](s, c->src[block][0], c->scratchpad, field_stride, h);
                dmin+= (mv_penalty[mx_i-c->pred_x] + mv_penalty[my_i-c->pred_y] + 1)*c->mb_penalty_factor;
            }

            dmin += field_select != block; //slightly prefer same field

            if(dmin < best_dmin){
                best_dmin = dmin;
                best_field= field_select;
            }
        }

        {
            int16_t (* const best_table)[2]= mv_tables[block][best_field];

            //FIXME check if these checks work and are any good at all
            if(   best_table[xy][0]   != mx
               || (best_table[xy][1]&1)
               || best_table[xy][1]*2 != my
               || best_field          != block)
                identical= 0;
        }

        field_select_tables[block][xy]= best_field;
        score_sum += best_dmin;
    }

    /* back to frame geometry */
    c->ymin    <<= 1;
    c->ymax    <<= 1;
    c->stride  >>= 1;
    c->uvstride>>= 1;

    if(identical)
        return INT_MAX;

    /* An SSE specific bias (32*qscale*qscale) used to be tried here and is
     * disabled; only the RD metric is returned without the fixed penalty. */
    if((c->avctx->mb_cmp&0xFF) == FF_CMP_RD)
        return score_sum;

    return score_sum + 11*c->mb_penalty_factor;
}

841 842 843
/**
 * Clamp a motion vector in place to the current search limits.
 *
 * @param s          encoder context providing s->me.{x,y}{min,max}
 * @param mv         vector to clamp, mv[0] is x and mv[1] is y
 * @param interlaced shift applied to the vertical limits (callers pass 0 or 1)
 */
static void clip_input_mv(MpegEncContext * s, int16_t *mv, int interlaced){
    const int lower[2]= { s->me.xmin, s->me.ymin>>interlaced };
    const int upper[2]= { s->me.xmax, s->me.ymax>>interlaced };
    int i;

    for(i=0; i<2; i++){
        /* two independent tests, the lower bound is applied first */
        if(mv[i] < lower[i]) mv[i]= lower[i];
        if(mv[i] > upper[i]) mv[i]= upper[i];
    }
}

851 852 853 854 855
/**
 * Score the motion vectors that are already stored in the current picture.
 *
 * The macroblock type is read from s->current_picture.f.mb_type and the
 * vectors from s->current_picture_ptr->f.motion_val.  The vectors are clipped
 * to the search limits, copied into the encoder's motion vector / field
 * select tables matching the macroblock type, and s->mb_type[] is set to the
 * corresponding CANDIDATE_MB_TYPE_*.  The prediction is then compared against
 * the source using the SSE functions of s->dsp.
 *
 * c->stride and c->uvstride are temporarily doubled while an interlaced
 * macroblock is scored and are always restored before returning.
 *
 * @param s      encoder context
 * @param mb_x   macroblock column
 * @param mb_y   macroblock row
 * @param p_type nonzero when called for a P frame, where list 1 vectors are
 *               rejected
 * @return the SSE based score (0 for an intra macroblock), or INT_MAX/2 after
 *         logging an error when the stored macroblock type cannot be used
 *         (backward vector in a P frame, interlaced or 4MV macroblock while
 *         the matching codec flag is off)
 */
static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int p_type){
    MotionEstContext * const c= &s->me;
    Picture *p= s->current_picture_ptr;
    int mb_xy= mb_x + mb_y*s->mb_stride;
    int xy= 2*mb_x + 2*mb_y*s->b8_stride;
    int mb_type= s->current_picture.f.mb_type[mb_xy];
    int flags= c->flags;
    int shift= (flags&FLAG_QPEL) + 1;
    int mask= (1<<shift)-1;         /* selects the sub-pel part of a vector */
    int x, y, i;
    int d=0;
    me_cmp_func cmpf= s->dsp.sse[0];
    me_cmp_func chroma_cmpf= s->dsp.sse[1];

    if(p_type && USES_LIST(mb_type, 1)){
        av_log(c->avctx, AV_LOG_ERROR, "backward motion vector in P frame\n");
        return INT_MAX/2;
    }
    av_assert0(IS_INTRA(mb_type) || USES_LIST(mb_type,0) || USES_LIST(mb_type,1));

    /* clip the vectors of all four 8x8 blocks, in both lists */
    for(i=0; i<4; i++){
        int xy= s->block_index[i];
        clip_input_mv(s, p->f.motion_val[0][xy], !!IS_INTERLACED(mb_type));
        clip_input_mv(s, p->f.motion_val[1][xy], !!IS_INTERLACED(mb_type));
    }

    if(IS_INTERLACED(mb_type)){
        int xy2= xy  + s->b8_stride;    /* second row of 8x8 blocks */
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;

        /* Reject before touching the strides: returning with c->stride and
         * c->uvstride still doubled would corrupt every later search. */
        if(!(s->flags & CODEC_FLAG_INTERLACED_ME)){
            av_log(c->avctx, AV_LOG_ERROR, "Interlaced macroblock selected but interlaced motion estimation disabled\n");
            return INT_MAX/2;
        }

        c->stride<<=1;
        c->uvstride<<=1;

        if(USES_LIST(mb_type, 0)){
            int field_select0= p->f.ref_index[0][4*mb_xy  ];
            int field_select1= p->f.ref_index[0][4*mb_xy+2];
            av_assert0(field_select0==0 ||field_select0==1);
            av_assert0(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 0);

            if(p_type){
                s->p_field_select_table[0][mb_xy]= field_select0;
                s->p_field_select_table[1][mb_xy]= field_select1;
                *(uint32_t*)s->p_field_mv_table[0][field_select0][mb_xy] = *(uint32_t*)p->f.motion_val[0][xy ];
                *(uint32_t*)s->p_field_mv_table[1][field_select1][mb_xy] = *(uint32_t*)p->f.motion_val[0][xy2];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER_I;
            }else{
                s->b_field_select_table[0][0][mb_xy]= field_select0;
                s->b_field_select_table[0][1][mb_xy]= field_select1;
                *(uint32_t*)s->b_field_mv_table[0][0][field_select0][mb_xy] = *(uint32_t*)p->f.motion_val[0][xy ];
                *(uint32_t*)s->b_field_mv_table[0][1][field_select1][mb_xy] = *(uint32_t*)p->f.motion_val[0][xy2];
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_FORWARD_I;
            }

            x = p->f.motion_val[0][xy ][0];
            y = p->f.motion_val[0][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0, 0, cmpf, chroma_cmpf, flags);
            x = p->f.motion_val[0][xy2][0];
            y = p->f.motion_val[0][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1, 1, cmpf, chroma_cmpf, flags);
        }
        if(USES_LIST(mb_type, 1)){
            int field_select0 = p->f.ref_index[1][4 * mb_xy    ];
            int field_select1 = p->f.ref_index[1][4 * mb_xy + 2];
            av_assert0(field_select0==0 ||field_select0==1);
            av_assert0(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 2);

            s->b_field_select_table[1][0][mb_xy]= field_select0;
            s->b_field_select_table[1][1][mb_xy]= field_select1;
            *(uint32_t*)s->b_field_mv_table[1][0][field_select0][mb_xy] = *(uint32_t*)p->f.motion_val[1][xy ];
            *(uint32_t*)s->b_field_mv_table[1][1][field_select1][mb_xy] = *(uint32_t*)p->f.motion_val[1][xy2];
            if(USES_LIST(mb_type, 0)){
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BIDIR_I;
            }else{
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BACKWARD_I;
            }

            /* note: this overwrites the list 0 score of a bidirectional MB */
            x = p->f.motion_val[1][xy ][0];
            y = p->f.motion_val[1][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0+2, 0, cmpf, chroma_cmpf, flags);
            x = p->f.motion_val[1][xy2][0];
            y = p->f.motion_val[1][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1+2, 1, cmpf, chroma_cmpf, flags);
            //FIXME bidir scores
        }
        c->stride>>=1;
        c->uvstride>>=1;
    }else if(IS_8X8(mb_type)){
        if(!(s->flags & CODEC_FLAG_4MV)){
            av_log(c->avctx, AV_LOG_ERROR, "4MV macroblock selected but 4MV encoding disabled\n");
            return INT_MAX/2;
        }
        cmpf= s->dsp.sse[1];
        chroma_cmpf= s->dsp.sse[1];
        init_mv4_ref(c);
        /* one vector per 8x8 block, scores are accumulated */
        for(i=0; i<4; i++){
            xy= s->block_index[i];
            x= p->f.motion_val[0][xy][0];
            y= p->f.motion_val[0][xy][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 1, 8, i, i, cmpf, chroma_cmpf, flags);
        }
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER4V;
    }else{
        if(USES_LIST(mb_type, 0)){
            if(p_type){
                *(uint32_t*)s->p_mv_table[mb_xy] = *(uint32_t*)p->f.motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER;
            }else if(USES_LIST(mb_type, 1)){
                *(uint32_t*)s->b_bidir_forw_mv_table[mb_xy] = *(uint32_t*)p->f.motion_val[0][xy];
                *(uint32_t*)s->b_bidir_back_mv_table[mb_xy] = *(uint32_t*)p->f.motion_val[1][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BIDIR;
            }else{
                *(uint32_t*)s->b_forw_mv_table[mb_xy] = *(uint32_t*)p->f.motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_FORWARD;
            }
            /* only the list 0 vector is scored, also for bidirectional MBs */
            x = p->f.motion_val[0][xy][0];
            y = p->f.motion_val[0][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 0, 0, cmpf, chroma_cmpf, flags);
        }else if(USES_LIST(mb_type, 1)){
            *(uint32_t*)s->b_back_mv_table[mb_xy] = *(uint32_t*)p->f.motion_val[1][xy];
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BACKWARD;

            x = p->f.motion_val[1][xy][0];
            y = p->f.motion_val[1][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 2, 0, cmpf, chroma_cmpf, flags);
        }else
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;
    }
    return d;
}

987 988 989
/**
 * Add the contribution of one macroblock to c->scene_change_score.
 *
 * @param s    encoder context
 * @param varc source variance term as computed by the caller (includes +500)
 * @param vard sum of squared differences of the motion compensated prediction
 */
static inline void p_frame_add_scene_change_score(MpegEncContext *s, int varc, int vard)
{
    MotionEstContext * const c= &s->me;
    int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
    int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;

    c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
}

/**
 * Estimate the motion of one macroblock of a P frame and choose its
 * candidate macroblock type(s).
 *
 * Stores the per-macroblock mean / variance statistics in the current
 * picture, runs the full-pel search selected by s->me_method, refines it
 * and writes the resulting vectors through set_p_mv_tables() and the
 * 4MV / interlaced searches.  The chosen candidate type mask is written to
 * s->mb_type[].
 *
 * When c->avctx->me_threshold is set, the vectors already present in the
 * picture are scored first (check_input_motion()); if they are good enough
 * no search is done at all.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_p_frame_motion(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    Picture * const pic= &s->current_picture;
    const int shift= 1+s->quarter_sample;
    const int mb_xy= s->mb_stride * mb_y + mb_x;
    uint8_t *pix, *ppix;
    int sum, mx, my, dmin;
    int varc;            ///< the variance of the block (sum of squared (p[y][x]-average))
    int vard;            ///< sum of squared differences with the estimated motion vector
    int P[10][2];        /* must be called P, the P_* macros index it */
    int mb_type= 0;

    init_ref(c, s->new_picture.f.data, s->last_picture.f.data, NULL, 16*mb_x, 16*mb_y, 0);

    av_assert0(s->quarter_sample==0 || s->quarter_sample==1);
    av_assert0(s->linesize == c->stride);
    av_assert0(s->uvlinesize == c->uvstride);

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip= 0;

    /* intra / predictive decision */
    pix = c->src[0][0];
    sum = s->dsp.pix_sum(pix, s->linesize);
    varc= s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500;

    pic->mb_mean[mb_xy]= (sum+128)>>8;
    pic->mb_var [mb_xy]= (varc+128)>>8;
    c->mb_var_sum_temp += (varc+128)>>8;

    if(c->avctx->me_threshold){
        vard= check_input_motion(s, mb_x, mb_y, 1);

        if((vard+128)>>8 < c->avctx->me_threshold){
            /* the stored vectors are good enough, no search needed */
            pic->mc_mb_var[mb_xy]= (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            p_frame_add_scene_change_score(s, varc, vard);
            return;
        }
        if((vard+128)>>8 < c->avctx->mb_threshold)
            mb_type= s->mb_type[mb_xy];
    }

    if(s->me_method == ME_X1 || s->me_method == ME_EPZS){
        const int mot_stride= s->b8_stride;
        const int mot_xy= s->block_index[0];
        const int16_t *neighbour;

        neighbour= pic->f.motion_val[0][mot_xy - 1];
        P_LEFT[0]= neighbour[0];
        P_LEFT[1]= neighbour[1];

        if(P_LEFT[0] > (c->xmax<<shift))
            P_LEFT[0]= (c->xmax<<shift);

        /* default prediction, replaced by the median below for H.263 */
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];

        if(!s->first_slice_line){
            neighbour= pic->f.motion_val[0][mot_xy - mot_stride];
            P_TOP[0]= neighbour[0];
            P_TOP[1]= neighbour[1];
            neighbour= pic->f.motion_val[0][mot_xy - mot_stride + 2];
            P_TOPRIGHT[0]= neighbour[0];
            P_TOPRIGHT[1]= neighbour[1];

            if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
            if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
            if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            if(s->out_format == FMT_H263){
                c->pred_x= P_MEDIAN[0];
                c->pred_y= P_MEDIAN[1];
            }
            /* otherwise (mpeg1 at least) the left vector stays the prediction */
        }

        dmin= ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);
    }else{
        /* ME_ZERO and anything unknown */
        mx  = 0;
        my  = 0;
        dmin= 0;
    }

    /* At this point (mx,my) are full-pel and the relative displacement */
    ppix= c->ref[0][0] + (my * s->linesize) + mx;

    vard= s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16);

    pic->mc_mb_var[mb_xy]= (vard+128)>>8;
    c->mc_mb_var_sum_temp += (vard+128)>>8;

    if(mb_type){
        /* the type was imposed by check_input_motion(), only refine it */
        p_frame_add_scene_change_score(s, varc, vard);

        if(mb_type == CANDIDATE_MB_TYPE_INTER){
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            set_p_mv_tables(s, mx, my, 1);
        }else{
            mx <<= shift;
            my <<= shift;
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER4V){
            h263_mv4_search(s, mx, my, shift);

            set_p_mv_tables(s, mx, my, 0);
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER_I){
            interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 1);
        }
    }else if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* collect every plausible candidate, the final choice is made later */
        const int try_4mv= (s->flags&CODEC_FLAG_4MV) != 0;
        const int try_interlaced= (s->flags&CODEC_FLAG_INTERLACED_ME) != 0;

        p_frame_add_scene_change_score(s, varc, vard);

        if(vard*2 + 200*256 > varc)
            mb_type |= CANDIDATE_MB_TYPE_INTRA;

        if(varc*2 + 200*256 > vard || s->qscale > 24){
            mb_type |= CANDIDATE_MB_TYPE_INTER;
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            if((s->flags&CODEC_FLAG_MV0) && (mx || my))
                mb_type |= CANDIDATE_MB_TYPE_SKIPPED; //FIXME check difference
        }else{
            mx <<= shift;
            my <<= shift;
        }

        if(try_4mv && !c->skip && varc > (50<<8) && vard > (10<<8)){
            if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER4V;

            set_p_mv_tables(s, mx, my, 0);
        }else{
            set_p_mv_tables(s, mx, my, 1);
        }

        if(try_interlaced && !c->skip){ //FIXME varc/d checks
            if(interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER_I;
        }
    }else{
        /* simple decision: keep exactly one type, the cheapest one */
        int intra_score, i;

        mb_type= CANDIDATE_MB_TYPE_INTER;

        dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
        if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
            dmin= get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

        if((s->flags&CODEC_FLAG_4MV) && !c->skip && varc > (50<<8) && vard > (10<<8)){
            const int dmin4= h263_mv4_search(s, mx, my, shift);

            if(dmin4 < dmin){
                mb_type= CANDIDATE_MB_TYPE_INTER4V;
                dmin   = dmin4;
            }
        }
        if((s->flags&CODEC_FLAG_INTERLACED_ME) && !c->skip){ //FIXME varc/d checks
            const int dmin_i= interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);

            if(dmin_i < dmin){
                mb_type= CANDIDATE_MB_TYPE_INTER_I;
                dmin   = dmin_i;
            }
        }

        set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);

        /* get intra luma score */
        if((c->avctx->mb_cmp&0xFF) == FF_CMP_SSE){
            intra_score= varc - 500;
        }else{
            /* compare the source against a flat block of its mean value */
            unsigned mean= (sum+128)>>8;
            mean*= 0x01010101;

            for(i=0; i<16; i++){
                uint32_t * const row= (uint32_t*)(&c->scratchpad[i*s->linesize]);

                row[0]= mean;
                row[1]= mean;
                row[2]= mean;
                row[3]= mean;
            }

            intra_score= s->dsp.mb_cmp[0](s, c->scratchpad, pix, s->linesize, 16);
        }
        intra_score += c->mb_penalty_factor*16;

        if(intra_score < dmin){
            mb_type= CANDIDATE_MB_TYPE_INTRA;
            pic->f.mb_type[mb_xy]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
        }else{
            pic->f.mb_type[mb_xy]= 0;
        }

        p_frame_add_scene_change_score(s, varc, vard);
    }

    s->mb_type[mb_xy]= mb_type;
}

1208 1209 1210
/**
 * Pre-pass motion search for one macroblock of a P frame.
 *
 * Unlike the main search the predictors are read from s->p_mv_table at
 * xy+1 and xy+mb_stride(-1), i.e. from the next macroblock in the row and
 * from the following row (the pass is presumably run in reverse scan
 * order -- verify against the caller).  Only the full-pel EPZS search is
 * run; the result is stored in s->p_mv_table[xy] scaled to sub-pel units.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the score returned by ff_epzs_motion_search()
 */
int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
                                    int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    const int shift= 1+s->quarter_sample;
    const int xy= mb_x + mb_y*s->mb_stride;
    const int16_t *neighbour;
    int P[10][2];               /* must be called P, the P_* macros index it */
    int mx, my, dmin;

    init_ref(c, s->new_picture.f.data, s->last_picture.f.data, NULL, 16*mb_x, 16*mb_y, 0);

    av_assert0(s->quarter_sample==0 || s->quarter_sample==1);

    c->pre_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip= 0;

    neighbour= s->p_mv_table[xy + 1];
    P_LEFT[0]= neighbour[0];
    P_LEFT[1]= neighbour[1];

    if(P_LEFT[0] < (c->xmin<<shift))
        P_LEFT[0]= (c->xmin<<shift);

    if(!s->first_slice_line){
        neighbour= s->p_mv_table[xy + s->mb_stride];
        P_TOP[0]= neighbour[0];
        P_TOP[1]= neighbour[1];
        neighbour= s->p_mv_table[xy + s->mb_stride - 1];
        P_TOPRIGHT[0]= neighbour[0];
        P_TOPRIGHT[1]= neighbour[1];

        if(P_TOP[1]      < (c->ymin<<shift)) P_TOP[1]     = (c->ymin<<shift);
        if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
        if(P_TOPRIGHT[1] < (c->ymin<<shift)) P_TOPRIGHT[1]= (c->ymin<<shift);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

        c->pred_x= P_MEDIAN[0];
        c->pred_y= P_MEDIAN[1];
    }else{
        /* special case for first line: no second row of predictors */
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];

        //FIXME
        P_TOP[0]     = 0;
        P_TOP[1]     = 0;
        P_TOPRIGHT[0]= 0;
        P_TOPRIGHT[1]= 0;
        P_MEDIAN[0]  = 0;
        P_MEDIAN[1]  = 0;
    }

    dmin= ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

    s->p_mv_table[xy][0]= mx<<shift;
    s->p_mv_table[xy][1]= my<<shift;

    return dmin;
}

1261
/**
 * Search one motion vector of a B frame macroblock in a single direction.
 *
 * Runs the full-pel search selected by s->me_method on reference slot
 * ref_index, refines it with c->sub_motion_search and stores the result in
 * mv_table.  When me_sub_cmp and mb_cmp differ (and the macroblock was not
 * flagged as skip) the score is recomputed with get_mb_score().
 *
 * @param s         encoder context
 * @param mb_x      macroblock column
 * @param mb_y      macroblock row
 * @param mv_table  table receiving the vector; it also supplies the spatial
 *                  predictors and, by being compared with
 *                  s->b_forw_mv_table, selects the temporal scale
 * @param ref_index reference slot to search in
 * @param f_code    selects the motion vector penalty table
 * @return the score of the chosen vector
 */
static int ff_estimate_motion_b(MpegEncContext * s,
                       int mb_x, int mb_y, int16_t (*mv_table)[2], int ref_index, int f_code)
{
    MotionEstContext * const c= &s->me;
    const int shift= 1+s->quarter_sample;
    const int mot_stride= s->mb_stride;
    const int mot_xy= mb_y*mot_stride + mb_x;
    int P[10][2];               /* must be called P, the P_* macros index it */
    int mx, my, dmin;

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);

    if(s->me_method == ME_X1 || s->me_method == ME_EPZS){
        const int16_t *neighbour;
        int mv_scale;

        neighbour= mv_table[mot_xy - 1];
        P_LEFT[0]= neighbour[0];
        P_LEFT[1]= neighbour[1];

        if(P_LEFT[0] > (c->xmax << shift))
            P_LEFT[0]= (c->xmax << shift);

        /* the row above only exists past the first line of a slice */
        if(!s->first_slice_line){
            neighbour= mv_table[mot_xy - mot_stride];
            P_TOP[0]= neighbour[0];
            P_TOP[1]= neighbour[1];
            neighbour= mv_table[mot_xy - mot_stride + 1];
            P_TOPRIGHT[0]= neighbour[0];
            P_TOPRIGHT[1]= neighbour[1];

            if(P_TOP[1]      > (c->ymax << shift)) P_TOP[1]     = (c->ymax << shift);
            if(P_TOPRIGHT[0] < (c->xmin << shift)) P_TOPRIGHT[0]= (c->xmin << shift);
            if(P_TOPRIGHT[1] > (c->ymax << shift)) P_TOPRIGHT[1]= (c->ymax << shift);

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
        }
        /* the vector prediction is always the left neighbour here */
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];

        /* scale applied to the s->p_mv_table vectors handed to the search */
        if(mv_table == s->b_forw_mv_table)
            mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
        else
            mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);

        dmin= ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);
    }else{
        /* ME_ZERO and anything unknown */
        mx  = 0;
        my  = 0;
        dmin= 0;
    }

    dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);

    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= get_mb_score(s, mx, my, 0, ref_index, 0, 16, 1);

    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;

    return dmin;
}

1333
/**
 * Score a pair of forward / backward motion vectors for bidirectional
 * prediction.
 *
 * The forward prediction (reference slot 0) is written to c->scratchpad,
 * the backward prediction (reference slot 2) is averaged into it, and the
 * result is compared against the source luma with s->dsp.mb_cmp[size].
 * The motion vector penalties of both vectors are added to the score.
 * Chroma is not taken into account.
 *
 * @param s                    encoder context
 * @param motion_fx, motion_fy forward vector in sub-pel units
 * @param motion_bx, motion_by backward vector in sub-pel units
 * @param pred_fx, pred_fy     forward vector prediction
 * @param pred_bx, pred_by     backward vector prediction
 * @param size                 index into the dsp function tables, used for
 *                             both the put and the avg step
 * @param h                    block height; not used by the quarter-pel
 *                             functions, which take no height argument
 * @return the penalised comparison score
 */
static inline int check_bidir_mv(MpegEncContext * s,
                   int motion_fx, int motion_fy,
                   int motion_bx, int motion_by,
                   int pred_fx, int pred_fy,
                   int pred_bx, int pred_by,
                   int size, int h)
{
    //FIXME optimize?
    //FIXME better f_code prediction (max mv & distance)
    //FIXME pointers
    MotionEstContext * const c= &s->me;
    uint8_t * const mv_penalty_f= c->mv_penalty[s->f_code] + MAX_MV; // penalties for the forward vector
    uint8_t * const mv_penalty_b= c->mv_penalty[s->b_code] + MAX_MV; // penalties for the backward vector
    int stride= c->stride;
    uint8_t *dest_y = c->scratchpad;
    uint8_t *ptr;
    int dxy;
    int src_x, src_y;
    int fbmin;
    uint8_t **src_data= c->src[0];
    uint8_t **ref_data= c->ref[0];
    uint8_t **ref2_data= c->ref[2];

    if(s->quarter_sample){
        /* the low 2 bits of each component select the quarter-pel filter */
        dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
        src_x = motion_fx >> 2;
        src_y = motion_fy >> 2;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        /* index by size like the avg step below, so both cover the same block */
        s->dsp.put_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);

        dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
        src_x = motion_bx >> 2;
        src_y = motion_by >> 2;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);
    }else{
        /* the low bit of each component selects the half-pel filter */
        dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
        src_x = motion_fx >> 1;
        src_y = motion_fy >> 1;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        s->dsp.put_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);

        dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
        src_x = motion_bx >> 1;
        src_y = motion_by >> 1;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
    }

    fbmin = (mv_penalty_f[motion_fx-pred_fx] + mv_penalty_f[motion_fy-pred_fy])*c->mb_penalty_factor
           +(mv_penalty_b[motion_bx-pred_bx] + mv_penalty_b[motion_by-pred_by])*c->mb_penalty_factor
           + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic

    //FIXME CHROMA !!! (c->avctx->mb_cmp & FF_CMP_CHROMA is not honoured yet)

    return fbmin;
}
1396

1397
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1398
static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
1399
{
1400
    MotionEstContext * const c= &s->me;
1401 1402
    const int mot_stride = s->mb_stride;
    const int xy = mb_y *mot_stride + mb_x;
1403 1404 1405 1406 1407 1408 1409 1410 1411
    int fbmin;
    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1412 1413 1414 1415 1416 1417 1418
    const int flags= c->sub_flags;
    const int qpel= flags&FLAG_QPEL;
    const int shift= 1+qpel;
    const int xmin= c->xmin<<shift;
    const int ymin= c->ymin<<shift;
    const int xmax= c->xmax<<shift;
    const int ymax= c->ymax<<shift;
1419
#define HASH(fx,fy,bx,by) ((fx)+17*(fy)+63*(bx)+117*(by))
1420
#define HASH8(fx,fy,bx,by) ((uint8_t)HASH(fx,fy,bx,by))
1421
    int hashidx= HASH(motion_fx,motion_fy, motion_bx, motion_by);
1422
    uint8_t map[256] = { 0 };
1423

1424
    map[hashidx&255] = 1;
1425

1426
    fbmin= check_bidir_mv(s, motion_fx, motion_fy,
1427 1428
                          motion_bx, motion_by,
                          pred_fx, pred_fy,
1429 1430
                          pred_bx, pred_by,
                          0, 16);
1431

1432
    if(s->avctx->bidir_refine){
1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453
        int end;
        static const uint8_t limittab[5]={0,8,32,64,80};
        const int limit= limittab[s->avctx->bidir_refine];
        static const int8_t vect[][4]={
{ 0, 0, 0, 1}, { 0, 0, 0,-1}, { 0, 0, 1, 0}, { 0, 0,-1, 0}, { 0, 1, 0, 0}, { 0,-1, 0, 0}, { 1, 0, 0, 0}, {-1, 0, 0, 0},

{ 0, 0, 1, 1}, { 0, 0,-1,-1}, { 0, 1, 1, 0}, { 0,-1,-1, 0}, { 1, 1, 0, 0}, {-1,-1, 0, 0}, { 1, 0, 0, 1}, {-1, 0, 0,-1},
{ 0, 1, 0, 1}, { 0,-1, 0,-1}, { 1, 0, 1, 0}, {-1, 0,-1, 0},
{ 0, 0,-1, 1}, { 0, 0, 1,-1}, { 0,-1, 1, 0}, { 0, 1,-1, 0}, {-1, 1, 0, 0}, { 1,-1, 0, 0}, { 1, 0, 0,-1}, {-1, 0, 0, 1},
{ 0,-1, 0, 1}, { 0, 1, 0,-1}, {-1, 0, 1, 0}, { 1, 0,-1, 0},

{ 0, 1, 1, 1}, { 0,-1,-1,-1}, { 1, 1, 1, 0}, {-1,-1,-1, 0}, { 1, 1, 0, 1}, {-1,-1, 0,-1}, { 1, 0, 1, 1}, {-1, 0,-1,-1},
{ 0,-1, 1, 1}, { 0, 1,-1,-1}, {-1, 1, 1, 0}, { 1,-1,-1, 0}, { 1, 1, 0,-1}, {-1,-1, 0, 1}, { 1, 0,-1, 1}, {-1, 0, 1,-1},
{ 0, 1,-1, 1}, { 0,-1, 1,-1}, { 1,-1, 1, 0}, {-1, 1,-1, 0}, {-1, 1, 0, 1}, { 1,-1, 0,-1}, { 1, 0, 1,-1}, {-1, 0,-1, 1},
{ 0, 1, 1,-1}, { 0,-1,-1, 1}, { 1, 1,-1, 0}, {-1,-1, 1, 0}, { 1,-1, 0, 1}, {-1, 1, 0,-1}, {-1, 0, 1, 1}, { 1, 0,-1,-1},

{ 1, 1, 1, 1}, {-1,-1,-1,-1},
{ 1, 1, 1,-1}, {-1,-1,-1, 1}, { 1, 1,-1, 1}, {-1,-1, 1,-1}, { 1,-1, 1, 1}, {-1, 1,-1,-1}, {-1, 1, 1, 1}, { 1,-1,-1,-1},
{ 1, 1,-1,-1}, {-1,-1, 1, 1}, { 1,-1,-1, 1}, {-1, 1, 1,-1}, { 1,-1, 1,-1}, {-1, 1,-1, 1},
        };
        static const uint8_t hash[]={
1454
HASH8( 0, 0, 0, 1), HASH8( 0, 0, 0,-1), HASH8( 0, 0, 1, 0), HASH8( 0, 0,-1, 0), HASH8( 0, 1, 0, 0), HASH8( 0,-1, 0, 0), HASH8( 1, 0, 0, 0), HASH8(-1, 0, 0, 0),
1455

1456 1457 1458 1459
HASH8( 0, 0, 1, 1), HASH8( 0, 0,-1,-1), HASH8( 0, 1, 1, 0), HASH8( 0,-1,-1, 0), HASH8( 1, 1, 0, 0), HASH8(-1,-1, 0, 0), HASH8( 1, 0, 0, 1), HASH8(-1, 0, 0,-1),
HASH8( 0, 1, 0, 1), HASH8( 0,-1, 0,-1), HASH8( 1, 0, 1, 0), HASH8(-1, 0,-1, 0),
HASH8( 0, 0,-1, 1), HASH8( 0, 0, 1,-1), HASH8( 0,-1, 1, 0), HASH8( 0, 1,-1, 0), HASH8(-1, 1, 0, 0), HASH8( 1,-1, 0, 0), HASH8( 1, 0, 0,-1), HASH8(-1, 0, 0, 1),
HASH8( 0,-1, 0, 1), HASH8( 0, 1, 0,-1), HASH8(-1, 0, 1, 0), HASH8( 1, 0,-1, 0),
1460

1461 1462 1463 1464
HASH8( 0, 1, 1, 1), HASH8( 0,-1,-1,-1), HASH8( 1, 1, 1, 0), HASH8(-1,-1,-1, 0), HASH8( 1, 1, 0, 1), HASH8(-1,-1, 0,-1), HASH8( 1, 0, 1, 1), HASH8(-1, 0,-1,-1),
HASH8( 0,-1, 1, 1), HASH8( 0, 1,-1,-1), HASH8(-1, 1, 1, 0), HASH8( 1,-1,-1, 0), HASH8( 1, 1, 0,-1), HASH8(-1,-1, 0, 1), HASH8( 1, 0,-1, 1), HASH8(-1, 0, 1,-1),
HASH8( 0, 1,-1, 1), HASH8( 0,-1, 1,-1), HASH8( 1,-1, 1, 0), HASH8(-1, 1,-1, 0), HASH8(-1, 1, 0, 1), HASH8( 1,-1, 0,-1), HASH8( 1, 0, 1,-1), HASH8(-1, 0,-1, 1),
HASH8( 0, 1, 1,-1), HASH8( 0,-1,-1, 1), HASH8( 1, 1,-1, 0), HASH8(-1,-1, 1, 0), HASH8( 1,-1, 0, 1), HASH8(-1, 1, 0,-1), HASH8(-1, 0, 1, 1), HASH8( 1, 0,-1,-1),
1465

1466 1467 1468
HASH8( 1, 1, 1, 1), HASH8(-1,-1,-1,-1),
HASH8( 1, 1, 1,-1), HASH8(-1,-1,-1, 1), HASH8( 1, 1,-1, 1), HASH8(-1,-1, 1,-1), HASH8( 1,-1, 1, 1), HASH8(-1, 1,-1,-1), HASH8(-1, 1, 1, 1), HASH8( 1,-1,-1,-1),
HASH8( 1, 1,-1,-1), HASH8(-1,-1, 1, 1), HASH8( 1,-1,-1, 1), HASH8(-1, 1, 1,-1), HASH8( 1,-1, 1,-1), HASH8(-1, 1,-1, 1),
1469 1470
};

1471
#define CHECK_BIDIR(fx,fy,bx,by)\
1472
    if( !map[(hashidx+HASH(fx,fy,bx,by))&255]\
1473
       &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
Michael Niedermayer's avatar
Michael Niedermayer committed
1474
       &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
1475
        int score;\
1476
        map[(hashidx+HASH(fx,fy,bx,by))&255] = 1;\
Michael Niedermayer's avatar
Michael Niedermayer committed
1477 1478
        score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
        if(score < fbmin){\
1479
            hashidx += HASH(fx,fy,bx,by);\
Michael Niedermayer's avatar
Michael Niedermayer committed
1480 1481 1482 1483 1484 1485 1486
            fbmin= score;\
            motion_fx+=fx;\
            motion_fy+=fy;\
            motion_bx+=bx;\
            motion_by+=by;\
            end=0;\
        }\
1487 1488 1489
    }
#define CHECK_BIDIR2(a,b,c,d)\
CHECK_BIDIR(a,b,c,d)\
1490
CHECK_BIDIR(-(a),-(b),-(c),-(d))
1491 1492

        do{
1493 1494
            int i;
            int borderdist=0;
1495 1496
            end=1;

1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530
            CHECK_BIDIR2(0,0,0,1)
            CHECK_BIDIR2(0,0,1,0)
            CHECK_BIDIR2(0,1,0,0)
            CHECK_BIDIR2(1,0,0,0)

            for(i=8; i<limit; i++){
                int fx= motion_fx+vect[i][0];
                int fy= motion_fy+vect[i][1];
                int bx= motion_bx+vect[i][2];
                int by= motion_by+vect[i][3];
                if(borderdist<=0){
                    int a= (xmax - FFMAX(fx,bx))|(FFMIN(fx,bx) - xmin);
                    int b= (ymax - FFMAX(fy,by))|(FFMIN(fy,by) - ymin);
                    if((a|b) < 0)
                        map[(hashidx+hash[i])&255] = 1;
                }
                if(!map[(hashidx+hash[i])&255]){
                    int score;
                    map[(hashidx+hash[i])&255] = 1;
                    score= check_bidir_mv(s, fx, fy, bx, by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);
                    if(score < fbmin){
                        hashidx += hash[i];
                        fbmin= score;
                        motion_fx=fx;
                        motion_fy=fy;
                        motion_bx=bx;
                        motion_by=by;
                        end=0;
                        borderdist--;
                        if(borderdist<=0){
                            int a= FFMIN(xmax - FFMAX(fx,bx), FFMIN(fx,bx) - xmin);
                            int b= FFMIN(ymax - FFMAX(fy,by), FFMIN(fy,by) - ymin);
                            borderdist= FFMIN(a,b);
                        }
1531 1532 1533 1534 1535 1536
                    }
                }
            }
        }while(!end);
    }

1537 1538 1539 1540 1541
    s->b_bidir_forw_mv_table[xy][0]= motion_fx;
    s->b_bidir_forw_mv_table[xy][1]= motion_fy;
    s->b_bidir_back_mv_table[xy][0]= motion_bx;
    s->b_bidir_back_mv_table[xy][1]= motion_by;

1542
    return fbmin;
1543 1544
}

1545
/**
 * Run the direct-mode motion search for one macroblock.
 *
 * The co-located vectors are read from s->next_picture and scaled by
 * pb_time/pp_time into c->direct_basis_mv. The range in which the direct
 * delta vector may move is derived from them and the picture size. If that
 * range does not contain 0, direct mode is rejected: the table entry is
 * zeroed and a very large score (256*256*256*64) is returned.
 * Otherwise an EPZS search followed by half-/quarter-pel refinement is run
 * with FLAG_DIRECT set, and the resulting delta is stored in
 * s->b_direct_mv_table.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return score of the best direct vector, or 256*256*256*64 if direct mode
 *         is not usable for this macroblock
 */
static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int P[10][2];
    const int mot_stride = s->mb_stride;
    const int mot_xy = mb_y*mot_stride + mb_x;
    const int shift= 1+s->quarter_sample;
    int dmin, i;
    const int time_pp= s->pp_time;
    const int time_pb= s->pb_time;
    int mx, my, xmin, xmax, ymin, ymax;
    int clip_xmin, clip_xmax, clip_ymin, clip_ymax;
    int16_t (*mv_table)[2]= s->b_direct_mv_table;

    c->current_mv_penalty= c->mv_penalty[1] + MAX_MV;
    ymin= xmin=(-32)>>shift;
    ymax= xmax=   31>>shift;

    if (IS_8X8(s->next_picture.f.mb_type[mot_xy])) {
        s->mv_type= MV_TYPE_8X8;
    }else{
        s->mv_type= MV_TYPE_16X16;
    }

    for(i=0; i<4; i++){
        int index= s->block_index[i];
        int min, max;

        c->co_located_mv[i][0] = s->next_picture.f.motion_val[0][index][0];
        c->co_located_mv[i][1] = s->next_picture.f.motion_val[0][index][1];
        c->direct_basis_mv[i][0]= c->co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
        c->direct_basis_mv[i][1]= c->co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));

        /* Horizontal extent covered by the forward and backward vectors. */
        max= FFMAX(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        min= FFMIN(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        max+= 16*mb_x + 1; // +-1 is for the simpler rounding
        min+= 16*mb_x - 1;
        xmax= FFMIN(xmax, s->width - max);
        xmin= FFMAX(xmin, - 16     - min);

        /* Same for the vertical direction. */
        max= FFMAX(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        min= FFMIN(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        max+= 16*mb_y + 1; // +-1 is for the simpler rounding
        min+= 16*mb_y - 1;
        ymax= FFMIN(ymax, s->height - max);
        ymin= FFMAX(ymin, - 16      - min);

        /* A 16x16 co-located block has a single vector; only block 0 is used. */
        if(s->mv_type == MV_TYPE_16X16) break;
    }

    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);

    /* The delta range must contain 0, otherwise direct mode is unusable. */
    if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
        s->b_direct_mv_table[mot_xy][0]= 0;
        s->b_direct_mv_table[mot_xy][1]= 0;

        return 256*256*256*64;
    }

    c->xmin= xmin;
    c->ymin= ymin;
    c->xmax= xmax;
    c->ymax= ymax;
    c->flags     |= FLAG_DIRECT;
    c->sub_flags |= FLAG_DIRECT;
    c->pred_x=0;
    c->pred_y=0;

    /* Limits in sub-pel units. xmin/ymin are <= 0 here, so scale by
     * multiplication: left-shifting a negative value is undefined in C. */
    clip_xmin= xmin * (1 << shift);
    clip_xmax= xmax * (1 << shift);
    clip_ymin= ymin * (1 << shift);
    clip_ymax= ymax * (1 << shift);

    P_LEFT[0]        = av_clip(mv_table[mot_xy - 1][0], clip_xmin, clip_xmax);
    P_LEFT[1]        = av_clip(mv_table[mot_xy - 1][1], clip_ymin, clip_ymax);

    /* special case for first line */
    if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as it is clipped
        P_TOP[0]      = av_clip(mv_table[mot_xy - mot_stride             ][0], clip_xmin, clip_xmax);
        P_TOP[1]      = av_clip(mv_table[mot_xy - mot_stride             ][1], clip_ymin, clip_ymax);
        P_TOPRIGHT[0] = av_clip(mv_table[mot_xy - mot_stride + 1         ][0], clip_xmin, clip_xmax);
        P_TOPRIGHT[1] = av_clip(mv_table[mot_xy - mot_stride + 1         ][1], clip_ymin, clip_ymax);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    }

    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);
    if(c->sub_flags&FLAG_QPEL)
        dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    else
        dmin = hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);

    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

    get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed

    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;
    c->flags     &= ~FLAG_DIRECT;
    c->sub_flags &= ~FLAG_DIRECT;

    return dmin;
}

/**
 * Estimate motion for one macroblock of a B-frame and select its candidate
 * macroblock type(s).
 *
 * Runs the direct (MPEG-4 only), forward, backward, bidirectional and, when
 * CODEC_FLAG_INTERLACED_ME is set, field searches. The result is stored in
 * s->mb_type[] as a CANDIDATE_MB_TYPE_* value (a set of candidates when
 * mb_decision > FF_MB_DECISION_SIMPLE). s->current_picture.mc_mb_var[] and
 * c->mc_mb_var_sum_temp are updated from the best score.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_b_frame_motion(MpegEncContext * s,
                             int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->mb_penalty_factor;
    int fmin, bmin, dmin, fbmin, bimin, fimin;
    int type=0;
    const int xy = mb_y*s->mb_stride + mb_x;
    init_ref(c, s->new_picture.f.data, s->last_picture.f.data,
             s->next_picture.f.data, 16 * mb_x, 16 * mb_y, 2);

    get_limits(s, 16*mb_x, 16*mb_y);

    c->skip=0;

    /* Co-located macroblock of the next picture is skipped: only direct mode
     * is considered. */
    if (s->codec_id == AV_CODEC_ID_MPEG4 && s->next_picture.f.mbskip_table[xy]) {
        int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0

        /* Square in unsigned arithmetic: direct_search() may return 2^30,
         * whose square overflows a signed int (undefined behaviour). */
        score= ((unsigned)score*(unsigned)score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
        s->mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_DIRECT0;

        return;
    }

    if(c->avctx->me_threshold){
        int vard= check_input_motion(s, mb_x, mb_y, 0);

        if((vard+128)>>8 < c->avctx->me_threshold){
//            pix = c->src[0][0];
//            sum = s->dsp.pix_sum(pix, s->linesize);
//            varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500;

//            pic->mb_var   [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
             s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
/*            pic->mb_mean  [s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
            c->mb_var_sum_temp    += (varc+128)>>8;*/
            c->mc_mb_var_sum_temp += (vard+128)>>8;
/*            if (vard <= 64<<8 || vard < varc) {
                c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
            }else{
                c->scene_change_score+= s->qscale * s->avctx->scenechange_factor;
            }*/
            return;
        }
        if((vard+128)>>8 < c->avctx->mb_threshold){
            /* Keep the macroblock type already stored in s->mb_type[] and
             * only redo the searches that type needs. */
            type= s->mb_type[mb_y*s->mb_stride + mb_x];
            if(type == CANDIDATE_MB_TYPE_DIRECT){
                direct_search(s, mb_x, mb_y);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
                interlaced_search(s, 0,
                                        s->b_field_mv_table[0], s->b_field_select_table[0],
                                        s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 1);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
                interlaced_search(s, 2,
                                        s->b_field_mv_table[1], s->b_field_select_table[1],
                                        s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 1);
            }
            return;
        }
    }

    if (s->codec_id == AV_CODEC_ID_MPEG4)
        dmin= direct_search(s, mb_x, mb_y);
    else
        dmin= INT_MAX;
//FIXME penalty stuff for non mpeg4
    c->skip=0;
    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) + 3*penalty_factor;

    c->skip=0;
    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) + 2*penalty_factor;
    av_dlog(s, " %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);

    c->skip=0;
    fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;
    av_dlog(s, "%d %d %d %d\n", dmin, fmin, bmin, fbmin);

    if(s->flags & CODEC_FLAG_INTERLACED_ME){
//FIXME mb type penalty
        c->skip=0;
        c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
        fimin= interlaced_search(s, 0,
                                 s->b_field_mv_table[0], s->b_field_select_table[0],
                                 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
        c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
        bimin= interlaced_search(s, 2,
                                 s->b_field_mv_table[1], s->b_field_select_table[1],
                                 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
    }else
        fimin= bimin= INT_MAX;

    {
        /* Pick the type with the lowest score; direct wins a tie against
         * forward, every other type must be strictly better. */
        int score= fmin;
        type = CANDIDATE_MB_TYPE_FORWARD;

        if (dmin <= score){
            score = dmin;
            type = CANDIDATE_MB_TYPE_DIRECT;
        }
        if(bmin<score){
            score=bmin;
            type= CANDIDATE_MB_TYPE_BACKWARD;
        }
        if(fbmin<score){
            score=fbmin;
            type= CANDIDATE_MB_TYPE_BIDIR;
        }
        if(fimin<score){
            score=fimin;
            type= CANDIDATE_MB_TYPE_FORWARD_I;
        }
        if(bimin<score){
            score=bimin;
            type= CANDIDATE_MB_TYPE_BACKWARD_I;
        }

        /* Unsigned multiply: avoids signed overflow for large scores. */
        score= ((unsigned)score*(unsigned)score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
    }

    if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* Leave the final choice to the later decision stage: offer every
         * type that was actually searched. */
        type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
        if(fimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_FORWARD_I;
        if(bimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_BACKWARD_I;
        if(fimin < INT_MAX && bimin < INT_MAX){
            type |= CANDIDATE_MB_TYPE_BIDIR_I;
        }
         //FIXME something smarter
        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //do not try direct mode if it is invalid for this MB
        /* Non-zero direct delta: also offer the zero-delta variant. The two
         * int16_t components are tested directly instead of reading the pair
         * through a uint32_t pointer, which would break strict aliasing. */
        if(s->codec_id == AV_CODEC_ID_MPEG4 && (type&CANDIDATE_MB_TYPE_DIRECT) && (s->flags&CODEC_FLAG_MV0)
           && (s->b_direct_mv_table[xy][0] | s->b_direct_mv_table[xy][1]))
            type |= CANDIDATE_MB_TYPE_DIRECT0;
    }

    s->mb_type[mb_y*s->mb_stride + mb_x]= type;
}

/**
 * Find the best f_code for motion estimation methods that search without a
 * range limit.
 *
 * For me_method >= ME_EPZS every candidate f_code i starts with a score of
 * mb_num*(8-i). Each macroblock whose type matches and whose vector lies
 * inside the allowed range subtracts 170 from every score[j] with
 * j < fcode_tab[] of its vector, subject to the variance test below. The
 * index in 1..7 with the highest score is returned.
 *
 * @param s        encoder context
 * @param mv_table motion vector table, indexed like s->mb_type[]
 * @param type     mask of CANDIDATE_MB_TYPE_* values to take into account
 * @return the selected f_code; always 1 for me_method < ME_EPZS
 */
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
{
    if(s->me_method>=ME_EPZS){
        int score[8];
        int i, y, range= s->avctx->me_range ? s->avctx->me_range : (INT_MAX/2);
        uint8_t * fcode_tab= s->fcode_tab;
        int best_fcode=-1;
        int best_score=-10000000;

        if(s->msmpeg4_version)
            range= FFMIN(range, 16);
        else if(s->codec_id == AV_CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL)
            range= FFMIN(range, 256);

        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);

        for(y=0; y<s->mb_height; y++){
            int x;
            int xy= y*s->mb_stride;
            /* xy advances in the loop header so that the 'continue'
             * statements below cannot leave it behind x. */
            for(x=0; x<s->mb_width; x++, xy++){
                int mx, my, fcode, j;

                if(!(s->mb_type[xy] & type))
                    continue;

                mx= mv_table[xy][0];
                my= mv_table[xy][1];

                /* Vectors outside the range do not vote. */
                if(mx >= range || mx < -range ||
                   my >= range || my < -range)
                    continue;

                fcode= FFMAX(fcode_tab[mx + MAX_MV],
                             fcode_tab[my + MAX_MV]);

                for(j=0; j<fcode && j<8; j++){
                    if(s->pict_type==AV_PICTURE_TYPE_B || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy])
                        score[j]-= 170;
                }
            }
        }

        for(i=1; i<8; i++){
            if(score[i] > best_score){
                best_score= score[i];
                best_fcode= i;
            }
        }

        return best_fcode;
    }else{
        return 1;
    }
}

1856 1857
/**
 * Convert 4MV macroblocks of a P-frame whose vectors exceed the range
 * representable with s->f_code into intra macroblocks.
 *
 * Only acts when CODEC_FLAG_4MV is set. For each macroblock flagged
 * CANDIDATE_MB_TYPE_INTER4V, the four 8x8 vectors in
 * s->current_picture.f.motion_val[0] are checked against
 * [-range, range-1]. If any is outside, the INTER4V flag is replaced by
 * CANDIDATE_MB_TYPE_INTRA in s->mb_type[] and
 * s->current_picture.f.mb_type[] is set to CANDIDATE_MB_TYPE_INTRA.
 *
 * @param s encoder context; s->pict_type must be AV_PICTURE_TYPE_P
 */
void ff_fix_long_p_mvs(MpegEncContext * s)
{
    MotionEstContext * const c= &s->me;
    const int f_code= s->f_code;
    int y, range;
    av_assert0(s->pict_type==AV_PICTURE_TYPE_P);

    range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);

    av_assert0(range <= 16 || !s->msmpeg4_version);
    av_assert0(range <=256 || !(s->codec_id == AV_CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));

    if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;

    if(s->flags&CODEC_FLAG_4MV){
        const int wrap= s->b8_stride;

        /* clip / convert to intra 8x8 type MVs */
        for(y=0; y<s->mb_height; y++){
            int xy= y*2*wrap;        /* index of the top-left 8x8 block */
            int i= y*s->mb_stride;   /* macroblock index */
            int x;

            for(x=0; x<s->mb_width; x++, xy+=2, i++){
                int block;

                if(!(s->mb_type[i]&CANDIDATE_MB_TYPE_INTER4V))
                    continue;

                for(block=0; block<4; block++){
                    int off= (block& 1) + (block>>1)*wrap;
                    int mx = s->current_picture.f.motion_val[0][ xy + off ][0];
                    int my = s->current_picture.f.motion_val[0][ xy + off ][1];

                    if(   mx >=range || mx <-range
                       || my >=range || my <-range){
                        s->mb_type[i] &= ~CANDIDATE_MB_TYPE_INTER4V;
                        s->mb_type[i] |= CANDIDATE_MB_TYPE_INTRA;
                        s->current_picture.f.mb_type[i] = CANDIDATE_MB_TYPE_INTRA;
                        /* The macroblock is intra now; the remaining
                         * blocks cannot change the outcome. */
                        break;
                    }
                }
            }
        }
    }
}

1902 1903 1904 1905
/**
 *
 * @param truncate 1 for truncation, 0 for using intra
 */
1906
void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
1907
                     int16_t (*mv_table)[2], int f_code, int type, int truncate)
1908
{
1909
    MotionEstContext * const c= &s->me;
1910
    int y, h_range, v_range;
1911

1912
    // RAL: 8 in MPEG-1, 16 in MPEG-4
1913
    int range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
1914

1915
    if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
1916

1917 1918 1919
    h_range= range;
    v_range= field_select_table ? range>>1 : range;

1920 1921 1922
    /* clip / convert to intra 16x16 type MVs */
    for(y=0; y<s->mb_height; y++){
        int x;
1923
        int xy= y*s->mb_stride;
1924
        for(x=0; x<s->mb_width; x++){
1925
            if (s->mb_type[xy] & type){    // RAL: "type" test added...
1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940
                if(field_select_table==NULL || field_select_table[xy] == field_select){
                    if(   mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
                       || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){

                        if(truncate){
                            if     (mv_table[xy][0] > h_range-1) mv_table[xy][0]=  h_range-1;
                            else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
                            if     (mv_table[xy][1] > v_range-1) mv_table[xy][1]=  v_range-1;
                            else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
                        }else{
                            s->mb_type[xy] &= ~type;
                            s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
                            mv_table[xy][0]=
                            mv_table[xy][1]= 0;
                        }
1941
                    }
1942
                }
1943 1944 1945 1946 1947
            }
            xy++;
        }
    }
}