/*
 * Motion estimation
 * Copyright (c) 2000,2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 * new motion estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
24

Michael Niedermayer's avatar
Michael Niedermayer committed
25
/**
 * @file
 * Motion estimation.
 */
29

Fabrice Bellard's avatar
Fabrice Bellard committed
30 31
#include <stdlib.h>
#include <stdio.h>
32
#include <limits.h>
33

Fabrice Bellard's avatar
Fabrice Bellard committed
34
#include "avcodec.h"
35
#include "internal.h"
36
#include "mathops.h"
37
#include "motion_est.h"
38
#include "mpegutils.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
39 40
#include "mpegvideo.h"

41 42 43 44 45 46
/* Shorthands for entries of a local predictor array that must be named P;
 * each entry is an (x, y) pair accessed as P_xxx[0] / P_xxx[1]. */
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]

47 48 49
/* Row shift used when forming score_map indices: index = y*(1<<ME_MAP_SHIFT) + x. */
#define ME_MAP_SHIFT 3
/* The map generation counter is advanced in steps of 1<<(2*ME_MAP_MV_BITS). */
#define ME_MAP_MV_BITS 11

50
/* Forward declaration; the definition appears later in this file. */
static int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h);
Michael Niedermayer's avatar
Michael Niedermayer committed
54

55
/**
 * Advance the generation tag of the motion-estimation map.
 *
 * The counter is stepped by 1<<(2*ME_MAP_MV_BITS). When the stepped value
 * becomes 0, the counter is restarted at one step and c->map is cleared
 * (ME_MAP_SIZE entries of uint32_t).
 *
 * @param c motion estimation context whose map_generation/map are updated
 * @return the new generation value
 */
static inline unsigned update_map_generation(MotionEstContext *c)
{
    c->map_generation += 1 << (ME_MAP_MV_BITS * 2);
    if (c->map_generation == 0) {
        /* never hand out generation 0: restart and wipe the map */
        c->map_generation = 1 << (ME_MAP_MV_BITS * 2);
        memset(c->map, 0, sizeof(uint32_t) * ME_MAP_SIZE);
    }
    return c->map_generation;
}

65 66 67 68 69 70
/* shape adaptive search stuff */
typedef struct Minima{
    int height;
    int x, y;
    int checked;
}Minima;

/**
 * qsort()-style comparator ordering Minima entries by ascending height.
 *
 * Only the sign of the result is meaningful. The heights are compared
 * rather than subtracted so that widely separated values cannot overflow
 * a signed int.
 *
 * @param a pointer to the first Minima
 * @param b pointer to the second Minima
 * @return negative, zero or positive if a's height is less than, equal to
 *         or greater than b's height
 */
static int minima_cmp(const void *a, const void *b){
    const Minima *da = (const Minima *) a;
    const Minima *db = (const Minima *) b;

    return (da->height > db->height) - (da->height < db->height);
}
Michael Niedermayer's avatar
Michael Niedermayer committed
78

79 80 81
/* Bit flags describing a comparison variant; they are combined by
 * get_flags() and tested by the cmp*() helpers below. */
#define FLAG_QPEL   1 //must be 1: (flags&FLAG_QPEL) is passed on as the 0/1 "qpel" argument and used in shift counts
#define FLAG_CHROMA 2
#define FLAG_DIRECT 4
Michael Niedermayer's avatar
Michael Niedermayer committed
82

83
/**
 * Set up the source and reference plane pointers of the context for the
 * block at position (x, y).
 *
 * For each of the 3 planes, c->src[0][i] and c->ref[0][i] are set to the
 * given plane pointer plus a per-plane offset: y*stride + x for plane 0 and
 * (y*uvstride + x)>>1 for planes 1 and 2. If ref_index is non-zero,
 * c->ref[ref_index][i] is additionally set from ref2 with the same offsets;
 * ref2 is not read otherwise.
 *
 * @param c         motion estimation context to update
 * @param src       source plane pointers
 * @param ref       reference plane pointers stored in slot 0
 * @param ref2      reference plane pointers stored in slot ref_index
 * @param x         horizontal position
 * @param y         vertical position
 * @param ref_index extra reference slot to fill, or 0 for none
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
    const int offset[3]= {
          y*c->  stride + x,
        ((y*c->uvstride + x)>>1),
        ((y*c->uvstride + x)>>1),
    };
    int i;

    for(i=0; i<3; i++){
        c->src[0][i]= src [i] + offset[i];
        c->ref[0][i]= ref [i] + offset[i];
    }
    if(ref_index){
        for(i=0; i<3; i++){
            c->ref[ref_index][i]= ref2[i] + offset[i];
        }
    }
}

101
/**
 * Build the FLAG_* combination for one comparison stage.
 *
 * @param c      context; FLAG_QPEL is set when c->avctx->flags contains
 *               AV_CODEC_FLAG_QPEL
 * @param direct non-zero to set FLAG_DIRECT
 * @param chroma non-zero to set FLAG_CHROMA
 * @return sum of the selected FLAG_* values
 */
static int get_flags(MotionEstContext *c, int direct, int chroma){
    return   ((c->avctx->flags&AV_CODEC_FLAG_QPEL) ? FLAG_QPEL : 0)
           + (direct ? FLAG_DIRECT : 0)
           + (chroma ? FLAG_CHROMA : 0);
}

107
/**
 * Score a "direct" prediction for the candidate (x, y) + (subx, suby).
 *
 * A forward vector is formed from c->direct_basis_mv plus the candidate and
 * a backward vector from it and c->co_located_mv (or, when the candidate
 * component is 0, from co_located_mv scaled by (pb_time - pp_time)/pp_time).
 * The forward prediction is written to c->temp with the *_put functions, the
 * backward prediction is averaged into it with the *_avg functions, and the
 * result is compared against src[0] with cmp_func over 16 rows.
 *
 * The candidate is first checked against c->xmin/xmax/ymin/ymax; if it is
 * outside, 256*256*256*32 is returned without touching c->temp.
 *
 * The sub-pel coordinates are built with a multiplication rather than a left
 * shift so that negative x / y (and negative limits) do not invoke undefined
 * behaviour; cmp_inline() uses the same form.
 *
 * size, h and chroma_cmp_func are accepted for signature compatibility but
 * are not used by this function.
 *
 * @param qpel 1 for quarter-pel units, 0 for half-pel units
 * @return comparison score, or 256*256*256*32 when out of range
 */
static av_always_inline int cmp_direct_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int hx= subx + x*(1<<(1+qpel));
    const int hy= suby + y*(1<<(1+qpel));
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    const int in_range=    x >= c->xmin && hx <= c->xmax*(1<<(qpel+1))
                        && y >= c->ymin && hy <= c->ymax*(1<<(qpel+1));
    int d;
    //FIXME check chroma 4mv, (no crashes ...)
    av_assert2(in_range);
    if(in_range){
        const int time_pp= s->pp_time;
        const int time_pb= s->pb_time;
        const int mask= 2*qpel+1;
        if(s->mv_type==MV_TYPE_8X8){
            int i;
            for(i=0; i<4; i++){
                int fx = c->direct_basis_mv[i][0] + hx;
                int fy = c->direct_basis_mv[i][1] + hy;
                int bx = hx ? fx - c->co_located_mv[i][0] : c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(qpel+4));
                int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
                int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

                /* 8x8 quadrant i of the 16x16 temp block */
                uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1);
                /* NOTE(review): ref[8] indexes past the 4 visible plane slots of
                 * this reference; presumably it reaches the backward reference
                 * through the layout of c->ref -- confirm against the struct. */
                if(qpel){
                    c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
                    c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride);
                }else{
                    c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
                    c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
                }
            }
        }else{
            int fx = c->direct_basis_mv[0][0] + hx;
            int fy = c->direct_basis_mv[0][1] + hy;
            int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
            int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
            int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
            int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

            if(qpel){
                /* the 16x16 block is processed as four 8x8 quadrants */
                c->qpel_put[1][fxy](c->temp               , ref[0] + (fx>>2) + (fy>>2)*stride               , stride);
                c->qpel_put[1][fxy](c->temp + 8           , ref[0] + (fx>>2) + (fy>>2)*stride + 8           , stride);
                c->qpel_put[1][fxy](c->temp     + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride     + 8*stride, stride);
                c->qpel_put[1][fxy](c->temp + 8 + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride + 8 + 8*stride, stride);
                c->qpel_avg[1][bxy](c->temp               , ref[8] + (bx>>2) + (by>>2)*stride               , stride);
                c->qpel_avg[1][bxy](c->temp + 8           , ref[8] + (bx>>2) + (by>>2)*stride + 8           , stride);
                c->qpel_avg[1][bxy](c->temp     + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride     + 8*stride, stride);
                c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
            }else{
                av_assert2((fx>>1) + 16*s->mb_x >= -16);
                av_assert2((fy>>1) + 16*s->mb_y >= -16);
                av_assert2((fx>>1) + 16*s->mb_x <= s->width);
                av_assert2((fy>>1) + 16*s->mb_y <= s->height);
                av_assert2((bx>>1) + 16*s->mb_x >= -16);
                av_assert2((by>>1) + 16*s->mb_y >= -16);
                av_assert2((bx>>1) + 16*s->mb_x <= s->width);
                av_assert2((by>>1) + 16*s->mb_y <= s->height);

                c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
                c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
            }
        }
        d = cmp_func(s, c->temp, src[0], stride, 16);
    }else
        d= 256*256*256*32;
    return d;
}

/**
 * Score the prediction at full-pel position (x, y) with sub-pel offset
 * (subx, suby) against the source block.
 *
 * If the sub-pel index dxy is non-zero the luma prediction is interpolated
 * into c->temp (qpel_put or hpel_put) and c->temp is compared with src[0];
 * otherwise src[0] is compared directly with ref[0] + x + y*stride.
 * When chroma is non-zero, both chroma planes are interpolated with
 * hpel_put[size+1] into the area starting at c->temp + 16*stride and their
 * chroma_cmp_func scores over h>>1 rows are added.
 *
 * @param size   block size index into the put tables
 * @param h      number of luma rows compared
 * @param qpel   1 for quarter-pel sub-positions, 0 for half-pel
 * @param chroma non-zero to include the chroma planes
 * @return accumulated comparison score
 */
static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel, int chroma){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int uvstride= c->uvstride;
    const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
    const int hx= subx + x*(1<<(1+qpel));
    const int hy= suby + y*(1<<(1+qpel));
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    int d;
    //FIXME check chroma 4mv, (no crashes ...)
    /* every path that later reads uvdxy (chroma != 0) assigns it first;
     * the initialiser only keeps compilers and analysers quiet */
    int uvdxy = 0;

    if(dxy){
        if(qpel){
            if (h << size == 16) {
                c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
            } else if (size == 0 && h == 8) {
                /* 16 wide, 8 high: done as two 8x8 halves */
                c->qpel_put[1][dxy](c->temp    , ref[0] + x + y*stride    , stride);
                c->qpel_put[1][dxy](c->temp + 8, ref[0] + x + y*stride + 8, stride);
            } else
                av_assert2(0);
            if(chroma){
                int cx= hx/2;
                int cy= hy/2;
                cx= (cx>>1)|(cx&1);
                cy= (cy>>1)|(cy&1);
                uvdxy= (cx&1) + 2*(cy&1);
                //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
            }
        }else{
            c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
            if(chroma)
                uvdxy= dxy | (x&1) | (2*(y&1));
        }
        d = cmp_func(s, c->temp, src[0], stride, h);
    }else{
        d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h);
        if(chroma)
            uvdxy= (x&1) + 2*(y&1);
    }
    if(chroma){
        uint8_t * const uvtemp= c->temp + 16*stride;
        c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
        c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
        d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
        d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
    }
    return d;
}

/**
 * Fixed-parameter wrapper around cmp_inline(): no sub-pel offset, size 0,
 * h 16, no qpel and no chroma.
 */
static int cmp_simple(MpegEncContext *s, const int x, const int y,
                      int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func){
    const int score = cmp_inline(s, x, y, 0, 0, 0, 16, ref_index, src_index,
                                 cmp_func, chroma_cmp_func, 0, 0);

    return score;
}

/**
 * Full-pel comparison (sub-pel offset fixed to 0,0) dispatching on flags.
 *
 * With FLAG_DIRECT set the direct-mode comparison is used, with qpel taken
 * from FLAG_QPEL; otherwise cmp_inline() is called with qpel 0 and chroma
 * taken from FLAG_CHROMA.
 */
static int cmp_fpel_internal(MpegEncContext *s, const int x, const int y,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(flags&FLAG_DIRECT){
        return cmp_direct_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL);
    }else{
        return cmp_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);
    }
}

/**
 * General comparison dispatching on flags.
 *
 * With FLAG_DIRECT set the direct-mode comparison is used; otherwise
 * cmp_inline() is called with chroma taken from FLAG_CHROMA. In both cases
 * qpel is taken from FLAG_QPEL and the sub-pel offset is passed through.
 */
static int cmp_internal(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(flags&FLAG_DIRECT){
        return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL);
    }else{
        return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL, flags&FLAG_CHROMA);
    }
}
256

257
/** @brief compares a block (either a full macroblock or a partition thereof)
258 259
    against a proposed motion-compensated prediction of that block
 */
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(av_builtin_constant_p(flags) && av_builtin_constant_p(h) && av_builtin_constant_p(size)
       && av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
       && flags==0 && h==16 && size==0 && subx==0 && suby==0){
        return cmp_simple(s,x,y,ref_index,src_index, cmp_func, chroma_cmp_func);
    }else if(av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
       && subx==0 && suby==0){
        return cmp_fpel_internal(s,x,y,size,h,ref_index,src_index, cmp_func, chroma_cmp_func,flags);
    }else{
        return cmp_internal(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags);
    }
}

/**
 * Comparison with qpel forced to 0: direct-mode comparison when FLAG_DIRECT
 * is set, otherwise cmp_inline() with chroma taken from FLAG_CHROMA.
 */
static int cmp_hpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(!(flags&FLAG_DIRECT))
        return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);

    return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0);
}

/**
 * Comparison with qpel forced to 1: direct-mode comparison when FLAG_DIRECT
 * is set, otherwise cmp_inline() with chroma taken from FLAG_CHROMA.
 */
static int cmp_qpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(flags&FLAG_DIRECT){
        return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 1);
    }else{
        return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 1, flags&FLAG_CHROMA);
    }
}
Michael Niedermayer's avatar
Michael Niedermayer committed
294 295 296

#include "motion_est_template.c"

297
/**
 * Comparison function that ignores its arguments and always returns 0.
 * ff_init_me() installs it in comparison-function slot 2.
 */
static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
                    ptrdiff_t stride, int h)
{
    return 0;
}

303
/**
 * Pixel "put" function that does nothing: neither buffer is read or
 * written. ff_init_me() installs it in row 2 of the hpel_put table.
 */
static void zero_hpel(uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h){
}

306
/**
 * Initialise the motion estimation context s->me from the codec context.
 *
 * Validates dia_size / pre_dia_size against the map size, (when
 * FF_API_MOTION_EST is enabled) translates the deprecated me_method into
 * motion_est, installs the comparison functions, derives the per-stage
 * flags, selects the sub-pel search routine and the put/avg pixel function
 * tables, and sets the luma/chroma strides.
 *
 * @param s encoder context
 * @return 0 on success, -1 if the diamond size or me_method is rejected
 */
int ff_init_me(MpegEncContext *s){
    MotionEstContext * const c= &s->me;
    int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
    int dia_size= FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255);

    if(FFMIN(s->avctx->dia_size, s->avctx->pre_dia_size) < -FFMIN(ME_MAP_SIZE, MAX_SAB_SIZE)){
        av_log(s->avctx, AV_LOG_ERROR, "ME_MAP size is too small for SAB diamond\n");
        return -1;
    }

#if FF_API_MOTION_EST
    //special case of snow is needed because snow uses its own iterative ME code
FF_DISABLE_DEPRECATION_WARNINGS
    if (s->motion_est == FF_ME_EPZS) {
        if (s->me_method == ME_ZERO)
            s->motion_est = FF_ME_ZERO;
        else if (s->me_method == ME_EPZS)
            s->motion_est = FF_ME_EPZS;
        else if (s->me_method == ME_X1)
            s->motion_est = FF_ME_XONE;
        else if (s->avctx->codec_id != AV_CODEC_ID_SNOW) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "me_method is only allowed to be set to zero and epzs; "
                   "for hex,umh,full and others see dia_size\n");
            return -1;
        }
    }
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    c->avctx= s->avctx;

    /* for H.261 the sub-pel comparison setting is overwritten with me_cmp */
    if(s->codec_id == AV_CODEC_ID_H261)
        c->avctx->me_sub_cmp = c->avctx->me_cmp;

    if(cache_size < 2*dia_size && !c->stride){
        av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n");
    }

    ff_set_cmp(&s->mecc, s->mecc.me_pre_cmp, c->avctx->me_pre_cmp);
    ff_set_cmp(&s->mecc, s->mecc.me_cmp,     c->avctx->me_cmp);
    ff_set_cmp(&s->mecc, s->mecc.me_sub_cmp, c->avctx->me_sub_cmp);
    ff_set_cmp(&s->mecc, s->mecc.mb_cmp,     c->avctx->mb_cmp);

    c->flags    = get_flags(c, 0, c->avctx->me_cmp    &FF_CMP_CHROMA);
    c->sub_flags= get_flags(c, 0, c->avctx->me_sub_cmp&FF_CMP_CHROMA);
    c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);

/*FIXME s->no_rounding b_type*/
    if (s->avctx->flags & AV_CODEC_FLAG_QPEL) {
        c->sub_motion_search= qpel_motion_search;
        c->qpel_avg = s->qdsp.avg_qpel_pixels_tab;
        if (s->no_rounding)
            c->qpel_put = s->qdsp.put_no_rnd_qpel_pixels_tab;
        else
            c->qpel_put = s->qdsp.put_qpel_pixels_tab;
    }else{
        if(c->avctx->me_sub_cmp&FF_CMP_CHROMA)
            c->sub_motion_search= hpel_motion_search;
        else if(   c->avctx->me_sub_cmp == FF_CMP_SAD
                && c->avctx->    me_cmp == FF_CMP_SAD
                && c->avctx->    mb_cmp == FF_CMP_SAD)
            c->sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
        else
            c->sub_motion_search= hpel_motion_search;
    }
    c->hpel_avg = s->hdsp.avg_pixels_tab;
    if (s->no_rounding)
        c->hpel_put = s->hdsp.put_no_rnd_pixels_tab;
    else
        c->hpel_put = s->hdsp.put_pixels_tab;

    if(s->linesize){
        c->stride  = s->linesize;
        c->uvstride= s->uvlinesize;
    }else{
        /* no line size known yet: derive the strides from the width in macroblocks */
        c->stride  = 16*s->mb_width + 32;
        c->uvstride=  8*s->mb_width + 16;
    }

    /* 8x8 fullpel search would need a 4x4 chroma compare, which we do
     * not have yet, and even if we had, the motion estimation code
     * does not expect it. */
    if (s->codec_id != AV_CODEC_ID_SNOW) {
        if ((c->avctx->me_cmp & FF_CMP_CHROMA) /* && !s->mecc.me_cmp[2] */)
            s->mecc.me_cmp[2] = zero_cmp;
        if ((c->avctx->me_sub_cmp & FF_CMP_CHROMA) && !s->mecc.me_sub_cmp[2])
            s->mecc.me_sub_cmp[2] = zero_cmp;
        /* NOTE(review): c->hpel_put was assigned from s->hdsp above, so these
         * stores appear to modify that table in place -- confirm intended. */
        c->hpel_put[2][0]= c->hpel_put[2][1]=
        c->hpel_put[2][2]= c->hpel_put[2][3]= zero_hpel;
    }

    if(s->codec_id == AV_CODEC_ID_H261){
        c->sub_motion_search= no_sub_motion_search;
    }

    return 0;
}
404

Michael Niedermayer's avatar
Michael Niedermayer committed
405
/*
 * Evaluate one half-pel candidate at offset (x, y) relative to the current
 * full-pel position and keep it if it beats dminh.
 *
 * Expands inside a scope that provides: s, size, pix, ptr, stride, h,
 * mv_penalty, pen_x, pen_y, penalty_factor, d, dminh, dx and dy.
 * The pix_abs function is chosen by whether x and/or y are non-zero; the
 * motion vector penalty is added before the comparison with dminh.
 * The suffix argument is not used by the expansion.
 */
#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
    d  = s->mecc.pix_abs[size][((x) ? 1 : 0) + ((y) ? 2 : 0)](NULL, pix, ptr + ((x) >> 1), stride, h); \
    d += (mv_penalty[pen_x + (x)] + mv_penalty[pen_y + (y)])*penalty_factor;\
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
411

412
/**
 * Half-pel refinement of a full-pel motion vector using the pix_abs
 * functions (via CHECK_SAD_HALF_MV).
 *
 * On entry *mx_ptr / *my_ptr hold the full-pel vector; on return they hold
 * the vector in half-pel units (doubled, plus the chosen half-pel offset).
 * If c->skip is set, the vector is reset to (0, 0) and dmin is returned
 * unchanged. If the full-pel vector is not strictly inside the
 * xmin/xmax/ymin/ymax limits, it is only doubled and dmin is returned.
 *
 * Otherwise the scores of the top/left/right/bottom full-pel neighbours
 * (read from score_map) decide which subset of the 8 surrounding half-pel
 * positions is evaluated and in which order. ptr is moved up one row for
 * candidates with y == -1 and back down before candidates with y >= 0, so
 * the order of the statements below matters.
 *
 * NOTE(review): LOAD_COMMON is defined outside this chunk; it presumably
 * provides score_map, xmin, xmax, ymin, ymax, pred_x, pred_y and mv_penalty
 * -- confirm against motion_est_template.c.
 *
 * @return the best score found (dmin if no candidate improved on it)
 */
static int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->sub_penalty_factor;
    int mx, my, dminh;
    uint8_t *pix, *ptr;
    int stride= c->stride;
    LOAD_COMMON

    av_assert2(c->sub_flags == 0);

    if(c->skip){
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    pix = c->src[src_index][0];

    mx = *mx_ptr;
    my = *my_ptr;
    ptr = c->ref[ref_index][0] + (my * stride) + mx;

    dminh = dmin;

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        const int index= my*(1<<ME_MAP_SHIFT) + mx;
        /* scores of the four full-pel neighbours: top, left, right, bottom */
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        /* switch to half-pel units */
        mx += mx;
        my += my;

        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /* start on the row above; moved back with ptr+= stride below */
        ptr-= stride;
        if(t<=b){
            CHECK_SAD_HALF_MV(y2 , 0, -1)
            if(l<=r){
                CHECK_SAD_HALF_MV(xy2, -1, -1)
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
            }else{
                CHECK_SAD_HALF_MV(xy2, +1, -1)
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
            }
        }else{
            if(l<=r){
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
                CHECK_SAD_HALF_MV(xy2, -1, +1)
            }else{
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
                CHECK_SAD_HALF_MV(xy2, +1, +1)
            }
            CHECK_SAD_HALF_MV(y2 ,  0, +1)
        }
        mx+=dx;
        my+=dy;

    }else{
        /* on or outside the limits: no refinement, just convert the units */
        mx += mx;
        my += my;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}

517
/**
 * Store the motion vector (mx, my) for the current macroblock.
 *
 * The vector is always written to s->p_mv_table at mb_x + mb_y*mb_stride.
 * When mv4 is non-zero it is also written to the 2x2 group of
 * current_picture.motion_val[0] entries starting at s->block_index[0]
 * (two adjacent entries on each of two rows b8_stride apart).
 *
 * @param mx  horizontal vector component
 * @param my  vertical vector component
 * @param mv4 non-zero to also fill the four motion_val entries
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    const int xy= s->mb_x + s->mb_y*s->mb_stride;

    s->p_mv_table[xy][0] = mx;
    s->p_mv_table[xy][1] = my;

    /* has already been set to the 4 MV if 4MV is done */
    if(mv4){
        int mot_xy= s->block_index[0];
        int row, col;

        for(row=0; row<2; row++){
            for(col=0; col<2; col++){
                s->current_picture.motion_val[0][mot_xy + col][0] = mx;
                s->current_picture.motion_val[0][mot_xy + col][1] = my;
            }
            mot_xy += s->b8_stride;
        }
    }
}

541 542 543
/**
 * get fullpel ME search limits.
 */
544
static inline void get_limits(MpegEncContext *s, int x, int y)
Fabrice Bellard's avatar
Fabrice Bellard committed
545
{
546
    MotionEstContext * const c= &s->me;
547
    int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
548
    int max_range = MAX_MV >> (1 + !!(c->flags&FLAG_QPEL));
549
/*
550
    if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
551
    else                   c->range= 16;
552
*/
Fabrice Bellard's avatar
Fabrice Bellard committed
553
    if (s->unrestricted_mv) {
554 555
        c->xmin = - x - 16;
        c->ymin = - y - 16;
556 557
        c->xmax = - x + s->width;
        c->ymax = - y + s->height;
558 559 560 561
    } else if (s->out_format == FMT_H261){
        // Search range of H261 is different from other codec standards
        c->xmin = (x > 15) ? - 15 : 0;
        c->ymin = (y > 15) ? - 15 : 0;
562
        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
563
        c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
Fabrice Bellard's avatar
Fabrice Bellard committed
564
    } else {
565 566 567 568
        c->xmin = - x;
        c->ymin = - y;
        c->xmax = - x + s->mb_width *16 - 16;
        c->ymax = - y + s->mb_height*16 - 16;
Fabrice Bellard's avatar
Fabrice Bellard committed
569
    }
570 571
    if(!range || range > max_range)
        range = max_range;
572 573 574 575 576 577
    if(range){
        c->xmin = FFMAX(c->xmin,-range);
        c->xmax = FFMIN(c->xmax, range);
        c->ymin = FFMAX(c->ymin,-range);
        c->ymax = FFMIN(c->ymax, range);
    }
578 579
}

580 581
/**
 * Derive the plane-0 pointers for slots 1..3 of c->ref and c->src from
 * slot 0: slot 1 is 8 bytes to the right, slot 2 is 8 rows down and slot 3
 * is 8 right and 8 down, i.e. the four 8x8 quadrants of a 16x16 block.
 *
 * @param c motion estimation context; ref[0][0] and src[0][0] must be set
 */
static inline void init_mv4_ref(MotionEstContext *c){
    const int stride= c->stride;

    c->ref[1][0] = c->ref[0][0] + 8;
    c->ref[2][0] = c->ref[0][0] + 8*stride;
    c->ref[3][0] = c->ref[2][0] + 8;
    c->src[1][0] = c->src[0][0] + 8;
    c->src[2][0] = c->src[0][0] + 8*stride;
    c->src[3][0] = c->src[2][0] + 8;
}

591
/**
 * Search one motion vector for each of the four 8x8 blocks of the current
 * macroblock and score the resulting 4MV candidate.
 *
 * The vectors found are stored in s->current_picture.motion_val[0] at
 * s->block_index[0..3]. c->pred_x/pred_y are left set to (mx, my) on the
 * scoring path.
 *
 * @param s     encoder context
 * @param mx    x component of the 16x16 vector, used as a search candidate
 * @param my    y component of the 16x16 vector, used as a search candidate
 * @param shift left shift applied to the full-pel limits before they are
 *              compared with the predictors
 * @return INT_MAX if all four vectors equal (mx, my), otherwise the
 *         accumulated score (plus 11*mb_penalty_factor unless mb_cmp is
 *         FF_CMP_RD)
 */
static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
{
    MotionEstContext *const c = &s->me;
    const int size   = 1;
    const int h      = 8;
    const int stride = c->stride;
    uint8_t *mv_penalty = c->current_mv_penalty;
    int safety_clipping = s->unrestricted_mv && (s->width & 15) && (s->height & 15);
    int P[10][2];
    int dmin_sum = 0;
    int mx4_sum  = 0;
    int my4_sum  = 0;
    int same     = 1;
    int block, i;

    init_mv4_ref(c);

    for (block = 0; block < 4; block++) {
        /* offset from the block above to the "top right" neighbour */
        static const int off[4] = { 2, 1, 1, -1 };
        const int mot_stride = s->b8_stride;
        const int mot_xy     = s->block_index[block];
        const int top_unset  = s->first_slice_line && block < 2;
        int mx4, my4;
        int pred_x4, pred_y4;
        int dmin4;

        if (safety_clipping) {
            /* tighten the upper limits for the 8x8 block position */
            c->xmax = -16 * s->mb_x + s->width  - 8 * (block & 1);
            c->ymax = -16 * s->mb_y + s->height - 8 * (block >> 1);
        }

        P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
        P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];

        if (P_LEFT[0] > (c->xmax << shift))
            P_LEFT[0] = c->xmax << shift;

        if (top_unset) {
            /* first line: only the left neighbour is used as predictor */
            c->pred_x = pred_x4 = P_LEFT[0];
            c->pred_y = pred_y4 = P_LEFT[1];
        } else {
            const int above = mot_xy - mot_stride;

            P_TOP[0]      = s->current_picture.motion_val[0][above][0];
            P_TOP[1]      = s->current_picture.motion_val[0][above][1];
            P_TOPRIGHT[0] = s->current_picture.motion_val[0][above + off[block]][0];
            P_TOPRIGHT[1] = s->current_picture.motion_val[0][above + off[block]][1];

            if (P_TOP[1] > (c->ymax << shift))
                P_TOP[1] = c->ymax << shift;
            if (P_TOPRIGHT[0] < (c->xmin << shift))
                P_TOPRIGHT[0] = c->xmin << shift;
            if (P_TOPRIGHT[0] > (c->xmax << shift))
                P_TOPRIGHT[0] = c->xmax << shift;
            if (P_TOPRIGHT[1] > (c->ymax << shift))
                P_TOPRIGHT[1] = c->ymax << shift;

            P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            c->pred_x = pred_x4 = P_MEDIAN[0];
            c->pred_y = pred_y4 = P_MEDIAN[1];
        }

        P_MV1[0] = mx;
        P_MV1[1] = my;

        if (safety_clipping) {
            for (i = 1; i < 10; i++) {
                /* entries 5..8 are never clipped here; entries 2..4 are
                 * skipped as well when they were not filled in above */
                if ((top_unset && i > 1 && i < 9) || (i > 4 && i < 9))
                    continue;
                if (P[i][0] > (c->xmax << shift))
                    P[i][0] = c->xmax << shift;
                if (P[i][1] > (c->ymax << shift))
                    P[i][1] = c->ymax << shift;
            }
        }

        dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block,
                                    s->p_mv_table, (1 << 16) >> shift);

        dmin4 = c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);

        if (s->mecc.me_sub_cmp[0] != s->mecc.mb_cmp[0]) {
            /* build the prediction of this 8x8 block in the scratchpad so
             * that the whole macroblock can be compared with mb_cmp later */
            const int offset = ((block & 1) + (block >> 1) * stride) * 8;
            uint8_t *dest_y  = c->scratchpad + offset;
            int dxy;

            if (s->quarter_sample) {
                uint8_t *ref = c->ref[block][0] + (mx4 >> 2) + (my4 >> 2) * stride;

                dxy = ((my4 & 3) << 2) | (mx4 & 3);

                if (s->no_rounding)
                    s->qdsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y, ref, stride);
                else
                    s->qdsp.put_qpel_pixels_tab[1][dxy](dest_y, ref, stride);
            } else {
                uint8_t *ref = c->ref[block][0] + (mx4 >> 1) + (my4 >> 1) * stride;

                dxy = ((my4 & 1) << 1) | (mx4 & 1);

                if (s->no_rounding)
                    s->hdsp.put_no_rnd_pixels_tab[1][dxy](dest_y, ref, stride, h);
                else
                    s->hdsp.put_pixels_tab[1][dxy](dest_y, ref, stride, h);
            }
            /* only the vector cost is added here */
            dmin_sum += (mv_penalty[mx4 - pred_x4] + mv_penalty[my4 - pred_y4]) * c->mb_penalty_factor;
        } else {
            dmin_sum += dmin4;
        }

        if (s->quarter_sample) {
            mx4_sum += mx4 / 2;
            my4_sum += my4 / 2;
        } else {
            mx4_sum += mx4;
            my4_sum += my4;
        }

        s->current_picture.motion_val[0][s->block_index[block]][0] = mx4;
        s->current_picture.motion_val[0][s->block_index[block]][1] = my4;

        if (mx4 != mx || my4 != my)
            same = 0;
    }

    /* four identical vectors bring nothing over the 16x16 candidate */
    if (same)
        return INT_MAX;

    if (s->mecc.me_sub_cmp[0] != s->mecc.mb_cmp[0]) {
        dmin_sum += s->mecc.mb_cmp[0](s,
                                      s->new_picture.f->data[0] +
                                      s->mb_x * 16 + s->mb_y * 16 * stride,
                                      c->scratchpad, stride, 16);
    }

    if (c->avctx->mb_cmp & FF_CMP_CHROMA) {
        const int chroma_mx = ff_h263_round_chroma(mx4_sum);
        const int chroma_my = ff_h263_round_chroma(my4_sum);
        const int dxy       = ((chroma_my & 1) << 1) | (chroma_mx & 1);
        const int offset    = (s->mb_x * 8 + (chroma_mx >> 1)) +
                              (s->mb_y * 8 + (chroma_my >> 1)) * s->uvlinesize;

        if (s->no_rounding) {
            s->hdsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad,     s->last_picture.f->data[1] + offset, s->uvlinesize, 8);
            s->hdsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad + 8, s->last_picture.f->data[2] + offset, s->uvlinesize, 8);
        } else {
            s->hdsp.put_pixels_tab[1][dxy](c->scratchpad,     s->last_picture.f->data[1] + offset, s->uvlinesize, 8);
            s->hdsp.put_pixels_tab[1][dxy](c->scratchpad + 8, s->last_picture.f->data[2] + offset, s->uvlinesize, 8);
        }

        dmin_sum += s->mecc.mb_cmp[1](s, s->new_picture.f->data[1] + s->mb_x * 8 + s->mb_y * 8 * s->uvlinesize, c->scratchpad,     s->uvlinesize, 8);
        dmin_sum += s->mecc.mb_cmp[1](s, s->new_picture.f->data[2] + s->mb_x * 8 + s->mb_y * 8 * s->uvlinesize, c->scratchpad + 8, s->uvlinesize, 8);
    }

    c->pred_x = mx;
    c->pred_y = my;

    if ((c->avctx->mb_cmp & 0xFF) == FF_CMP_RD)
        return dmin_sum;

    return dmin_sum + 11 * c->mb_penalty_factor;
}

745 746 747 748 749 750 751 752 753 754 755 756 757
/**
 * Set up the pointers of the second field.
 *
 * Slot 1 of c->src and slot 1+ref_index of c->ref are made to point one
 * line below slot 0 (respectively slot ref_index). The chroma planes are
 * only set up when FLAG_CHROMA is set in c->flags.
 *
 * @param s         encoder context
 * @param ref_index index of the first reference slot to derive from
 */
static inline void init_interlaced_ref(MpegEncContext *s, int ref_index)
{
    MotionEstContext *const c = &s->me;
    const int first  = ref_index;
    const int second = ref_index + 1;

    c->ref[second][0] = c->ref[first][0] + s->linesize;
    c->src[1][0]      = c->src[0][0]     + s->linesize;

    if (!(c->flags & FLAG_CHROMA))
        return;

    c->ref[second][1] = c->ref[first][1] + s->uvlinesize;
    c->ref[second][2] = c->ref[first][2] + s->uvlinesize;
    c->src[1][1]      = c->src[0][1]     + s->uvlinesize;
    c->src[1][2]      = c->src[0][2]     + s->uvlinesize;
}

758
/**
 * Field based motion search for the current macroblock.
 *
 * For each of the two fields (block 0 and 1) the reference field giving the
 * lowest score is selected, unless user_field_select is set, in which case
 * only the field already stored in field_select_tables is searched. The
 * vectors found are written to mv_tables and the chosen reference field to
 * field_select_tables.
 *
 * The vertical limits and the strides in s->me are halved / doubled for the
 * duration of the search and restored before returning.
 *
 * @param s                   encoder context
 * @param ref_index           index of the first reference slot to use
 * @param mv_tables           per [block][field_select] motion vector tables
 * @param field_select_tables per block table of the selected reference field
 * @param mx                  x component of the frame vector
 * @param my                  y component of the frame vector
 * @param user_field_select   if non-zero, keep the preset field selection
 * @return INT_MAX if the field vectors are equivalent to the frame vector,
 *         otherwise the summed score (plus 11*mb_penalty_factor unless
 *         mb_cmp is FF_CMP_RD)
 */
static int interlaced_search(MpegEncContext *s, int ref_index,
                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
{
    MotionEstContext *const c = &s->me;
    const int size = 0;
    const int h    = 8;
    uint8_t *const mv_penalty = c->current_mv_penalty;
    const int stride     = 2 * s->linesize;
    const int mot_stride = s->mb_stride;
    const int xy         = s->mb_x + s->mb_y * mot_stride;
    int P[10][2];
    int dmin_sum = 0;
    int same     = 1;
    int block;

    /* switch the context to field geometry */
    c->ymin     >>= 1;
    c->ymax     >>= 1;
    c->stride   <<= 1;
    c->uvstride <<= 1;
    init_interlaced_ref(s, ref_index);

    for (block = 0; block < 2; block++) {
        int best_dmin  = INT_MAX;
        int best_field = -1;
        int field_select;

        for (field_select = 0; field_select < 2; field_select++) {
            int16_t (*mv_table)[2] = mv_tables[block][field_select];
            int dmin, mx_i, my_i;

            if (user_field_select) {
                av_assert1(field_select==0 || field_select==1);
                av_assert1(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);
                if (field_select_tables[block][xy] != field_select)
                    continue;
            }

            P_LEFT[0] = mv_table[xy - 1][0];
            P_LEFT[1] = mv_table[xy - 1][1];
            if (P_LEFT[0] > (c->xmax << 1))
                P_LEFT[0] = c->xmax << 1;

            c->pred_x = P_LEFT[0];
            c->pred_y = P_LEFT[1];

            if (!s->first_slice_line) {
                const int above = xy - mot_stride;

                P_TOP[0]      = mv_table[above][0];
                P_TOP[1]      = mv_table[above][1];
                P_TOPRIGHT[0] = mv_table[above + 1][0];
                P_TOPRIGHT[1] = mv_table[above + 1][1];

                if (P_TOP[1] > (c->ymax << 1))
                    P_TOP[1] = c->ymax << 1;
                if (P_TOPRIGHT[0] < (c->xmin << 1))
                    P_TOPRIGHT[0] = c->xmin << 1;
                if (P_TOPRIGHT[0] > (c->xmax << 1))
                    P_TOPRIGHT[0] = c->xmax << 1;
                if (P_TOPRIGHT[1] > (c->ymax << 1))
                    P_TOPRIGHT[1] = c->ymax << 1;

                P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            P_MV1[0] = mx; //FIXME not correct if block != field_select
            P_MV1[1] = my / 2;

            dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block,
                                       field_select + ref_index, mv_table, (1 << 16) >> 1);

            dmin = c->sub_motion_search(s, &mx_i, &my_i, dmin, block,
                                        field_select + ref_index, size, h);

            mv_table[xy][0] = mx_i;
            mv_table[xy][1] = my_i;

            if (s->mecc.me_sub_cmp[0] != s->mecc.mb_cmp[0]) {
                /* re-score the result with the macroblock comparison function */
                //FIXME chroma ME
                uint8_t *ref = c->ref[field_select + ref_index][0] + (mx_i >> 1) + (my_i >> 1) * stride;
                const int dxy = ((my_i & 1) << 1) | (mx_i & 1);

                if (s->no_rounding)
                    s->hdsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref, stride, h);
                else
                    s->hdsp.put_pixels_tab[size][dxy](c->scratchpad, ref, stride, h);

                dmin  = s->mecc.mb_cmp[size](s, c->src[block][0], c->scratchpad, stride, h);
                dmin += (mv_penalty[mx_i - c->pred_x] + mv_penalty[my_i - c->pred_y] + 1) * c->mb_penalty_factor;
            } else {
                dmin += c->mb_penalty_factor; //field_select bits
            }

            dmin += field_select != block; //slightly prefer same field

            if (dmin < best_dmin) {
                best_dmin  = dmin;
                best_field = field_select;
            }
        }

        {
            const int16_t *best_mv = mv_tables[block][best_field][xy];

            //FIXME check if these checks work and are any good at all
            if (best_mv[0] != mx     ||
                (best_mv[1] & 1)     ||
                best_mv[1] * 2 != my ||
                best_field != block)
                same = 0;
        }

        field_select_tables[block][xy] = best_field;
        dmin_sum += best_dmin;
    }

    /* back to frame geometry */
    c->ymin     <<= 1;
    c->ymax     <<= 1;
    c->stride   >>= 1;
    c->uvstride >>= 1;

    if (same)
        return INT_MAX;

    if ((c->avctx->mb_cmp & 0xFF) == FF_CMP_RD)
        return dmin_sum;

    return dmin_sum + 11 * c->mb_penalty_factor;
}

880 881 882 883 884 885 886
static inline int get_penalty_factor(int lambda, int lambda2, int type){
    switch(type&0xFF){
    default:
    case FF_CMP_SAD:
        return lambda>>FF_LAMBDA_SHIFT;
    case FF_CMP_DCT:
        return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
887 888 889 890
    case FF_CMP_W53:
        return (4*lambda)>>(FF_LAMBDA_SHIFT);
    case FF_CMP_W97:
        return (2*lambda)>>(FF_LAMBDA_SHIFT);
891 892 893 894 895 896 897 898 899 900 901 902 903
    case FF_CMP_SATD:
    case FF_CMP_DCT264:
        return (2*lambda)>>FF_LAMBDA_SHIFT;
    case FF_CMP_RD:
    case FF_CMP_PSNR:
    case FF_CMP_SSE:
    case FF_CMP_NSSE:
        return lambda2>>FF_LAMBDA_SHIFT;
    case FF_CMP_BIT:
        return 1;
    }
}

904 905 906
/**
 * Estimate the motion of one macroblock of a P-frame and store the set of
 * candidate macroblock types in s->mb_type.
 *
 * Besides the motion vectors (stored through set_p_mv_tables() and the 4MV /
 * interlaced helpers) this also updates the per macroblock mean, variance
 * and motion compensated variance of the current picture, the running
 * variance sums in s->me and the scene change score.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_p_frame_motion(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    MotionEstContext *const c = &s->me;
    Picture *const pic        = &s->current_picture;
    const int shift           = 1 + s->quarter_sample;
    uint8_t *pix, *ppix;
    int P[10][2];
    int sum;
    int varc;            ///< the variance of the block (sum of squared (p[y][x]-average))
    int vard;            ///< sum of squared differences with the estimated motion vector
    int mx = 0, my = 0, dmin = 0;
    int mb_type = 0;

    init_ref(c, s->new_picture.f->data, s->last_picture.f->data, NULL, 16*mb_x, 16*mb_y, 0);

    av_assert0(s->quarter_sample==0 || s->quarter_sample==1);
    av_assert0(s->linesize == c->stride);
    av_assert0(s->uvlinesize == c->uvstride);

    c->penalty_factor     = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor  = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty = c->mv_penalty[s->f_code] + MAX_DMV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip = 0;

    /* intra / predictive decision */
    pix  = c->src[0][0];
    sum  = s->mpvencdsp.pix_sum(pix, s->linesize);
    varc = s->mpvencdsp.pix_norm1(pix, s->linesize) -
           (((unsigned) sum * sum) >> 8) + 500;

    pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum  + 128) >> 8;
    pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc + 128) >> 8;
    c->mb_var_sum_temp += (varc + 128) >> 8;

    if (s->motion_est != FF_ME_ZERO) {
        const int mot_stride = s->b8_stride;
        const int mot_xy     = s->block_index[0];

        P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
        P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];

        if (P_LEFT[0] > (c->xmax << shift))
            P_LEFT[0] = c->xmax << shift;

        if (s->first_slice_line) {
            /* no line above: predict from the left neighbour only */
            c->pred_x = P_LEFT[0];
            c->pred_y = P_LEFT[1];
        } else {
            const int above = mot_xy - mot_stride;

            P_TOP[0]      = s->current_picture.motion_val[0][above    ][0];
            P_TOP[1]      = s->current_picture.motion_val[0][above    ][1];
            P_TOPRIGHT[0] = s->current_picture.motion_val[0][above + 2][0];
            P_TOPRIGHT[1] = s->current_picture.motion_val[0][above + 2][1];

            if (P_TOP[1] > (c->ymax << shift))
                P_TOP[1] = c->ymax << shift;
            if (P_TOPRIGHT[0] < (c->xmin << shift))
                P_TOPRIGHT[0] = c->xmin << shift;
            if (P_TOPRIGHT[1] > (c->ymax << shift))
                P_TOPRIGHT[1] = c->ymax << shift;

            P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            if (s->out_format == FMT_H263) {
                c->pred_x = P_MEDIAN[0];
                c->pred_y = P_MEDIAN[1];
            } else { /* mpeg1 at least */
                c->pred_x = P_LEFT[0];
                c->pred_y = P_LEFT[1];
            }
        }

        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table,
                                     (1 << 16) >> shift, 0, 16);
    }

    /* At this point (mx,my) are full-pel and the relative displacement */
    ppix = c->ref[0][0] + (my * s->linesize) + mx;

    vard = s->mecc.sse[0](NULL, pix, ppix, s->linesize, 16);

    pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard + 128) >> 8;
    c->mc_mb_var_sum_temp += (vard + 128) >> 8;

    if (c->avctx->mb_decision > FF_MB_DECISION_SIMPLE) {
        /* the final decision is taken later: only collect candidate types */
        int p_score = FFMIN(vard, varc - 500 + (s->lambda2 >> FF_LAMBDA_SHIFT) * 100);
        int i_score = varc - 500 + (s->lambda2 >> FF_LAMBDA_SHIFT) * 20;
        c->scene_change_score += ff_sqrt(p_score) - ff_sqrt(i_score);

        if (vard * 2 + 200 * 256 > varc)
            mb_type |= CANDIDATE_MB_TYPE_INTRA;

        if (varc * 2 + 200 * 256 > vard || s->qscale > 24) {
            mb_type |= CANDIDATE_MB_TYPE_INTER;
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            if ((s->mpv_flags & FF_MPV_FLAG_MV0) && (mx || my))
                mb_type |= CANDIDATE_MB_TYPE_SKIPPED; //FIXME check difference
        } else {
            /* no sub-pel refinement: just rescale the full-pel vector */
            mx <<= shift;
            my <<= shift;
        }

        if ((s->avctx->flags & AV_CODEC_FLAG_4MV)
            && !c->skip && varc > 50 << 8 && vard > 10 << 8) {
            if (h263_mv4_search(s, mx, my, shift) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER4V;

            set_p_mv_tables(s, mx, my, 0);
        } else {
            set_p_mv_tables(s, mx, my, 1);
        }

        if ((s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME)
            && !c->skip) { //FIXME varc/d checks
            if (interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER_I;
        }
    } else {
        /* simple decision: pick the single best type right here */
        int intra_score, i;

        mb_type = CANDIDATE_MB_TYPE_INTER;

        dmin = c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
        if (c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
            dmin = get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

        if ((s->avctx->flags & AV_CODEC_FLAG_4MV)
            && !c->skip && varc > 50 << 8 && vard > 10 << 8) {
            int dmin4 = h263_mv4_search(s, mx, my, shift);
            if (dmin4 < dmin) {
                mb_type = CANDIDATE_MB_TYPE_INTER4V;
                dmin    = dmin4;
            }
        }

        if ((s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME)
            && !c->skip) { //FIXME varc/d checks
            int dmin_i = interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);
            if (dmin_i < dmin) {
                mb_type = CANDIDATE_MB_TYPE_INTER_I;
                dmin    = dmin_i;
            }
        }

        set_p_mv_tables(s, mx, my, mb_type != CANDIDATE_MB_TYPE_INTER4V);

        /* get intra luma score */
        if ((c->avctx->mb_cmp & 0xFF) == FF_CMP_SSE) {
            intra_score = varc - 500;
        } else {
            /* compare the block against a flat block holding its mean */
            unsigned mean = (sum + 128) >> 8;
            mean *= 0x01010101;

            for (i = 0; i < 16; i++) {
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 0]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 4]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 8]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+12]) = mean;
            }

            intra_score = s->mecc.mb_cmp[0](s, c->scratchpad, pix, s->linesize, 16);
        }
        intra_score += c->mb_penalty_factor * 16;

        if (intra_score < dmin) {
            mb_type = CANDIDATE_MB_TYPE_INTRA;
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x] = CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
        } else {
            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x] = 0;
        }

        {
            int p_score = FFMIN(vard, varc - 500 + (s->lambda2 >> FF_LAMBDA_SHIFT) * 100);
            int i_score = varc - 500 + (s->lambda2 >> FF_LAMBDA_SHIFT) * 20;
            c->scene_change_score += ff_sqrt(p_score) - ff_sqrt(i_score);
        }
    }

    s->mb_type[mb_y*s->mb_stride + mb_x] = mb_type;
}

1080 1081 1082
/**
 * Pre-pass motion estimation for one macroblock of a P-frame.
 *
 * The predictors are taken from the entries at xy+1 and xy+mb_stride
 * (and xy+mb_stride-1) of s->p_mv_table, i.e. from the right and from the
 * line below. The vector found is stored in s->p_mv_table[xy], shifted
 * left by shift.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the score returned by ff_epzs_motion_search()
 */
int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
                                    int mb_x, int mb_y)
{
    MotionEstContext *const c = &s->me;
    const int shift = 1 + s->quarter_sample;
    const int xy    = mb_x + mb_y * s->mb_stride;
    int P[10][2];
    int mx, my, dmin;

    init_ref(c, s->new_picture.f->data, s->last_picture.f->data, NULL, 16*mb_x, 16*mb_y, 0);

    av_assert0(s->quarter_sample==0 || s->quarter_sample==1);

    c->pre_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
    c->current_mv_penalty = c->mv_penalty[s->f_code] + MAX_DMV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip = 0;

    P_LEFT[0] = s->p_mv_table[xy + 1][0];
    P_LEFT[1] = s->p_mv_table[xy + 1][1];

    if (P_LEFT[0] < (c->xmin << shift))
        P_LEFT[0] = c->xmin << shift;

    if (s->first_slice_line) {
        /* special case for first line */
        c->pred_x = P_LEFT[0];
        c->pred_y = P_LEFT[1];

        //FIXME
        P_MEDIAN[1]   = 0;
        P_TOPRIGHT[1] = 0;
        P_TOP[1]      = 0;
        P_MEDIAN[0]   = 0;
        P_TOPRIGHT[0] = 0;
        P_TOP[0]      = 0;
    } else {
        const int next_row = xy + s->mb_stride;

        P_TOP[0]      = s->p_mv_table[next_row    ][0];
        P_TOP[1]      = s->p_mv_table[next_row    ][1];
        P_TOPRIGHT[0] = s->p_mv_table[next_row - 1][0];
        P_TOPRIGHT[1] = s->p_mv_table[next_row - 1][1];

        if (P_TOP[1] < (c->ymin << shift))
            P_TOP[1] = c->ymin << shift;
        if (P_TOPRIGHT[0] > (c->xmax << shift))
            P_TOPRIGHT[0] = c->xmax << shift;
        if (P_TOPRIGHT[1] < (c->ymin << shift))
            P_TOPRIGHT[1] = c->ymin << shift;

        P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

        c->pred_x = P_MEDIAN[0];
        c->pred_y = P_MEDIAN[1];
    }

    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table,
                                 (1 << 16) >> shift, 0, 16);

    s->p_mv_table[xy][0] = mx << shift;
    s->p_mv_table[xy][1] = my << shift;

    return dmin;
}

1133 1134
/**
 * Estimate one 16x16 motion vector of a B-frame macroblock against a single
 * reference and store it in mv_table.
 *
 * @param s         encoder context
 * @param mb_x      macroblock column
 * @param mb_y      macroblock row
 * @param mv_table  table receiving the vector; also the source of the
 *                  spatial predictors
 * @param ref_index reference slot passed on to the search functions
 * @param f_code    selects the row of c->mv_penalty used for vector costs
 * @return the score of the vector found
 */
static int estimate_motion_b(MpegEncContext *s, int mb_x, int mb_y,
                             int16_t (*mv_table)[2], int ref_index, int f_code)
{
    MotionEstContext *const c = &s->me;
    const int shift      = 1 + s->quarter_sample;
    const int mot_stride = s->mb_stride;
    const int mot_xy     = mb_y * mot_stride + mb_x;
    uint8_t *const mv_penalty = c->mv_penalty[f_code] + MAX_DMV;
    int P[10][2];
    int mx = 0, my = 0, dmin = 0;
    int mv_scale;

    c->penalty_factor     = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor  = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty = mv_penalty;

    get_limits(s, 16*mb_x, 16*mb_y);

    if (s->motion_est != FF_ME_ZERO) {
        P_LEFT[0] = mv_table[mot_xy - 1][0];
        P_LEFT[1] = mv_table[mot_xy - 1][1];

        if (P_LEFT[0] > (c->xmax << shift))
            P_LEFT[0] = c->xmax << shift;

        /* the top predictors only exist below the first line of a slice */
        if (!s->first_slice_line) {
            const int above = mot_xy - mot_stride;

            P_TOP[0]      = mv_table[above    ][0];
            P_TOP[1]      = mv_table[above    ][1];
            P_TOPRIGHT[0] = mv_table[above + 1][0];
            P_TOPRIGHT[1] = mv_table[above + 1][1];

            if (P_TOP[1] > (c->ymax << shift))
                P_TOP[1] = c->ymax << shift;
            if (P_TOPRIGHT[0] < (c->xmin << shift))
                P_TOPRIGHT[0] = c->xmin << shift;
            if (P_TOPRIGHT[1] > (c->ymax << shift))
                P_TOPRIGHT[1] = c->ymax << shift;

            P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
        }
        /* the vector cost is always measured against the left neighbour */
        c->pred_x = P_LEFT[0];
        c->pred_y = P_LEFT[1];

        /* scale factor applied to the s->p_mv_table candidates */
        if (mv_table == s->b_forw_mv_table)
            mv_scale = (s->pb_time << 16) / (s->pp_time << shift);
        else
            mv_scale = ((s->pb_time - s->pp_time) << 16) / (s->pp_time << shift);

        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index,
                                     s->p_mv_table, mv_scale, 0, 16);
    }

    dmin = c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);

    if (c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin = get_mb_score(s, mx, my, 0, ref_index, 0, 16, 1);

    mv_table[mot_xy][0] = mx;
    mv_table[mot_xy][1] = my;

    return dmin;
}

1195
/**
 * Score a pair of forward / backward motion vectors for bidirectional
 * prediction.
 *
 * The forward prediction is written to c->scratchpad from c->ref[0], the
 * backward prediction from c->ref[2] is averaged into it, and the result is
 * compared with the source block using mb_cmp. The cost of both vectors
 * relative to their predictors is added.
 *
 * @param s         encoder context
 * @param motion_fx forward vector, x component
 * @param motion_fy forward vector, y component
 * @param motion_bx backward vector, x component
 * @param motion_by backward vector, y component
 * @param pred_fx   forward vector predictor, x component
 * @param pred_fy   forward vector predictor, y component
 * @param pred_bx   backward vector predictor, x component
 * @param pred_by   backward vector predictor, y component
 * @param size      index into the pixel / comparison function tables
 * @param h         block height, passed to the half-pel and comparison
 *                  functions
 * @return the combined score (luma only, chroma is not evaluated)
 */
static inline int check_bidir_mv(MpegEncContext * s,
                   int motion_fx, int motion_fy,
                   int motion_bx, int motion_by,
                   int pred_fx, int pred_fy,
                   int pred_bx, int pred_by,
                   int size, int h)
{
    //FIXME optimize?
    //FIXME better f_code prediction (max mv & distance)
    //FIXME pointers
    MotionEstContext * const c= &s->me;
    uint8_t * const mv_penalty_f= c->mv_penalty[s->f_code] + MAX_DMV; // f_code of the prev frame
    uint8_t * const mv_penalty_b= c->mv_penalty[s->b_code] + MAX_DMV; // b_code of the prev frame
    int stride= c->stride;
    uint8_t *dest_y = c->scratchpad;
    uint8_t *ptr;
    int dxy;
    int src_x, src_y;
    int fbmin;
    uint8_t **src_data= c->src[0];
    uint8_t **ref_data= c->ref[0];
    uint8_t **ref2_data= c->ref[2];

    if(s->quarter_sample){
        dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
        src_x = motion_fx >> 2;
        src_y = motion_fy >> 2;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        /* use the same table index as the averaging call below so both
         * predictions cover the same block size */
        s->qdsp.put_qpel_pixels_tab[size][dxy](dest_y, ptr, stride);

        dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
        src_x = motion_bx >> 2;
        src_y = motion_by >> 2;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->qdsp.avg_qpel_pixels_tab[size][dxy](dest_y, ptr, stride);
    }else{
        dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
        src_x = motion_fx >> 1;
        src_y = motion_fy >> 1;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        s->hdsp.put_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);

        dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
        src_x = motion_bx >> 1;
        src_y = motion_by >> 1;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->hdsp.avg_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
    }

    fbmin = (mv_penalty_f[motion_fx-pred_fx] + mv_penalty_f[motion_fy-pred_fy])*c->mb_penalty_factor
           +(mv_penalty_b[motion_bx-pred_bx] + mv_penalty_b[motion_by-pred_by])*c->mb_penalty_factor
           + s->mecc.mb_cmp[size](s, src_data[0], dest_y, stride, h); // FIXME new_pic

    //FIXME CHROMA !!! (FF_CMP_CHROMA in avctx->mb_cmp is currently ignored here)

    return fbmin;
}
1258

1259
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1260
static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
1261
{
1262
    MotionEstContext * const c= &s->me;
1263 1264
    const int mot_stride = s->mb_stride;
    const int xy = mb_y *mot_stride + mb_x;
1265 1266 1267 1268 1269 1270 1271 1272 1273
    int fbmin;
    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1274 1275 1276 1277 1278 1279 1280
    const int flags= c->sub_flags;
    const int qpel= flags&FLAG_QPEL;
    const int shift= 1+qpel;
    const int xmin= c->xmin<<shift;
    const int ymin= c->ymin<<shift;
    const int xmax= c->xmax<<shift;
    const int ymax= c->ymax<<shift;
1281
#define HASH(fx,fy,bx,by) ((fx)+17*(fy)+63*(bx)+117*(by))
1282
#define HASH8(fx,fy,bx,by) ((uint8_t)HASH(fx,fy,bx,by))
1283
    int hashidx= HASH(motion_fx,motion_fy, motion_bx, motion_by);
1284
    uint8_t map[256] = { 0 };
1285

1286
    map[hashidx&255] = 1;
1287

1288
    fbmin= check_bidir_mv(s, motion_fx, motion_fy,
1289 1290
                          motion_bx, motion_by,
                          pred_fx, pred_fy,
1291 1292
                          pred_bx, pred_by,
                          0, 16);
1293

1294
    if(s->avctx->bidir_refine){
1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315
        int end;
        static const uint8_t limittab[5]={0,8,32,64,80};
        const int limit= limittab[s->avctx->bidir_refine];
        static const int8_t vect[][4]={
{ 0, 0, 0, 1}, { 0, 0, 0,-1}, { 0, 0, 1, 0}, { 0, 0,-1, 0}, { 0, 1, 0, 0}, { 0,-1, 0, 0}, { 1, 0, 0, 0}, {-1, 0, 0, 0},

{ 0, 0, 1, 1}, { 0, 0,-1,-1}, { 0, 1, 1, 0}, { 0,-1,-1, 0}, { 1, 1, 0, 0}, {-1,-1, 0, 0}, { 1, 0, 0, 1}, {-1, 0, 0,-1},
{ 0, 1, 0, 1}, { 0,-1, 0,-1}, { 1, 0, 1, 0}, {-1, 0,-1, 0},
{ 0, 0,-1, 1}, { 0, 0, 1,-1}, { 0,-1, 1, 0}, { 0, 1,-1, 0}, {-1, 1, 0, 0}, { 1,-1, 0, 0}, { 1, 0, 0,-1}, {-1, 0, 0, 1},
{ 0,-1, 0, 1}, { 0, 1, 0,-1}, {-1, 0, 1, 0}, { 1, 0,-1, 0},

{ 0, 1, 1, 1}, { 0,-1,-1,-1}, { 1, 1, 1, 0}, {-1,-1,-1, 0}, { 1, 1, 0, 1}, {-1,-1, 0,-1}, { 1, 0, 1, 1}, {-1, 0,-1,-1},
{ 0,-1, 1, 1}, { 0, 1,-1,-1}, {-1, 1, 1, 0}, { 1,-1,-1, 0}, { 1, 1, 0,-1}, {-1,-1, 0, 1}, { 1, 0,-1, 1}, {-1, 0, 1,-1},
{ 0, 1,-1, 1}, { 0,-1, 1,-1}, { 1,-1, 1, 0}, {-1, 1,-1, 0}, {-1, 1, 0, 1}, { 1,-1, 0,-1}, { 1, 0, 1,-1}, {-1, 0,-1, 1},
{ 0, 1, 1,-1}, { 0,-1,-1, 1}, { 1, 1,-1, 0}, {-1,-1, 1, 0}, { 1,-1, 0, 1}, {-1, 1, 0,-1}, {-1, 0, 1, 1}, { 1, 0,-1,-1},

{ 1, 1, 1, 1}, {-1,-1,-1,-1},
{ 1, 1, 1,-1}, {-1,-1,-1, 1}, { 1, 1,-1, 1}, {-1,-1, 1,-1}, { 1,-1, 1, 1}, {-1, 1,-1,-1}, {-1, 1, 1, 1}, { 1,-1,-1,-1},
{ 1, 1,-1,-1}, {-1,-1, 1, 1}, { 1,-1,-1, 1}, {-1, 1, 1,-1}, { 1,-1, 1,-1}, {-1, 1,-1, 1},
        };
        static const uint8_t hash[]={
1316
HASH8( 0, 0, 0, 1), HASH8( 0, 0, 0,-1), HASH8( 0, 0, 1, 0), HASH8( 0, 0,-1, 0), HASH8( 0, 1, 0, 0), HASH8( 0,-1, 0, 0), HASH8( 1, 0, 0, 0), HASH8(-1, 0, 0, 0),
1317

1318 1319 1320 1321
HASH8( 0, 0, 1, 1), HASH8( 0, 0,-1,-1), HASH8( 0, 1, 1, 0), HASH8( 0,-1,-1, 0), HASH8( 1, 1, 0, 0), HASH8(-1,-1, 0, 0), HASH8( 1, 0, 0, 1), HASH8(-1, 0, 0,-1),
HASH8( 0, 1, 0, 1), HASH8( 0,-1, 0,-1), HASH8( 1, 0, 1, 0), HASH8(-1, 0,-1, 0),
HASH8( 0, 0,-1, 1), HASH8( 0, 0, 1,-1), HASH8( 0,-1, 1, 0), HASH8( 0, 1,-1, 0), HASH8(-1, 1, 0, 0), HASH8( 1,-1, 0, 0), HASH8( 1, 0, 0,-1), HASH8(-1, 0, 0, 1),
HASH8( 0,-1, 0, 1), HASH8( 0, 1, 0,-1), HASH8(-1, 0, 1, 0), HASH8( 1, 0,-1, 0),
1322

1323 1324 1325 1326
HASH8( 0, 1, 1, 1), HASH8( 0,-1,-1,-1), HASH8( 1, 1, 1, 0), HASH8(-1,-1,-1, 0), HASH8( 1, 1, 0, 1), HASH8(-1,-1, 0,-1), HASH8( 1, 0, 1, 1), HASH8(-1, 0,-1,-1),
HASH8( 0,-1, 1, 1), HASH8( 0, 1,-1,-1), HASH8(-1, 1, 1, 0), HASH8( 1,-1,-1, 0), HASH8( 1, 1, 0,-1), HASH8(-1,-1, 0, 1), HASH8( 1, 0,-1, 1), HASH8(-1, 0, 1,-1),
HASH8( 0, 1,-1, 1), HASH8( 0,-1, 1,-1), HASH8( 1,-1, 1, 0), HASH8(-1, 1,-1, 0), HASH8(-1, 1, 0, 1), HASH8( 1,-1, 0,-1), HASH8( 1, 0, 1,-1), HASH8(-1, 0,-1, 1),
HASH8( 0, 1, 1,-1), HASH8( 0,-1,-1, 1), HASH8( 1, 1,-1, 0), HASH8(-1,-1, 1, 0), HASH8( 1,-1, 0, 1), HASH8(-1, 1, 0,-1), HASH8(-1, 0, 1, 1), HASH8( 1, 0,-1,-1),
1327

1328 1329 1330
HASH8( 1, 1, 1, 1), HASH8(-1,-1,-1,-1),
HASH8( 1, 1, 1,-1), HASH8(-1,-1,-1, 1), HASH8( 1, 1,-1, 1), HASH8(-1,-1, 1,-1), HASH8( 1,-1, 1, 1), HASH8(-1, 1,-1,-1), HASH8(-1, 1, 1, 1), HASH8( 1,-1,-1,-1),
HASH8( 1, 1,-1,-1), HASH8(-1,-1, 1, 1), HASH8( 1,-1,-1, 1), HASH8(-1, 1, 1,-1), HASH8( 1,-1, 1,-1), HASH8(-1, 1,-1, 1),
1331 1332
};

1333
#define CHECK_BIDIR(fx,fy,bx,by)\
1334
    if( !map[(hashidx+HASH(fx,fy,bx,by))&255]\
1335
       &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
Michael Niedermayer's avatar
Michael Niedermayer committed
1336
       &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
1337
        int score;\
1338
        map[(hashidx+HASH(fx,fy,bx,by))&255] = 1;\
Michael Niedermayer's avatar
Michael Niedermayer committed
1339 1340
        score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
        if(score < fbmin){\
1341
            hashidx += HASH(fx,fy,bx,by);\
Michael Niedermayer's avatar
Michael Niedermayer committed
1342 1343 1344 1345 1346 1347 1348
            fbmin= score;\
            motion_fx+=fx;\
            motion_fy+=fy;\
            motion_bx+=bx;\
            motion_by+=by;\
            end=0;\
        }\
1349 1350 1351
    }
#define CHECK_BIDIR2(a,b,c,d)\
CHECK_BIDIR(a,b,c,d)\
1352
CHECK_BIDIR(-(a),-(b),-(c),-(d))
1353 1354

        do{
1355 1356
            int i;
            int borderdist=0;
1357 1358
            end=1;

1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392
            CHECK_BIDIR2(0,0,0,1)
            CHECK_BIDIR2(0,0,1,0)
            CHECK_BIDIR2(0,1,0,0)
            CHECK_BIDIR2(1,0,0,0)

            for(i=8; i<limit; i++){
                int fx= motion_fx+vect[i][0];
                int fy= motion_fy+vect[i][1];
                int bx= motion_bx+vect[i][2];
                int by= motion_by+vect[i][3];
                if(borderdist<=0){
                    int a= (xmax - FFMAX(fx,bx))|(FFMIN(fx,bx) - xmin);
                    int b= (ymax - FFMAX(fy,by))|(FFMIN(fy,by) - ymin);
                    if((a|b) < 0)
                        map[(hashidx+hash[i])&255] = 1;
                }
                if(!map[(hashidx+hash[i])&255]){
                    int score;
                    map[(hashidx+hash[i])&255] = 1;
                    score= check_bidir_mv(s, fx, fy, bx, by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);
                    if(score < fbmin){
                        hashidx += hash[i];
                        fbmin= score;
                        motion_fx=fx;
                        motion_fy=fy;
                        motion_bx=bx;
                        motion_by=by;
                        end=0;
                        borderdist--;
                        if(borderdist<=0){
                            int a= FFMIN(xmax - FFMAX(fx,bx), FFMIN(fx,bx) - xmin);
                            int b= FFMIN(ymax - FFMAX(fy,by), FFMIN(fy,by) - ymin);
                            borderdist= FFMIN(a,b);
                        }
1393 1394 1395 1396 1397 1398
                    }
                }
            }
        }while(!end);
    }

1399 1400 1401 1402 1403
    s->b_bidir_forw_mv_table[xy][0]= motion_fx;
    s->b_bidir_forw_mv_table[xy][1]= motion_fy;
    s->b_bidir_back_mv_table[xy][0]= motion_bx;
    s->b_bidir_back_mv_table[xy][1]= motion_by;

1404
    return fbmin;
1405 1406
}

1407
/**
 * Search the vector stored in s->b_direct_mv_table for one macroblock.
 *
 * The co-located vectors are read from s->next_picture.motion_val[0] and
 * scaled by pb_time/pp_time to form c->direct_basis_mv. The search window
 * (initially -32>>shift .. 31>>shift) is then narrowed using the basis
 * vectors, the macroblock position and s->width / s->height.
 *
 * FLAG_DIRECT is set in c->flags and c->sub_flags for the duration of the
 * search and cleared again before returning; c->xmin/xmax/ymin/ymax are
 * restored through get_limits().
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the score of the best vector found, or 256*256*256*64 if the
 *         narrowed window does not contain the zero vector (in which case
 *         the table entry is set to 0,0)
 */
static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int P[10][2];
    const int mot_stride = s->mb_stride;
    const int mot_xy = mb_y*mot_stride + mb_x;
    const int shift= 1+s->quarter_sample;
    int dmin, i;
    const int time_pp= s->pp_time;
    const int time_pb= s->pb_time;
    int mx, my, xmin, xmax, ymin, ymax;
    int16_t (*mv_table)[2]= s->b_direct_mv_table;

    c->current_mv_penalty= c->mv_penalty[1] + MAX_DMV;
    ymin= xmin=(-32)>>shift;
    ymax= xmax=   31>>shift;

    if (IS_8X8(s->next_picture.mb_type[mot_xy])) {
        s->mv_type= MV_TYPE_8X8;
    }else{
        s->mv_type= MV_TYPE_16X16;
    }

    for(i=0; i<4; i++){
        int index= s->block_index[i];
        int min, max;

        c->co_located_mv[i][0] = s->next_picture.motion_val[0][index][0];
        c->co_located_mv[i][1] = s->next_picture.motion_val[0][index][1];
        /* scaled co-located vector plus the offset of block i inside the MB */
        c->direct_basis_mv[i][0]= c->co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
        c->direct_basis_mv[i][1]= c->co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));

        max= FFMAX(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        min= FFMIN(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        max+= 16*mb_x + 1; // +-1 is for the simpler rounding
        min+= 16*mb_x - 1;
        xmax= FFMIN(xmax, s->width - max);
        xmin= FFMAX(xmin, - 16     - min);

        max= FFMAX(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        min= FFMIN(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        max+= 16*mb_y + 1; // +-1 is for the simpler rounding
        min+= 16*mb_y - 1;
        ymax= FFMIN(ymax, s->height - max);
        ymin= FFMAX(ymin, - 16      - min);

        /* a 16x16 co-located MB has a single vector, one iteration suffices */
        if(s->mv_type == MV_TYPE_16X16) break;
    }

    av_assert2(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);

    /* the window must contain 0,0, otherwise direct mode is unusable here */
    if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
        s->b_direct_mv_table[mot_xy][0]= 0;
        s->b_direct_mv_table[mot_xy][1]= 0;

        return 256*256*256*64;
    }

    c->xmin= xmin;
    c->ymin= ymin;
    c->xmax= xmax;
    c->ymax= ymax;
    c->flags     |= FLAG_DIRECT;
    c->sub_flags |= FLAG_DIRECT;
    c->pred_x=0;
    c->pred_y=0;

    /* xmin/ymin are <= 0 here; scale them by multiplication because
     * left-shifting a negative value is undefined behaviour in C */
    P_LEFT[0]        = av_clip(mv_table[mot_xy - 1][0], xmin * (1 << shift), xmax<<shift);
    P_LEFT[1]        = av_clip(mv_table[mot_xy - 1][1], ymin * (1 << shift), ymax<<shift);

    /* special case for first line */
    if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as it is clipped
        P_TOP[0]      = av_clip(mv_table[mot_xy - mot_stride             ][0], xmin * (1 << shift), xmax<<shift);
        P_TOP[1]      = av_clip(mv_table[mot_xy - mot_stride             ][1], ymin * (1 << shift), ymax<<shift);
        P_TOPRIGHT[0] = av_clip(mv_table[mot_xy - mot_stride + 1         ][0], xmin * (1 << shift), xmax<<shift);
        P_TOPRIGHT[1] = av_clip(mv_table[mot_xy - mot_stride + 1         ][1], ymin * (1 << shift), ymax<<shift);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    }

    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);
    if(c->sub_flags&FLAG_QPEL)
        dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    else
        dmin = hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);

    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

    get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed

    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;
    c->flags     &= ~FLAG_DIRECT;
    c->sub_flags &= ~FLAG_DIRECT;

    return dmin;
}

/**
 * Estimate motion for one macroblock of a B-frame and select its candidate
 * macroblock type(s).
 *
 * Scores the direct (MPEG-4 only), forward, backward and bidirectional
 * predictions, plus the interlaced forward/backward ones when
 * AV_CODEC_FLAG_INTERLACED_ME is set. The lowest score selects the type; a
 * value derived from that score is stored in
 * s->current_picture.mc_mb_var[xy] and added to c->mc_mb_var_sum_temp.
 * With mb_decision above FF_MB_DECISION_SIMPLE a mask of several candidate
 * types is stored instead of a single type.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_b_frame_motion(MpegEncContext * s,
                             int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->mb_penalty_factor;
    int fmin, bmin, dmin, fbmin, bimin, fimin;
    int type=0;
    const int xy = mb_y*s->mb_stride + mb_x;
    init_ref(c, s->new_picture.f->data, s->last_picture.f->data,
             s->next_picture.f->data, 16 * mb_x, 16 * mb_y, 2);

    get_limits(s, 16*mb_x, 16*mb_y);

    c->skip=0;

    /* co-located MB of the next picture is skipped: only direct is tried */
    if (s->codec_id == AV_CODEC_ID_MPEG4 && s->next_picture.mbskip_table[xy]) {
        int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0

        /* square in unsigned arithmetic: direct_search() may return 2^30,
         * whose square overflows a signed int (undefined behaviour) */
        score= ((unsigned)score*score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
        s->mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_DIRECT0;

        return;
    }

    if (s->codec_id == AV_CODEC_ID_MPEG4)
        dmin= direct_search(s, mb_x, mb_y);
    else
        dmin= INT_MAX;
//FIXME penalty stuff for non mpeg4
    c->skip=0;
    fmin = estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) +
           3 * penalty_factor;

    c->skip=0;
    bmin = estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) +
           2 * penalty_factor;
    ff_dlog(s, " %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);

    c->skip=0;
    fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;
    ff_dlog(s, "%d %d %d %d\n", dmin, fmin, bmin, fbmin);

    if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
//FIXME mb type penalty
        c->skip=0;
        c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_DMV;
        fimin= interlaced_search(s, 0,
                                 s->b_field_mv_table[0], s->b_field_select_table[0],
                                 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
        c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_DMV;
        bimin= interlaced_search(s, 2,
                                 s->b_field_mv_table[1], s->b_field_select_table[1],
                                 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
    }else
        fimin= bimin= INT_MAX;

    {
        /* pick the cheapest mode; direct wins a tie against forward, the
         * remaining modes must be strictly better */
        int score= fmin;
        type = CANDIDATE_MB_TYPE_FORWARD;

        if (dmin <= score){
            score = dmin;
            type = CANDIDATE_MB_TYPE_DIRECT;
        }
        if(bmin<score){
            score=bmin;
            type= CANDIDATE_MB_TYPE_BACKWARD;
        }
        if(fbmin<score){
            score=fbmin;
            type= CANDIDATE_MB_TYPE_BIDIR;
        }
        if(fimin<score){
            score=fimin;
            type= CANDIDATE_MB_TYPE_FORWARD_I;
        }
        if(bimin<score){
            score=bimin;
            type= CANDIDATE_MB_TYPE_BACKWARD_I;
        }

        /* unsigned multiplication avoids signed overflow for large scores */
        score= ((unsigned)score*score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
    }

    if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
        if(fimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_FORWARD_I;
        if(bimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_BACKWARD_I;
        if(fimin < INT_MAX && bimin < INT_MAX){
            type |= CANDIDATE_MB_TYPE_BIDIR_I;
        }
         //FIXME something smarter
        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //do not try direct mode if it is invalid for this MB
        /* also offer DIRECT0 when the direct vector is non-zero; both
         * components are tested explicitly rather than by reading the
         * int16_t pair through a uint32_t pointer (aliasing violation) */
        if (s->codec_id == AV_CODEC_ID_MPEG4 && type&CANDIDATE_MB_TYPE_DIRECT &&
            s->mpv_flags & FF_MPV_FLAG_MV0 &&
            (s->b_direct_mv_table[xy][0] | s->b_direct_mv_table[xy][1]))
            type |= CANDIDATE_MB_TYPE_DIRECT0;
    }

    s->mb_type[mb_y*s->mb_stride + mb_x]= type;
}

/**
 * Find the best f_code for motion estimators which do unlimited searches.
 *
 * Every candidate f_code i starts with a score of mb_num*(8-i). Each
 * macroblock whose type matches and whose vector lies inside the allowed
 * range then lowers the score of every f_code smaller than the one the
 * vector needs according to s->fcode_tab. For non-B pictures this is only
 * done when mc_mb_var is below mb_var for that macroblock.
 *
 * @param s        encoder context
 * @param mv_table motion vectors, indexed like s->mb_type
 * @param type     mask of candidate MB types whose vectors are considered
 * @return the f_code in 1..7 with the highest score (the lowest one on a
 *         tie), or 1 when s->motion_est is FF_ME_ZERO
 */
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
{
    if (s->motion_est != FF_ME_ZERO) {
        int score[8];
        int i, y, range= s->avctx->me_range ? s->avctx->me_range : (INT_MAX/2);
        uint8_t * fcode_tab= s->fcode_tab;
        int best_fcode=-1;
        int best_score=-10000000;

        if(s->msmpeg4_version)
            range= FFMIN(range, 16);
        else if(s->codec_id == AV_CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL)
            range= FFMIN(range, 256);

        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);

        for(y=0; y<s->mb_height; y++){
            int x;
            int xy= y*s->mb_stride;
            /* xy is advanced in the loop header so that the "continue"
             * below cannot leave it stuck on an out-of-range macroblock */
            for(x=0; x<s->mb_width; x++, xy++){
                if(s->mb_type[xy] & type){
                    int mx= mv_table[xy][0];
                    int my= mv_table[xy][1];
                    int fcode= FFMAX(fcode_tab[mx + MAX_MV],
                                     fcode_tab[my + MAX_MV]);
                    int j;

                    /* vectors outside the range do not vote */
                    if(mx >= range || mx < -range ||
                       my >= range || my < -range)
                        continue;

                    for(j=0; j<fcode && j<8; j++){
                        if(s->pict_type==AV_PICTURE_TYPE_B || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy])
                            score[j]-= 170;
                    }
                }
            }
        }

        for(i=1; i<8; i++){
            if(score[i] > best_score){
                best_score= score[i];
                best_fcode= i;
            }
        }

        return best_fcode;
    }else{
        return 1;
    }
}

1670 1671
/**
 * Convert 4MV macroblocks of a P-picture whose vectors exceed the range
 * allowed by s->f_code (and by avctx->me_range, if set) into intra
 * candidates. Does nothing beyond the assertions unless
 * AV_CODEC_FLAG_4MV is set.
 *
 * @param s encoder context; s->pict_type must be AV_PICTURE_TYPE_P
 */
void ff_fix_long_p_mvs(MpegEncContext * s)
{
    MotionEstContext * const c = &s->me;
    const int f_code  = s->f_code;
    const int b8_wrap = s->b8_stride;
    int mb_row, range;

    av_assert0(s->pict_type==AV_PICTURE_TYPE_P);

    range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);

    av_assert0(range <= 16 || !s->msmpeg4_version);
    av_assert0(range <=256 || !(s->codec_id == AV_CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));

    if (c->avctx->me_range && range > c->avctx->me_range)
        range = c->avctx->me_range;

    if (!(s->avctx->flags & AV_CODEC_FLAG_4MV))
        return;

    /* clip / convert to intra 8x8 type MVs */
    for (mb_row = 0; mb_row < s->mb_height; mb_row++) {
        int b8_xy = mb_row * 2 * b8_wrap;   /* top-left 8x8 block of the MB */
        int mb_xy = mb_row * s->mb_stride;
        int mb_col;

        for (mb_col = 0; mb_col < s->mb_width; mb_col++, b8_xy += 2, mb_xy++) {
            int blk;

            if (!(s->mb_type[mb_xy] & CANDIDATE_MB_TYPE_INTER4V))
                continue;

            for (blk = 0; blk < 4; blk++) {
                const int blk_off = (blk & 1) + (blk >> 1) * b8_wrap;
                const int mx = s->current_picture.motion_val[0][b8_xy + blk_off][0];
                const int my = s->current_picture.motion_val[0][b8_xy + blk_off][1];
                const int in_range = mx < range && mx >= -range &&
                                     my < range && my >= -range;

                if (!in_range) {
                    s->mb_type[mb_xy] &= ~CANDIDATE_MB_TYPE_INTER4V;
                    s->mb_type[mb_xy] |= CANDIDATE_MB_TYPE_INTRA;
                    s->current_picture.mb_type[mb_xy] = CANDIDATE_MB_TYPE_INTRA;
                }
            }
        }
    }
}

1716 1717 1718 1719
/**
 *
 * @param truncate 1 for truncation, 0 for using intra
 */
1720
void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
1721
                     int16_t (*mv_table)[2], int f_code, int type, int truncate)
1722
{
1723
    MotionEstContext * const c= &s->me;
1724
    int y, h_range, v_range;
1725

1726
    // RAL: 8 in MPEG-1, 16 in MPEG-4
1727
    int range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
1728

1729
    if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
1730

1731 1732 1733
    h_range= range;
    v_range= field_select_table ? range>>1 : range;

1734 1735 1736
    /* clip / convert to intra 16x16 type MVs */
    for(y=0; y<s->mb_height; y++){
        int x;
1737
        int xy= y*s->mb_stride;
1738
        for(x=0; x<s->mb_width; x++){
1739
            if (s->mb_type[xy] & type){    // RAL: "type" test added...
1740
                if (!field_select_table || field_select_table[xy] == field_select) {
1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754
                    if(   mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
                       || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){

                        if(truncate){
                            if     (mv_table[xy][0] > h_range-1) mv_table[xy][0]=  h_range-1;
                            else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
                            if     (mv_table[xy][1] > v_range-1) mv_table[xy][1]=  v_range-1;
                            else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
                        }else{
                            s->mb_type[xy] &= ~type;
                            s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
                            mv_table[xy][0]=
                            mv_table[xy][1]= 0;
                        }
1755
                    }
1756
                }
1757 1758 1759 1760 1761
            }
            xy++;
        }
    }
}