/*
 * Motion estimation
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Motion estimation template.
 */
26

27
/*
 * Declares the locals that the search routines and the CHECK_* macros read:
 * the score map, the search window bounds, the MV penalty table and the
 * predicted vector. All of them are copied from the MotionEstContext pointer
 * named "c" at the expansion site.
 *
 * Not every user reads every variable, hence the av_unused tags; the
 * compiler is expected to drop the unused ones.
 *
 * The last declaration is deliberately not followed by a line continuation,
 * so the macro does not depend on the next source line being blank.
 */
#define LOAD_COMMON\
    uint32_t av_unused * const score_map= c->score_map;\
    const int av_unused xmin= c->xmin;\
    const int av_unused ymin= c->ymin;\
    const int av_unused xmax= c->xmax;\
    const int av_unused ymax= c->ymax;\
    uint8_t *mv_penalty= c->current_mv_penalty;\
    const int pred_x= c->pred_x;\
    const int pred_y= c->pred_y;
Michael Niedermayer's avatar
Michael Niedermayer committed
37 38 39 40 41

/*
 * Score one half-pel candidate: full-pel position (x, y) plus the half-pel
 * offset (dx, dy), i.e. the vector (2*x+dx, 2*y+dy) in half-pel units.
 *
 * The score is cmp_hpel() plus the rate penalty of the vector relative to
 * (pred_x, pred_y). The result is handed to COPY3_IF_LT (defined elsewhere;
 * presumably it stores d, hx, hy into dmin, bx, by when d < dmin).
 *
 * Expects d, dmin, bx, by, s, size, h, ref_index, src_index, cmp_sub,
 * chroma_cmp_sub, flags, mv_penalty, pred_x, pred_y and penalty_factor to be
 * in scope at the expansion site.
 */
#define CHECK_HALF_MV(dx, dy, x, y)\
{\
    const int hx= 2*(x)+(dx);\
    const int hy= 2*(y)+(dy);\
    d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}

47
/**
 * Refine a full-pel motion vector to half-pel precision.
 *
 * @param s         encoder context; the motion estimation state is s->me
 * @param mx_ptr    in: full-pel x of the best vector; out: x in half-pel
 *                  units (or 0 when c->skip is set)
 * @param my_ptr    same as mx_ptr, for y
 * @param dmin      score of the input vector
 * @param src_index,ref_index,size,h  passed through to the compare functions
 * @return score of the vector written to mx_ptr / my_ptr
 *
 * The refinement is only attempted when the input vector lies strictly inside
 * the search window, because it reads the scores of its four full-pel
 * neighbours from score_map. Otherwise the vector is only rescaled.
 */
static int hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int mx = *mx_ptr;
    const int my = *my_ptr;
    const int penalty_factor= c->sub_penalty_factor;
    me_cmp_func cmp_sub, chroma_cmp_sub;
    int bx=2*mx, by=2*my;   /* best vector so far, in half-pel units */

    LOAD_COMMON
    int flags= c->sub_flags;

 //FIXME factorize

    cmp_sub= s->dsp.me_sub_cmp[size];
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];

    if(c->skip){ //FIXME move out of hpel?
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    /* The incoming dmin was computed with me_cmp; rescore the full-pel
     * position with the sub-pel compare function so scores are comparable. */
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
        if(mx || my || size>0)
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
    }

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int d= dmin;
        /* multiplication instead of <<: my may be negative and a left shift
         * of a negative value is undefined behaviour */
        const int index= my*(1<<ME_MAP_SHIFT) + mx;
        /* scores (plus rate penalty) of the top, left, right and bottom
         * full-pel neighbours, as cached by the full-pel search */
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;

        /* Debug check: all four neighbours must have been scored in the
         * current map generation, otherwise t/l/r/b above are stale. */
        unsigned key;
        unsigned map_generation= c->map_generation;
#ifndef NDEBUG
        uint32_t *map= c->map;
#endif
        key= (my-1)*(1<<ME_MAP_MV_BITS) + (mx) + map_generation;
        assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
        key= (my+1)*(1<<ME_MAP_MV_BITS) + (mx) + map_generation;
        assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
        key= (my)*(1<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
        assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
        key= (my)*(1<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
        assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);

        /* Use the neighbour scores to pick which of the surrounding
         * half-pel positions are evaluated. */
        if(t<=b){
            CHECK_HALF_MV(0, 1, mx  ,my-1)
            if(l<=r){
                CHECK_HALF_MV(1, 1, mx-1, my-1)
                if(t+r<=b+l){
                    CHECK_HALF_MV(1, 1, mx  , my-1)
                }else{
                    CHECK_HALF_MV(1, 1, mx-1, my  )
                }
                CHECK_HALF_MV(1, 0, mx-1, my  )
            }else{
                CHECK_HALF_MV(1, 1, mx  , my-1)
                if(t+l<=b+r){
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
                }else{
                    CHECK_HALF_MV(1, 1, mx  , my  )
                }
                CHECK_HALF_MV(1, 0, mx  , my  )
            }
        }else{
            if(l<=r){
                if(t+l<=b+r){
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
                }else{
                    CHECK_HALF_MV(1, 1, mx  , my  )
                }
                CHECK_HALF_MV(1, 0, mx-1, my)
                CHECK_HALF_MV(1, 1, mx-1, my)
            }else{
                if(t+r<=b+l){
                    CHECK_HALF_MV(1, 1, mx  , my-1)
                }else{
                    CHECK_HALF_MV(1, 1, mx-1, my)
                }
                CHECK_HALF_MV(1, 0, mx  , my)
                CHECK_HALF_MV(1, 1, mx  , my)
            }
            CHECK_HALF_MV(0, 1, mx  , my)
        }
        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
    }

    *mx_ptr = bx;
    *my_ptr = by;

    return dmin;
}

155 156 157 158 159 160 161 162 163 164
/**
 * Sub-pel "refinement" that does no searching: it only converts the full-pel
 * vector to half-pel units by doubling it.
 *
 * @param mx_ptr,my_ptr  in: full-pel vector; out: the same vector times two
 * @param dmin           score of the vector, returned unchanged
 * @return dmin
 *
 * The remaining parameters are unused; they are present so the signature
 * matches the other sub-pel search functions in this file.
 */
static int no_sub_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    /* Multiply rather than shift: motion vectors can be negative and
     * left-shifting a negative int is undefined behaviour. */
    *mx_ptr *= 2;
    *my_ptr *= 2;
    return dmin;
}

Dominik Mierzejewski's avatar
Dominik Mierzejewski committed
165
/**
 * Score a sub-pel motion vector with the macroblock compare function
 * (s->dsp.mb_cmp) and the mb_* penalty/flags of the motion estimation context.
 *
 * @param mx,my     vector in sub-pel units; it is split into an integer part
 *                  (mx >> (qpel+1)) and a fractional part (mx & mask)
 * @param add_rate  if non-zero, the rate penalty of the vector relative to
 *                  (pred_x, pred_y) is added, except for a (0,0) vector
 *                  with size 0
 * @param src_index,ref_index,size,h  passed through to cmp()
 * @return the score
 */
inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
                               int ref_index, int size, int h, int add_rate)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->mb_penalty_factor;
    const int flags= c->mb_flags;
    const int qpel= flags & FLAG_QPEL;
    /* NOTE(review): mask is 1 (hpel) or 3 (qpel) only if FLAG_QPEL == 1 -- confirm */
    const int mask= 1+2*qpel;
    me_cmp_func cmp_sub, chroma_cmp_sub;
    int d;

    LOAD_COMMON

 //FIXME factorize

    cmp_sub= s->dsp.mb_cmp[size];
    chroma_cmp_sub= s->dsp.mb_cmp[size+1];

    d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
    //FIXME check cbp before adding penalty for (0,0) vector
    if(add_rate && (mx || my || size>0))
        d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;

    return d;
}

Michael Niedermayer's avatar
Michael Niedermayer committed
195 196 197 198
/*
 * Score one quarter-pel candidate: full-pel position (x, y) plus the
 * quarter-pel offset (dx, dy), i.e. the vector (4*x+dx, 4*y+dy) in
 * quarter-pel units.
 *
 * The score is cmp_qpel() plus the rate penalty of the vector relative to
 * (pred_x, pred_y). The result is handed to COPY3_IF_LT (defined elsewhere;
 * presumably it stores d, hx, hy into dmin, bx, by when d < dmin).
 *
 * Expects d, dmin, bx, by, s, size, h, ref_index, src_index, cmpf,
 * chroma_cmpf, flags, mv_penalty, pred_x, pred_y and penalty_factor to be in
 * scope at the expansion site.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)\
{\
    const int hx= 4*(x)+(dx);\
    const int hy= 4*(y)+(dy);\
    d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}

204
/**
 * Refine a full-pel motion vector to quarter-pel precision.
 *
 * The scores of the full-pel neighbours cached in score_map are used to
 * estimate a score for each quarter-pel position within +-3 of the input
 * vector. The 8 positions with the lowest estimate are kept, and the first
 * me_subpel_quality of them (at most 8) are evaluated with CHECK_QUARTER_MV.
 *
 * @param s         encoder context; the motion estimation state is s->me
 * @param mx_ptr    in: full-pel x of the best vector; out: x in quarter-pel
 *                  units (or 0 when c->skip is set)
 * @param my_ptr    same as mx_ptr, for y
 * @param dmin      score of the input vector
 * @param src_index,ref_index,size,h  passed through to the compare functions
 * @return score of the vector written to mx_ptr / my_ptr
 *
 * As in the half-pel search, refinement only happens when the input vector is
 * strictly inside the search window; otherwise the vector is only rescaled.
 */
static int qpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int mx = *mx_ptr;
    const int my = *my_ptr;
    const int penalty_factor= c->sub_penalty_factor;
    const unsigned map_generation = c->map_generation;
    const int subpel_quality= c->avctx->me_subpel_quality;
    uint32_t *map= c->map;
    me_cmp_func cmpf, chroma_cmpf;
    me_cmp_func cmp_sub, chroma_cmp_sub;

    LOAD_COMMON
    int flags= c->sub_flags;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
 //FIXME factorize

    cmp_sub= s->dsp.me_sub_cmp[size];
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];

    if(c->skip){ //FIXME somehow move up (benchmark)
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    /* The incoming dmin was computed with me_cmp; rescore the full-pel
     * position with the sub-pel compare function so scores are comparable. */
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
        if(mx || my || size>0)
            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
    }

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int bx=4*mx, by=4*my;   /* best vector so far, in quarter-pel units */
        int d= dmin;
        int i, nx, ny;
        /* multiplication instead of <<: my may be negative and a left shift
         * of a negative value is undefined behaviour */
        const int index= my*(1<<ME_MAP_SHIFT) + mx;
        /* cached full-pel scores: top, left, right, bottom and centre.
         * Note that "c" shadows the MotionEstContext pointer from here on,
         * which is why the context is reached through s->me below. */
        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
        const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
        int best[8];            /* estimated scores, ascending */
        int best_pos[8][2];     /* matching quarter-pel positions (x, y) */
        /* never index best_pos[] past its end, whatever the user option says */
        const int refine_count= FFMIN(subpel_quality, (int)(sizeof(best)/sizeof(best[0])));

        /* fills every int with 0x40404040, used as a "larger than any
         * estimate" sentinel */
        memset(best, 64, sizeof(int)*8);
        if(s->me.dia_size>=2){
            /* diagonal neighbours are taken from score_map as well */
            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];

            for(ny= -3; ny <= 3; ny++){
                for(nx= -3; nx <= 3; nx++){
                    //FIXME this could overflow (unlikely though)
                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
                    const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
                    int i;

                    /* skip the full-pel position itself */
                    if((nx&3)==0 && (ny&3)==0) continue;

                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;

                    /* insertion into the sorted top-8 list */
                    for(i=0; i<8; i++){
                        if(score < best[i]){
                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
                            best[i]= score;
                            best_pos[i][0]= nx + 4*mx;
                            best_pos[i][1]= ny + 4*my;
                            break;
                        }
                    }
                }
            }
        }else{
            int tl;
            //FIXME this could overflow (unlikely though)
            const int cx = 4*(r - l);
            const int cx2= r + l - 2*c;
            const int cy = 4*(b - t);
            const int cy2= b + t - 2*c;
            int cxy;

            /* the trailing "&& 0" disables the cached path, so tl is always
             * recomputed */
            if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == my*(1<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
            }else{
                tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
            }

            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;

            /* the fitted surface must reproduce the five sampled scores */
            assert(16*cx2 + 4*cx + 32*c == 32*r);
            assert(16*cx2 - 4*cx + 32*c == 32*l);
            assert(16*cy2 + 4*cy + 32*c == 32*b);
            assert(16*cy2 - 4*cy + 32*c == 32*t);
            assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);

            for(ny= -3; ny <= 3; ny++){
                for(nx= -3; nx <= 3; nx++){
                    //FIXME this could overflow (unlikely though)
                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
                    int i;

                    /* skip the full-pel position itself */
                    if((nx&3)==0 && (ny&3)==0) continue;

                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;

                    /* insertion into the sorted top-8 list */
                    for(i=0; i<8; i++){
                        if(score < best[i]){
                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
                            best[i]= score;
                            best_pos[i][0]= nx + 4*mx;
                            best_pos[i][1]= ny + 4*my;
                            break;
                        }
                    }
                }
            }
        }
        /* really evaluate the most promising candidates */
        for(i=0; i<refine_count; i++){
            nx= best_pos[i][0];
            ny= best_pos[i][1];
            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
        }

        assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);

        *mx_ptr = bx;
        *my_ptr = by;
    }else{
        *mx_ptr =4*mx;
        *my_ptr =4*my;
    }

    return dmin;
}


/*
 * Score the full-pel candidate (x, y) unless it was already scored in the
 * current map generation.
 *
 * map[] holds, per hashed position, a key built from (x, y) and
 * map_generation; a matching key means the candidate is already known and
 * nothing is done. Otherwise the raw cmp() score is cached in score_map[],
 * the rate penalty is added, and the result is handed to COPY3_IF_LT
 * (defined elsewhere; presumably it stores d, x, y into dmin, best[0],
 * best[1] when d < dmin).
 *
 * The candidate must already lie inside [xmin,xmax] x [ymin,ymax]; this is
 * only checked by assert(). Note that d is overwritten whenever a new
 * candidate is scored, whether or not it becomes the best.
 *
 * Multiplication is used instead of << because x and y may be negative and a
 * left shift of a negative value is undefined behaviour.
 */
#define CHECK_MV(x,y)\
{\
    const unsigned key = ((y)*(1<<ME_MAP_MV_BITS)) + (x) + map_generation;\
    const int index= (((y)*(1<<ME_MAP_SHIFT)) + (x))&(ME_MAP_SIZE-1);\
    assert((x) >= xmin);\
    assert((x) <= xmax);\
    assert((y) >= ymin);\
    assert((y) <= ymax);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
    }\
}

376
/*
 * Same as CHECK_MV, but the candidate (ax, ay) is first clamped into the
 * search window [xmin,xmax] x [ymin,ymax]. Each argument is evaluated once.
 */
#define CHECK_CLIPPED_MV(ax,ay)\
{\
    const int Lx= ax;\
    const int Ly= ay;\
    const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
    const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
    CHECK_MV(Lx2, Ly2)\
}

Michael Niedermayer's avatar
Michael Niedermayer committed
385 386
/*
 * Variant of CHECK_MV used by the small diamond search: scores the full-pel
 * candidate (x, y) if it is not yet in the map for the current generation
 * and, when it becomes the new best, also records new_dir in next_dir so the
 * caller knows which direction the improvement came from.
 *
 * Unlike CHECK_MV it does not assert that the candidate is inside the search
 * window; the caller checks the bounds before expanding it.
 *
 * Multiplication is used instead of << because x and y may be negative and a
 * left shift of a negative value is undefined behaviour.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const unsigned key = ((y)*(1<<ME_MAP_MV_BITS)) + (x) + map_generation;\
    const int index= (((y)*(1<<ME_MAP_SHIFT)) + (x))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        if(d<dmin){\
            best[0]=x;\
            best[1]=y;\
            dmin=d;\
            next_dir= new_dir;\
        }\
    }\
}

/*
 * Debug aid: for each bound of the search window that the vector (x, y)
 * violates, print the bound, the vector and the macroblock position
 * (s->mb_x, s->mb_y). The bounds are scaled by 1<<S before the comparison;
 * v is stringified and appended to the format string as a tag.
 *
 * The expansion is four unbraced if statements, so it must not be used as
 * the body of an unbraced if/else. The last line ends with a line
 * continuation, so the source line after the macro has to stay blank.
 */
#define check(x,y,S,v)\
if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\

411
/*
 * Additional locals for the full-pel search routines: the candidate map and
 * the shift that converts a full-pel coordinate to the sub-pel units used to
 * index mv_penalty (1 for half-pel, one more when FLAG_QPEL is set in flags).
 * Expects "c" and "flags" at the expansion site.
 *
 * The last declaration is deliberately not followed by a line continuation,
 * so the macro does not depend on the next source line being blank.
 */
#define LOAD_COMMON2\
    uint32_t *map= c->map;\
    const int qpel= flags&FLAG_QPEL;\
    const int shift= 1+qpel;
Michael Niedermayer's avatar
Michael Niedermayer committed
415

416
/**
 * Full-pel search that repeatedly checks the four direct neighbours of the
 * current best point and moves to the best improvement, until none of the
 * neighbours is better.
 *
 * @param best   in/out: best full-pel vector, best[0] = x, best[1] = y
 * @param dmin   score of the input vector
 * @param penalty_factor  weight of the MV rate penalty
 * @param src_index,ref_index,size,h,flags  passed through to cmp()
 * @return score of the vector left in best[]
 */
static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int next_dir=-1;
    LOAD_COMMON
    LOAD_COMMON2
    unsigned map_generation = c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
        /* multiplication instead of <<: best[1] may be negative and a left
         * shift of a negative value is undefined behaviour */
        const unsigned key = best[1]*(1<<ME_MAP_MV_BITS) + best[0] + map_generation;
        const int index= (best[1]*(1<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
        if(map[index]!=key){ //this will be executed only very rarely
            score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
            map[index]= key;
        }
    }

    for(;;){
        int d;
        /* direction of the previous step; the neighbour we just came from
         * (the opposite direction) is not rechecked */
        const int dir= next_dir;
        const int x= best[0];
        const int y= best[1];
        next_dir=-1;

        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)

        /* no neighbour improved on the current best: done */
        if(next_dir==-1){
            return dmin;
        }
    }
}

458 459 460
/**
 * Full-pel search over diamonds of size 1, 2 and 4 around the current best
 * point. Sizes that are not a power of two are skipped, as are diamonds that
 * would leave the search window. Whenever a diamond improves the best point,
 * the search restarts from size 1 at the new position.
 *
 * @param best   in/out: best full-pel vector, best[0] = x, best[1] = y
 * @param dmin   score of the input vector
 * @param penalty_factor  weight of the MV rate penalty
 * @param src_index,ref_index,size,h,flags  passed through to cmp()
 * @return score of the vector left in best[]
 */
static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    unsigned map_generation = c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(dia_size=1; dia_size<=4; dia_size++){
        int dir;
        const int x= best[0];
        const int y= best[1];

        /* only power-of-two sizes */
        if(dia_size&(dia_size-1)) continue;

        if(   x + dia_size > xmax
           || x - dia_size < xmin
           || y + dia_size > ymax
           || y - dia_size < ymin)
           continue;

        /* every second point on each of the four diamond edges */
        for(dir= 0; dir<dia_size; dir+=2){
            int d;

            CHECK_MV(x + dir           , y + dia_size - dir);
            CHECK_MV(x + dia_size - dir, y - dir           );
            CHECK_MV(x - dir           , y - dia_size + dir);
            CHECK_MV(x - dia_size + dir, y + dir           );
        }

        /* best point moved: restart (the loop increment brings this to 1) */
        if(x!=best[0] || y!=best[1])
            dia_size=0;
    }
    return dmin;
}

Michael Niedermayer's avatar
Michael Niedermayer committed
500 501 502 503 504 505 506 507
/**
 * Full-pel hexagon search. For each pattern size the six (four when
 * dia_size is 1) hexagon points around the current best are checked,
 * clamped to the search window, and this repeats until the best point stops
 * moving. The size then shrinks: it is halved when the initial dia_size is a
 * power of two, otherwise decremented by one.
 *
 * @param best      in/out: best full-pel vector, best[0] = x, best[1] = y
 * @param dmin      score of the input vector
 * @param dia_size  initial pattern size
 * @param penalty_factor  weight of the MV rate penalty
 * @param src_index,ref_index,size,h,flags  passed through to cmp()
 * @return score of the vector left in best[]
 */
static int hex_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags, int dia_size)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    unsigned map_generation = c->map_generation;
    int x,y,d;
    /* non-zero when the initial size is not a power of two */
    const int dec= dia_size & (dia_size-1);

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
        do{
            x= best[0];
            y= best[1];

            CHECK_CLIPPED_MV(x  -dia_size    , y);
            CHECK_CLIPPED_MV(x+  dia_size    , y);
            CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
            CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
            /* for dia_size 1 the left-hand pair is not checked */
            if(dia_size>1){
                CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
                CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
            }
        }while(best[0] != x || best[1] != y);
    }

    return dmin;
}

/**
 * Full-pel "large to small" diamond search. An 8-point pattern scaled by the
 * current size is checked around the best point until the best stops moving;
 * the size then shrinks (halved when the initial size is a power of two,
 * otherwise decremented). Finally the four direct neighbours of the result
 * are checked once.
 *
 * The initial size is the low 8 bits of c->dia_size.
 *
 * @param best   in/out: best full-pel vector, best[0] = x, best[1] = y
 * @param dmin   score of the input vector
 * @param penalty_factor  weight of the MV rate penalty
 * @param src_index,ref_index,size,h,flags  passed through to cmp()
 * @return score of the vector left in best[]
 */
static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    unsigned map_generation = c->map_generation;
    int x,y,i,d;
    int dia_size= c->dia_size&0xFF;
    /* non-zero when the initial size is not a power of two */
    const int dec= dia_size & (dia_size-1);
    static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
                                { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
        do{
            x= best[0];
            y= best[1];
            for(i=0; i<8; i++){
                CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
            }
        }while(best[0] != x || best[1] != y);
    }

    /* final check of the four direct neighbours */
    x= best[0];
    y= best[1];
    CHECK_CLIPPED_MV(x+1, y);
    CHECK_CLIPPED_MV(x, y+1);
    CHECK_CLIPPED_MV(x-1, y);
    CHECK_CLIPPED_MV(x, y-1);

    return dmin;
}

572 573 574 575 576 577 578 579
/**
 * Full-pel search in four stages:
 *  1. a horizontal and a vertical line through the start point (every
 *     second position, the vertical line half as long as the horizontal);
 *  2. the 5x5 square around the best point found so far;
 *  3. the 16-point pattern in hex[], scaled by 1 .. dia_size/4;
 *  4. a hex_search() of size 2 from the resulting best point.
 *
 * dia_size is c->dia_size masked with 0xFE.
 *
 * @param best   in/out: best full-pel vector, best[0] = x, best[1] = y
 * @param dmin   score of the input vector
 * @param penalty_factor  weight of the MV rate penalty
 * @param src_index,ref_index,size,h,flags  passed through to cmp()
 * @return score of the vector left in best[]
 */
static int umh_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    unsigned map_generation = c->map_generation;
    int x,y,x2,y2, i, j, d;
    const int dia_size= c->dia_size&0xFE;
    static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
                                 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
                                 {-2, 3}, { 0, 4}, { 2, 3},
                                 {-2,-3}, { 0,-4}, { 2,-3},};

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    /* stage 1: cross through the start point */
    x= best[0];
    y= best[1];
    for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
        CHECK_MV(x2, y);
    }
    for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
        CHECK_MV(x, y2);
    }

    /* stage 2: 5x5 square around the current best */
    x= best[0];
    y= best[1];
    for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
        for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
            CHECK_MV(x2, y2);
        }
    }

//FIXME prevent the CLIP stuff

    /* stage 3: scaled 16-point pattern, still centred on (x, y) from stage 2 */
    for(j=1; j<=dia_size/4; j++){
        for(i=0; i<16; i++){
            CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
        }
    }

    /* stage 4 */
    return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
}

Michael Niedermayer's avatar
Michael Niedermayer committed
619 620 621 622 623 624 625 626
/**
 * Exhaustive full-pel search over the square [-dia_size, dia_size]^2 around
 * (0,0), intersected with the search window. dia_size is the low 8 bits of
 * c->dia_size.
 *
 * After the search the best point and its four direct neighbours are run
 * through CHECK_CLIPPED_MV once more so that they are present in the score
 * map; the best vector and its score are then restored to what the
 * exhaustive search found.
 *
 * @param best   in/out: best full-pel vector, best[0] = x, best[1] = y
 * @param dmin   score of the input vector
 * @param penalty_factor  weight of the MV rate penalty
 * @param src_index,ref_index,size,h,flags  passed through to cmp()
 * @return score of the vector left in best[]
 */
static int full_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    unsigned map_generation = c->map_generation;
    int x,y, d;
    int best_score;
    const int dia_size= c->dia_size&0xFF;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
        for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
            CHECK_MV(x, y);
        }
    }

    x= best[0];
    y= best[1];
    /* Keep the winning score in a variable the CHECK_* macros do not touch:
     * they overwrite d (and possibly dmin/best[]) whenever they score a
     * candidate that was not in the map yet, which would otherwise make the
     * return value belong to a neighbour instead of to (x, y). */
    best_score= dmin;
    CHECK_CLIPPED_MV(x  , y);
    CHECK_CLIPPED_MV(x+1, y);
    CHECK_CLIPPED_MV(x, y+1);
    CHECK_CLIPPED_MV(x-1, y);
    CHECK_CLIPPED_MV(x, y-1);
    best[0]= x;
    best[1]= y;

    return best_score;
}

654 655
/*
 * Candidate check for the SAB diamond search: scores the full-pel candidate
 * (ax, ay) if it is not yet in the map for the current generation and, when
 * its score beats the worst entry of minima[], inserts it into the list at
 * its sorted position (dropping the last entry).
 *
 * After an insertion the macro sets i to -1 and executes "continue", so it
 * must be expanded directly inside the loop over i that walks minima[].
 * The "while" scan cannot run off the list because it is only reached when
 * d is smaller than the last entry's height.
 *
 * Multiplication is used instead of << because ax and ay may be negative and
 * a left shift of a negative value is undefined behaviour.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const unsigned key = ((ay)*(1<<ME_MAP_MV_BITS)) + (ax) + map_generation;\
    const int index= (((ay)*(1<<ME_MAP_SHIFT)) + (ax))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((ax)*(1<<shift))-pred_x] + mv_penalty[((ay)*(1<<shift))-pred_y])*penalty_factor;\
        if(d < minima[minima_count-1].height){\
            int j=0;\
            \
            while(d >= minima[j].height) j++;\
\
            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
\
            minima[j].checked= 0;\
            minima[j].height= d;\
            minima[j].x= ax;\
            minima[j].y= ay;\
            \
            i=-1;\
            continue;\
        }\
    }\
}

683
/* Number of entries in the minima[] array declared by sab_diamond_search(). */
#define MAX_SAB_SIZE ME_MAP_SIZE
684 685 686
static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
687
{
688
    MotionEstContext * const c= &s->me;
689
    me_cmp_func cmpf, chroma_cmpf;
690
    Minima minima[MAX_SAB_SIZE];
691
    const int minima_count= FFABS(c->dia_size);
692
    int i, j;
693
    LOAD_COMMON
694
    LOAD_COMMON2
695
    unsigned map_generation = c->map_generation;
696

697 698
    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];
699

700 701 702 703
    /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
      become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
     */
    for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
704 705 706
        uint32_t key= map[i];

        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
707

708
        if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
709

710 711 712 713 714
        minima[j].height= score_map[i];
        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
715 716 717 718 719 720

        // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
        if(   minima[j].x > xmax || minima[j].x < xmin
           || minima[j].y > ymax || minima[j].y < ymin)
            continue;

721 722 723
        minima[j].checked=0;
        if(minima[j].x || minima[j].y)
            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
724

725 726
        j++;
    }
727

728
    qsort(minima, j, sizeof(Minima), minima_cmp);
729

730 731 732 733 734
    for(; j<minima_count; j++){
        minima[j].height=256*256*256*64;
        minima[j].checked=0;
        minima[j].x= minima[j].y=0;
    }
735

736 737 738 739
    for(i=0; i<minima_count; i++){
        const int x= minima[i].x;
        const int y= minima[i].y;
        int d;
740

741
        if(minima[i].checked) continue;
742

743 744 745 746 747 748 749 750
        if(   x >= xmax || x <= xmin
           || y >= ymax || y <= ymin)
           continue;

        SAB_CHECK_MV(x-1, y)
        SAB_CHECK_MV(x+1, y)
        SAB_CHECK_MV(x  , y-1)
        SAB_CHECK_MV(x  , y+1)
751

752 753
        minima[i].checked= 1;
    }
754

755 756 757
    best[0]= minima[0].x;
    best[1]= minima[0].y;
    dmin= minima[0].height;
758

759 760 761 762 763 764 765 766 767
    if(   best[0] < xmax && best[0] > xmin
       && best[1] < ymax && best[1] > ymin){
        int d;
        //ensure that the refernece samples for hpel refinement are in the map
        CHECK_MV(best[0]-1, best[1])
        CHECK_MV(best[0]+1, best[1])
        CHECK_MV(best[0], best[1]-1)
        CHECK_MV(best[0], best[1]+1)
    }
768
    return dmin;
769 770
}

771 772 773
/**
 * Diamond search with a growing radius.
 *
 * Starting at radius 1, the four edges of the diamond around best[] are
 * evaluated with CHECK_MV, each edge clipped so that no candidate leaves
 * [xmin,xmax] x [ymin,ymax]. If best[] changed during a pass the radius is
 * reset and the search restarts around the new position; otherwise the radius
 * grows until it exceeds c->dia_size.
 *
 * @param s     encoder context; s->me supplies the limits and dia_size
 * @param best  in/out: current best mv (x in best[0], y in best[1])
 * @param dmin  score of best[] on entry
 * @param src_index, ref_index, penalty_factor, size, h, flags
 *              not used directly here; presumably consumed by the CHECK_MV
 *              expansion (defined outside this section)
 * @return dmin (CHECK_MV, defined outside this section, is presumably what
 *         updates dmin and best[] -- confirm against its definition)
 */
static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    unsigned map_generation = c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(dia_size=1; dia_size<=c->dia_size; dia_size++){
        int dir, start, end;
        const int x= best[0];
        const int y= best[1];

        // edge from (x, y+dia_size) towards (x+dia_size, y)
        start= FFMAX(0, y + dia_size - ymax);
        end  = FFMIN(dia_size, xmax - x + 1);
        for(dir= start; dir<end; dir++){
            int d;

            CHECK_MV(x + dir           , y + dia_size - dir);
        }

        // edge from (x+dia_size, y) towards (x, y-dia_size)
        start= FFMAX(0, x + dia_size - xmax);
        end  = FFMIN(dia_size, y - ymin + 1);
        for(dir= start; dir<end; dir++){
            int d;

            CHECK_MV(x + dia_size - dir, y - dir           );
        }

        // edge from (x, y-dia_size) towards (x-dia_size, y)
        start= FFMAX(0, -y + dia_size + ymin );
        end  = FFMIN(dia_size, x - xmin + 1);
        for(dir= start; dir<end; dir++){
            int d;

            CHECK_MV(x - dir           , y - dia_size + dir);
        }

        // edge from (x-dia_size, y) towards (x, y+dia_size)
        start= FFMAX(0, -x + dia_size + xmin );
        end  = FFMIN(dia_size, ymax - y + 1);
        for(dir= start; dir<end; dir++){
            int d;

            CHECK_MV(x - dia_size + dir, y + dir           );
        }

        // best[] moved: restart at radius 1 (the loop increment turns 0 into 1)
        if(x!=best[0] || y!=best[1])
            dia_size=0;
    }
    return dmin;
}

/**
 * Select the iterative search routine according to c->dia_size and run it.
 *
 * Dispatch on c->dia_size:
 *  - -1          funny_diamond_search
 *  - < -1        sab_diamond_search
 *  - 0 or 1      small_diamond_search
 *  - 2 .. 256    var_diamond_search
 *  - 257 .. 512  l2s_dia_search
 *  - 513 .. 768  hex_search (with the low 8 bits of dia_size as extra argument)
 *  - 769 .. 1024 umh_search
 *  - > 1024      full_search
 *
 * All arguments are forwarded unchanged to the selected routine.
 *
 * @return the value returned by the selected search routine
 */
static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags){
    MotionEstContext * const c= &s->me;
    if(c->dia_size==-1)
        return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    else if(c->dia_size<-1)
        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    else if(c->dia_size<2)
        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    else if(c->dia_size>1024)
        return          full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    else if(c->dia_size>768)
        return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    else if(c->dia_size>512)
        return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
    else if(c->dia_size>256)
        return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    else
        return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
}

/**
855
   @param P a list of candidate mvs to check before starting the
856 857 858 859
   iterative search. If one of the candidates is close to the optimal mv, then
   it takes fewer iterations. And it increases the chance that we find the
   optimal mv.
 */
860
static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
861
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
862
                             int ref_mv_scale, int flags, int size, int h)
Michael Niedermayer's avatar
Michael Niedermayer committed
863
{
864
    MotionEstContext * const c= &s->me;
865
    int best[2]={0, 0};      /**< x and y coordinates of the best motion vector.
866
                               i.e. the difference between the position of the
Loren Merritt's avatar
Loren Merritt committed
867
                               block currently being encoded and the position of
868 869
                               the block chosen to predict it from. */
    int d;                   ///< the score (cmp + penalty) of any given mv
870
    int dmin;                /**< the best value of d, i.e. the score
871
                               corresponding to the mv stored in best[]. */
872
    unsigned map_generation;
Michael Niedermayer's avatar
Michael Niedermayer committed
873
    int penalty_factor;
874 875
    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
876
    me_cmp_func cmpf, chroma_cmpf;
877

878
    LOAD_COMMON
879
    LOAD_COMMON2
880

Michael Niedermayer's avatar
Michael Niedermayer committed
881 882 883 884 885 886 887 888 889
    if(c->pre_pass){
        penalty_factor= c->pre_penalty_factor;
        cmpf= s->dsp.me_pre_cmp[size];
        chroma_cmpf= s->dsp.me_pre_cmp[size+1];
    }else{
        penalty_factor= c->penalty_factor;
        cmpf= s->dsp.me_cmp[size];
        chroma_cmpf= s->dsp.me_cmp[size+1];
    }
890

891
    map_generation= update_map_generation(c);
Michael Niedermayer's avatar
Michael Niedermayer committed
892

893
    assert(cmpf);
894
    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
Michael Niedermayer's avatar
Michael Niedermayer committed
895 896
    map[0]= map_generation;
    score_map[0]= dmin;
897 898

    //FIXME precalc first term below?
899
    if((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
900
        dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
Michael Niedermayer's avatar
Michael Niedermayer committed
901 902

    /* first line */
903
    if (s->first_slice_line) {
Michael Niedermayer's avatar
Michael Niedermayer committed
904
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
905
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
906
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
Michael Niedermayer's avatar
Michael Niedermayer committed
907
    }else{
908 909
        if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
                    && ( P_LEFT[0]    |P_LEFT[1]
Michael Niedermayer's avatar
Michael Niedermayer committed
910
                        |P_TOP[0]     |P_TOP[1]
911
                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
Michael Niedermayer's avatar
Michael Niedermayer committed
912 913
            *mx_ptr= 0;
            *my_ptr= 0;
914
            c->skip=1;
Michael Niedermayer's avatar
Michael Niedermayer committed
915 916
            return dmin;
        }
917
        CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
918 919 920 921 922
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
Michael Niedermayer's avatar
Michael Niedermayer committed
923 924 925 926
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
        CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
        CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
Michael Niedermayer's avatar
Michael Niedermayer committed
927
    }
928
    if(dmin>h*h*4){
929
        if(c->pre_pass){
930
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
931
                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
932
            if(!s->first_slice_line)
933
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
934
                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
935
        }else{
936
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
937
                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
938
            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
939
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
940
                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
941
        }
Michael Niedermayer's avatar
Michael Niedermayer committed
942
    }
943

944 945
    if(c->avctx->last_predictor_count){
        const int count= c->avctx->last_predictor_count;
946 947 948 949 950 951 952 953 954 955 956 957 958 959 960
        const int xstart= FFMAX(0, s->mb_x - count);
        const int ystart= FFMAX(0, s->mb_y - count);
        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
        int mb_y;

        for(mb_y=ystart; mb_y<yend; mb_y++){
            int mb_x;
            for(mb_x=xstart; mb_x<xend; mb_x++){
                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;

                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
                CHECK_MV(mx,my)
Michael Niedermayer's avatar
Michael Niedermayer committed
961 962 963
            }
        }
    }
964

Michael Niedermayer's avatar
Michael Niedermayer committed
965
//check(best[0],best[1],0, b0)
966
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
Michael Niedermayer's avatar
Michael Niedermayer committed
967 968 969

//check(best[0],best[1],0, b1)
    *mx_ptr= best[0];
970
    *my_ptr= best[1];
Michael Niedermayer's avatar
Michael Niedermayer committed
971 972 973 974 975

//    printf("%d %d %d \n", best[0], best[1], dmin);
    return dmin;
}

976
//this function is dedicated to the braindamaged gcc
977
inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
978
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
979
                             int ref_mv_scale, int size, int h)
980
{
981
    MotionEstContext * const c= &s->me;
982
//FIXME convert other functions in the same way if faster
983 984
    if(c->flags==0 && h==16 && size==0){
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
985 986
//    case FLAG_QPEL:
//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
987 988
    }else{
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
989 990 991 992 993
    }
}

/**
 * Predictor-seeded motion search with fixed size=1 and h=8.
 *
 * Candidate mvs from P[] and from last_mv (scaled as
 * (v*ref_mv_scale + (1<<15))>>16) are evaluated first, then the best one is
 * refined with diamond_search(). Unlike epzs_motion_search_internal() the
 * zero mv is not scored up front; dmin starts at a large sentinel value.
 *
 * @param mx_ptr, my_ptr  out: components of the chosen mv
 * @param P               predictor mvs (accessed through the P_* macros)
 * @param src_index, ref_index  forwarded to diamond_search()
 * @param last_mv         mv field indexed per macroblock
 * @param ref_mv_scale    fixed-point scale applied to last_mv entries
 * @return the score returned by diamond_search()
 */
static int epzs_motion_search4(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int P[10][2],
                             int src_index, int ref_index, int16_t (*last_mv)[2],
                             int ref_mv_scale)
{
    MotionEstContext * const c= &s->me;
    int best[2]={0, 0};
    int d, dmin;
    unsigned map_generation;
    const int penalty_factor= c->penalty_factor;
    const int size=1;
    const int h=8;
    const int ref_mv_stride= s->mb_stride;
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    int flags= c->flags;
    LOAD_COMMON2

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    map_generation= update_map_generation(c);

    // sentinel start value: no candidate has been scored yet
    dmin = 1000000;
    /* first line */
    if (s->first_slice_line) {
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    }else{
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
        //FIXME try some early stop
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
    }
    // still a poor match: also try last_mv of the next macroblock and of the
    // macroblock one row below
    if(dmin>64*4){
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
    }

    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    *mx_ptr= best[0];
    *my_ptr= best[1];

    return dmin;
}

//try to merge with above FIXME (needs PSNR test)
1052 1053
static int epzs_motion_search2(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int P[10][2],
1054
                             int src_index, int ref_index, int16_t (*last_mv)[2],
1055
                             int ref_mv_scale)
1056
{
1057
    MotionEstContext * const c= &s->me;
1058
    int best[2]={0, 0};
1059
    int d, dmin;
1060
    unsigned map_generation;
1061
    const int penalty_factor= c->penalty_factor;
1062 1063 1064 1065
    const int size=0; //FIXME pass as arg
    const int h=8;
    const int ref_mv_stride= s->mb_stride;
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1066
    me_cmp_func cmpf, chroma_cmpf;
1067
    LOAD_COMMON
1068
    int flags= c->flags;
1069
    LOAD_COMMON2
1070

1071 1072
    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];
1073

1074
    map_generation= update_map_generation(c);
1075 1076

    dmin = 1000000;
1077
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1078
    /* first line */
1079
    if (s->first_slice_line) {
1080
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1081
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1082 1083 1084 1085 1086
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    }else{
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
        //FIXME try some early stop
Michael Niedermayer's avatar
Michael Niedermayer committed
1087 1088 1089 1090
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1091
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
Michael Niedermayer's avatar
Michael Niedermayer committed
1092
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1093 1094
    }
    if(dmin>64*4){
1095
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1096
                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1097
        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1098
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1099
                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1100 1101
    }

1102
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1103

Michael Niedermayer's avatar
Michael Niedermayer committed
1104
    *mx_ptr= best[0];
1105
    *my_ptr= best[1];
Michael Niedermayer's avatar
Michael Niedermayer committed
1106 1107 1108 1109

//    printf("%d %d %d \n", best[0], best[1], dmin);
    return dmin;
}