/*
 * Motion estimation
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Motion estimation template.
 */
26

27
/*
 * Copy the commonly used MotionEstContext fields into locals.
 * Expects a MotionEstContext pointer named `c` in scope.
 * Not every user needs every local, hence av_unused; the compiler is
 * expected to drop the ones that stay unused (gcc 3.2.2 seems to do it).
 * The final line deliberately has no continuation backslash so the macro
 * cannot absorb whatever line follows it.
 */
#define LOAD_COMMON\
    uint32_t av_unused * const score_map= c->score_map;\
    const int av_unused xmin= c->xmin;\
    const int av_unused ymin= c->ymin;\
    const int av_unused xmax= c->xmax;\
    const int av_unused ymax= c->ymax;\
    uint8_t *mv_penalty= c->current_mv_penalty;\
    const int pred_x= c->pred_x;\
    const int pred_y= c->pred_y;
Michael Niedermayer's avatar
Michael Niedermayer committed
37 38 39 40 41

/*
 * Score the half-pel candidate at full-pel position (x, y) with half-pel
 * offset (dx, dy): hx/hy are the candidate coordinates in half-pel units,
 * d is the cmp_hpel() result plus the motion vector penalty for (hx, hy).
 * The result is handed to COPY3_IF_LT (defined elsewhere), which is
 * expected to update dmin/bx/by when d is smaller than dmin.
 *
 * Expects d, dmin, bx, by, s, size, h, ref_index, src_index, cmp_sub,
 * chroma_cmp_sub, flags, mv_penalty, pred_x, pred_y and penalty_factor in
 * scope. The arguments are expanded more than once and must be free of
 * side effects.
 */
#define CHECK_HALF_MV(dx, dy, x, y)\
{\
    const int hx= 2*(x)+(dx);\
    const int hy= 2*(y)+(dy);\
    d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}

#if 0
48
static int hpel_motion_search)(MpegEncContext * s,
49
                                  int *mx_ptr, int *my_ptr, int dmin,
50
                                  uint8_t *ref_data[3],
51
                                  int size)
Michael Niedermayer's avatar
Michael Niedermayer committed
52 53 54 55 56
{
    const int xx = 16 * s->mb_x + 8*(n&1);
    const int yy = 16 * s->mb_y + 8*(n>>1);
    const int mx = *mx_ptr;
    const int my = *my_ptr;
57
    const int penalty_factor= c->sub_penalty_factor;
58

59
    LOAD_COMMON
60

Michael Niedermayer's avatar
Michael Niedermayer committed
61 62 63 64 65 66 67 68 69 70 71
 //   INIT;
 //FIXME factorize
    me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;

    if(s->no_rounding /*FIXME b_type*/){
        hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
        chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
    }else{
        hpel_put=& s->dsp.put_pixels_tab[size];
        chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
    }
72 73
    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];
Michael Niedermayer's avatar
Michael Niedermayer committed
74 75 76
    cmp_sub= s->dsp.me_sub_cmp[size];
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];

77
    if(c->skip){ //FIXME somehow move up (benchmark)
Michael Niedermayer's avatar
Michael Niedermayer committed
78 79 80 81
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }
82

83
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
Michael Niedermayer's avatar
Michael Niedermayer committed
84 85 86 87
        CMP_HPEL(dmin, 0, 0, mx, my, size);
        if(mx || my)
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
    }
88 89

    if (mx > xmin && mx < xmax &&
Michael Niedermayer's avatar
Michael Niedermayer committed
90 91 92
        my > ymin && my < ymax) {
        int bx=2*mx, by=2*my;
        int d= dmin;
93

Michael Niedermayer's avatar
Michael Niedermayer committed
94
        CHECK_HALF_MV(1, 1, mx-1, my-1)
95
        CHECK_HALF_MV(0, 1, mx  , my-1)
Michael Niedermayer's avatar
Michael Niedermayer committed
96 97 98 99
        CHECK_HALF_MV(1, 1, mx  , my-1)
        CHECK_HALF_MV(1, 0, mx-1, my  )
        CHECK_HALF_MV(1, 0, mx  , my  )
        CHECK_HALF_MV(1, 1, mx-1, my  )
100
        CHECK_HALF_MV(0, 1, mx  , my  )
Michael Niedermayer's avatar
Michael Niedermayer committed
101 102
        CHECK_HALF_MV(1, 1, mx  , my  )

103
        assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2);
Michael Niedermayer's avatar
Michael Niedermayer committed
104 105 106 107 108 109 110 111 112 113 114 115

        *mx_ptr = bx;
        *my_ptr = by;
    }else{
        *mx_ptr =2*mx;
        *my_ptr =2*my;
    }

    return dmin;
}

#else
116
static int hpel_motion_search(MpegEncContext * s,
117
                                  int *mx_ptr, int *my_ptr, int dmin,
118 119
                                  int src_index, int ref_index,
                                  int size, int h)
Michael Niedermayer's avatar
Michael Niedermayer committed
120
{
121
    MotionEstContext * const c= &s->me;
Michael Niedermayer's avatar
Michael Niedermayer committed
122
    const int mx = *mx_ptr;
123
    const int my = *my_ptr;
124
    const int penalty_factor= c->sub_penalty_factor;
Michael Niedermayer's avatar
Michael Niedermayer committed
125
    me_cmp_func cmp_sub, chroma_cmp_sub;
126
    int bx=2*mx, by=2*my;
Michael Niedermayer's avatar
Michael Niedermayer committed
127

128
    LOAD_COMMON
129
    int flags= c->sub_flags;
130

Michael Niedermayer's avatar
Michael Niedermayer committed
131 132 133 134 135
 //FIXME factorize

    cmp_sub= s->dsp.me_sub_cmp[size];
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];

136
    if(c->skip){ //FIXME move out of hpel?
Michael Niedermayer's avatar
Michael Niedermayer committed
137 138 139 140
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }
141

142
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
143
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
Michael Niedermayer's avatar
Michael Niedermayer committed
144
        if(mx || my || size>0)
Michael Niedermayer's avatar
Michael Niedermayer committed
145 146
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
    }
147 148

    if (mx > xmin && mx < xmax &&
Michael Niedermayer's avatar
Michael Niedermayer committed
149 150 151
        my > ymin && my < ymax) {
        int d= dmin;
        const int index= (my<<ME_MAP_SHIFT) + mx;
152
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
153
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
Michael Niedermayer's avatar
Michael Niedermayer committed
154
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
155
                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
Michael Niedermayer's avatar
Michael Niedermayer committed
156
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
157
                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
Michael Niedermayer's avatar
Michael Niedermayer committed
158
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
159
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
160

161
#if 1
162
        int key;
163
        int map_generation= c->map_generation;
164
#ifndef NDEBUG
165
        uint32_t *map= c->map;
166
#endif
167 168 169 170 171 172 173 174
        key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
        assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
        key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
        assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
        key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
        assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
        key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
        assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
175
#endif
Michael Niedermayer's avatar
Michael Niedermayer committed
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
        if(t<=b){
            CHECK_HALF_MV(0, 1, mx  ,my-1)
            if(l<=r){
                CHECK_HALF_MV(1, 1, mx-1, my-1)
                if(t+r<=b+l){
                    CHECK_HALF_MV(1, 1, mx  , my-1)
                }else{
                    CHECK_HALF_MV(1, 1, mx-1, my  )
                }
                CHECK_HALF_MV(1, 0, mx-1, my  )
            }else{
                CHECK_HALF_MV(1, 1, mx  , my-1)
                if(t+l<=b+r){
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
                }else{
                    CHECK_HALF_MV(1, 1, mx  , my  )
                }
                CHECK_HALF_MV(1, 0, mx  , my  )
            }
        }else{
            if(l<=r){
                if(t+l<=b+r){
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
                }else{
                    CHECK_HALF_MV(1, 1, mx  , my  )
                }
                CHECK_HALF_MV(1, 0, mx-1, my)
                CHECK_HALF_MV(1, 1, mx-1, my)
            }else{
                if(t+r<=b+l){
                    CHECK_HALF_MV(1, 1, mx  , my-1)
                }else{
                    CHECK_HALF_MV(1, 1, mx-1, my)
                }
                CHECK_HALF_MV(1, 0, mx  , my)
                CHECK_HALF_MV(1, 1, mx  , my)
            }
            CHECK_HALF_MV(0, 1, mx  , my)
        }
        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
    }

218 219
    *mx_ptr = bx;
    *my_ptr = by;
220

Michael Niedermayer's avatar
Michael Niedermayer committed
221 222 223 224
    return dmin;
}
#endif

225 226 227 228 229 230 231 232 233 234
/**
 * Sub-pel stage that performs no search at all: the vector components are
 * only doubled (the same 2x scaling hpel_motion_search() applies to its
 * result) and the incoming score is returned unchanged.
 *
 * @param s          unused
 * @param mx_ptr     in/out: x component, doubled in place
 * @param my_ptr     in/out: y component, doubled in place
 * @param dmin       score of the incoming vector
 * @param src_index  unused
 * @param ref_index  unused
 * @param size       unused
 * @param h          unused
 * @return dmin
 */
static int no_sub_motion_search(MpegEncContext * s,
          int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    /* multiply instead of "<<= 1": the components may be negative and
       left-shifting a negative value is undefined behaviour */
    *mx_ptr *= 2;
    *my_ptr *= 2;
    return dmin;
}

Dominik Mierzejewski's avatar
Dominik Mierzejewski committed
235
/**
 * Score a motion vector with the macroblock comparison functions
 * (s->dsp.mb_cmp) and, optionally, add the motion vector penalty.
 *
 * mx/my are split into an integer part (mx >> (qpel+1)) and a fractional
 * part (mx & mask), i.e. they are in half-pel units, or in quarter-pel units
 * when FLAG_QPEL is set in c->mb_flags.
 * NOTE(review): mask = 1+2*qpel yields 1 or 3 only if FLAG_QPEL is the bit
 * with value 1 - confirm against its definition.
 *
 * @param s          encoder context (s->me is the motion estimation state)
 * @param mx         x component of the vector in sub-pel units
 * @param my         y component of the vector in sub-pel units
 * @param src_index  passed through to the comparison function
 * @param ref_index  passed through to the comparison function
 * @param size       selects mb_cmp[size] (luma) and mb_cmp[size+1] (chroma)
 * @param h          passed through to the comparison function
 * @param add_rate   if non-zero, add the motion vector penalty (skipped for
 *                   the zero vector when size is 0)
 * @return the score of the vector
 */
inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
                               int ref_index, int size, int h, int add_rate)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->mb_penalty_factor;
    const int flags= c->mb_flags;
    const int qpel= flags & FLAG_QPEL;
    const int mask= 1+2*qpel;
    me_cmp_func cmp_sub, chroma_cmp_sub;
    int d;

    LOAD_COMMON

 //FIXME factorize

    cmp_sub= s->dsp.mb_cmp[size];
    chroma_cmp_sub= s->dsp.mb_cmp[size+1];

    /* the right shift of a possibly negative component relies on the
       compiler implementing it as an arithmetic shift */
    d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
    //FIXME check cbp before adding penalty for (0,0) vector
    if(add_rate && (mx || my || size>0))
        d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;

    return d;
}

Michael Niedermayer's avatar
Michael Niedermayer committed
265 266 267 268
/*
 * Score the quarter-pel candidate at full-pel position (x, y) with
 * quarter-pel offset (dx, dy): hx/hy are the candidate coordinates in
 * quarter-pel units, d is the cmp_qpel() result plus the motion vector
 * penalty for (hx, hy). The result is handed to COPY3_IF_LT (defined
 * elsewhere), which is expected to update dmin/bx/by when d is smaller.
 *
 * Expects d, dmin, bx, by, s, size, h, ref_index, src_index, cmpf,
 * chroma_cmpf, flags, mv_penalty, pred_x, pred_y and penalty_factor in
 * scope. The arguments are expanded more than once and must be free of
 * side effects.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)\
{\
    const int hx= 4*(x)+(dx);\
    const int hy= 4*(y)+(dy);\
    d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}

274
/**
 * Refine a full-pel motion vector to quarter-pel precision.
 *
 * The scores the full-pel search left in score_map around (mx, my) are used
 * to fit a quadratic model, the model is evaluated at the 48 sub-pel
 * positions within +-3 quarter-pels, and the 8 positions with the lowest
 * predicted score are collected. The first me_subpel_quality of them (at
 * most 8) are then actually measured with CHECK_QUARTER_MV.
 *
 * On entry *mx_ptr / *my_ptr hold the full-pel vector; on return they hold
 * the chosen vector in quarter-pel units, or (0,0) when c->skip is set.
 *
 * @param s          encoder context (s->me is the motion estimation state)
 * @param mx_ptr     in: full-pel x, out: quarter-pel x
 * @param my_ptr     in: full-pel y, out: quarter-pel y
 * @param dmin       score of the incoming full-pel vector
 * @param src_index  passed through to the comparison function
 * @param ref_index  passed through to the comparison function
 * @param size       selects the comparison functions ([size] for luma,
 *                   [size+1] for chroma)
 * @param h          passed through to the comparison function
 * @return the best score found
 */
static int qpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int mx = *mx_ptr;
    const int my = *my_ptr;
    const int penalty_factor= c->sub_penalty_factor;
    const int map_generation= c->map_generation;
    const int subpel_quality= c->avctx->me_subpel_quality;
    uint32_t *map= c->map;
    me_cmp_func cmpf, chroma_cmpf;
    me_cmp_func cmp_sub, chroma_cmp_sub;

    LOAD_COMMON
    int flags= c->sub_flags;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
 //FIXME factorize

    cmp_sub= s->dsp.me_sub_cmp[size];
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];

    if(c->skip){ //FIXME somehow move up (benchmark)
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    /* dmin was produced with me_cmp; if the sub-pel metric differs, the
       full-pel position has to be rescored so the values are comparable */
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
        if(mx || my || size>0)
            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
    }

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int bx=4*mx, by=4*my;
        int d= dmin;
        int i, nx, ny;
        /* multiply instead of shifting: my may be negative */
        const int index= my*(1<<ME_MAP_SHIFT) + mx;
        /* cached full-pel scores: neighbours at y-1 (t), x-1 (l), x+1 (r),
           y+1 (b) and the centre itself (ctr) */
        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
        const int ctr= score_map[(index                  )&(ME_MAP_SIZE-1)];
        int best[8];
        int best_pos[8][2];
        /* only 8 candidates are collected, so never refine more than that
           even if me_subpel_quality is larger */
        const int refine_count= FFMIN(subpel_quality, (int)(sizeof(best)/sizeof(best[0])));

        /* every byte 64 -> every entry 0x40404040, a "very bad" score */
        memset(best, 64, sizeof(int)*8);
        if(s->me.dia_size>=2){
            /* the diagonal neighbours are available too: interpolate
               quadratically in x on three rows, then in y */
            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];

            for(ny= -3; ny <= 3; ny++){
                for(nx= -3; nx <= 3; nx++){
                    //FIXME this could overflow (unlikely though)
                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
                    const int64_t c2= nx*nx*( r +  l - 2*ctr) + 4*nx*( r- l) + 32*ctr;
                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;

                    /* full-pel positions were already measured */
                    if((nx&3)==0 && (ny&3)==0) continue;

                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;

                    /* insertion into the sorted list of the 8 best */
                    for(i=0; i<8; i++){
                        if(score < best[i]){
                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
                            best[i]= score;
                            best_pos[i][0]= nx + 4*mx;
                            best_pos[i][1]= ny + 4*my;
                            break;
                        }
                    }
                }
            }
        }else{
            int tl;
            //FIXME this could overflow (unlikely though)
            const int cx = 4*(r - l);
            const int cx2= r + l - 2*ctr;
            const int cy = 4*(b - t);
            const int cy2= b + t - 2*ctr;
            int cxy;

            /* the score_map shortcut is disabled by the "&& 0", so the
               top-left neighbour is always measured */
            if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (unsigned)(my*(1<<ME_MAP_MV_BITS) + mx) + map_generation && 0){ //FIXME
                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
            }else{
                tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
            }

            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*ctr;

            /* the model must reproduce the five measured points */
            assert(16*cx2 + 4*cx + 32*ctr == 32*r);
            assert(16*cx2 - 4*cx + 32*ctr == 32*l);
            assert(16*cy2 + 4*cy + 32*ctr == 32*b);
            assert(16*cy2 - 4*cy + 32*ctr == 32*t);
            assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*ctr == 32*tl);

            for(ny= -3; ny <= 3; ny++){
                for(nx= -3; nx <= 3; nx++){
                    //FIXME this could overflow (unlikely though)
                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*ctr; //FIXME factor

                    /* full-pel positions were already measured */
                    if((nx&3)==0 && (ny&3)==0) continue;

                    /* the model above is scaled by 32, so is the penalty */
                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;

                    /* insertion into the sorted list of the 8 best */
                    for(i=0; i<8; i++){
                        if(score < best[i]){
                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
                            best[i]= score;
                            best_pos[i][0]= nx + 4*mx;
                            best_pos[i][1]= ny + 4*my;
                            break;
                        }
                    }
                }
            }
        }
        /* measure the most promising candidates for real */
        for(i=0; i<refine_count; i++){
            nx= best_pos[i][0];
            ny= best_pos[i][1];
            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
        }

        assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);

        *mx_ptr = bx;
        *my_ptr = by;
    }else{
        *mx_ptr =4*mx;
        *my_ptr =4*my;
    }

    return dmin;
}


/*
 * Score the full-pel candidate (x, y) unless the map shows it was already
 * visited in the current generation.
 *
 * A new candidate is measured with cmp(), its raw score is cached in
 * score_map, its key is stored in map, and the score plus motion vector
 * penalty is handed to COPY3_IF_LT (defined elsewhere), which is expected
 * to update dmin/best[0]/best[1] when d is smaller than dmin.
 *
 * key and index are computed in unsigned arithmetic, and the penalty index
 * uses a multiplication, because x and y may be negative and left-shifting
 * a negative value is undefined behaviour.
 *
 * Expects d, dmin, best, map, score_map, map_generation, shift, xmin, xmax,
 * ymin, ymax, mv_penalty, pred_x, pred_y, penalty_factor, s, size, h,
 * ref_index, src_index, cmpf, chroma_cmpf and flags in scope. The arguments
 * are expanded more than once and must be free of side effects.
 */
#define CHECK_MV(x,y)\
{\
    const unsigned key= ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const unsigned index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    assert((x) >= xmin);\
    assert((x) <= xmax);\
    assert((y) >= ymin);\
    assert((y) <= ymax);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
    }\
}

517
/*
 * Clamp the candidate (ax, ay) to [xmin, xmax] x [ymin, ymax] and pass the
 * clamped position to CHECK_MV. Each argument is evaluated exactly once.
 * Expects xmin, xmax, ymin, ymax and everything CHECK_MV needs in scope.
 */
#define CHECK_CLIPPED_MV(ax,ay)\
{\
    const int Lx= (ax);\
    const int Ly= (ay);\
    const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
    const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
    CHECK_MV(Lx2, Ly2)\
}

Michael Niedermayer's avatar
Michael Niedermayer committed
526 527 528 529
/*
 * Like CHECK_MV, but without the range asserts and with direction
 * tracking: when the candidate (x, y) becomes the new best, next_dir is set
 * to new_dir so the caller knows which way the search moved.
 *
 * key and index are computed in unsigned arithmetic, and the penalty index
 * uses a multiplication, because x and y may be negative and left-shifting
 * a negative value is undefined behaviour.
 *
 * Expects d, dmin, best, next_dir, map, score_map, map_generation, shift,
 * mv_penalty, pred_x, pred_y, penalty_factor, s, size, h, ref_index,
 * src_index, cmpf, chroma_cmpf and flags in scope. The arguments are
 * expanded more than once and must be free of side effects.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const unsigned key= ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const unsigned index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        if(d<dmin){\
            best[0]=x;\
            best[1]=y;\
            dmin=d;\
            next_dir= new_dir;\
        }\
    }\
}

/*
 * Debug helper: print a diagnostic when (x, y), given in units scaled by
 * 2^S, lies outside [xmin, xmax] x [ymin, ymax]. v is pasted into the
 * message as a tag. Expects xmin, xmax, ymin, ymax and s in scope.
 *
 * The four tests are wrapped in one block so the macro behaves as a single
 * statement, and the limits are scaled by multiplication because they may
 * be negative (left-shifting a negative value is undefined behaviour).
 */
#define check(x,y,S,v)\
{\
    if( (x)<(xmin*(1<<(S))) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
    if( (x)>(xmax*(1<<(S))) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
    if( (y)<(ymin*(1<<(S))) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
    if( (y)>(ymax*(1<<(S))) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
}

552
/*
 * Locals needed by the full-pel search macros: the visited map, whether
 * quarter-pel is selected in flags, and the resulting scale (as a shift)
 * applied to a full-pel vector when indexing mv_penalty.
 * Expects `c` (MotionEstContext pointer) and `flags` in scope.
 * NOTE(review): shift = 1+qpel is 1 or 2 only if FLAG_QPEL is the bit with
 * value 1 - confirm against its definition.
 * The final line deliberately has no continuation backslash so the macro
 * cannot absorb whatever line follows it.
 */
#define LOAD_COMMON2\
    uint32_t *map= c->map;\
    const int qpel= flags&FLAG_QPEL;\
    const int shift= 1+qpel;
Michael Niedermayer's avatar
Michael Niedermayer committed
556

557
/**
 * Full-pel search that repeatedly tests the four direct neighbours of the
 * current best position and moves to a neighbour whenever it scores better,
 * until no neighbour improves the score.
 *
 * Directions are numbered 0: x-1, 1: y-1, 2: x+1, 3: y+1. The neighbour
 * opposite to the last move is skipped because it is the position the
 * search just came from.
 *
 * @param s               encoder context (s->me is the search state)
 * @param best            in/out: best[0] = x, best[1] = y of the best vector
 * @param dmin            score of the incoming best vector
 * @param src_index       passed through to the comparison function
 * @param ref_index       passed through to the comparison function
 * @param penalty_factor  weight of the motion vector penalty
 * @param size            selects me_cmp[size] (luma), me_cmp[size+1] (chroma)
 * @param h               passed through to the comparison function
 * @param flags           comparison flags; FLAG_QPEL selects the vector scale
 * @return the score of the final best vector
 */
static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int next_dir=-1;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
        /* unsigned arithmetic: best[1] may be negative, and left-shifting
           a negative value is undefined behaviour */
        const unsigned key= ((unsigned)best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
        const unsigned index= (((unsigned)best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
        if(map[index]!=key){ //this will be executed only very rarely
            score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
            map[index]= key;
        }
    }

    for(;;){
        int d;
        const int dir= next_dir;
        const int x= best[0];
        const int y= best[1];
        next_dir=-1;

        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)

        /* no neighbour was better: converged */
        if(next_dir==-1){
            return dmin;
        }
    }
}

599 600 601
/**
 * Full-pel search that tests points on diamonds of radius 1, 2 and 4
 * around the current best position and restarts from radius 1 whenever the
 * best position moved.
 *
 * A radius is skipped when the diamond would not fit inside
 * [xmin, xmax] x [ymin, ymax]. Only every second point along each edge of
 * the diamond is tested (dir advances by 2).
 *
 * @param s               encoder context (s->me is the search state)
 * @param best            in/out: best[0] = x, best[1] = y of the best vector
 * @param dmin            score of the incoming best vector
 * @param src_index       passed through to the comparison function
 * @param ref_index       passed through to the comparison function
 * @param penalty_factor  weight of the motion vector penalty
 * @param size            selects me_cmp[size] (luma), me_cmp[size+1] (chroma)
 * @param h               passed through to the comparison function
 * @param flags           comparison flags; FLAG_QPEL selects the vector scale
 * @return the score of the final best vector
 */
static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(dia_size=1; dia_size<=4; dia_size++){
        int dir;
        const int x= best[0];
        const int y= best[1];

        /* only powers of two: 1, 2, 4 */
        if(dia_size&(dia_size-1)) continue;

        if(   x + dia_size > xmax
           || x - dia_size < xmin
           || y + dia_size > ymax
           || y - dia_size < ymin)
           continue;

        for(dir= 0; dir<dia_size; dir+=2){
            int d;

            CHECK_MV(x + dir           , y + dia_size - dir);
            CHECK_MV(x + dia_size - dir, y - dir           );
            CHECK_MV(x - dir           , y - dia_size + dir);
            CHECK_MV(x - dia_size + dir, y + dir           );
        }

        /* the best position moved: the loop increment turns the 0 into 1,
           so the search restarts with the smallest diamond */
        if(x!=best[0] || y!=best[1])
            dia_size=0;
    }
    return dmin;
}

Michael Niedermayer's avatar
Michael Niedermayer committed
660 661 662 663 664 665 666 667 668
/**
 * Full-pel hexagon search.
 *
 * For the current radius the points (x +- dia_size, y) and
 * (x + (dia_size>>1), y +- dia_size) are tested, plus
 * (x + (-dia_size>>1), y +- dia_size) when dia_size > 1; every candidate is
 * clamped to the allowed range first. This repeats until the best position
 * stops moving, then the radius is reduced: by 1 per round if the initial
 * dia_size was not a power of two, by halving otherwise.
 *
 * @param s               encoder context (s->me is the search state)
 * @param best            in/out: best[0] = x, best[1] = y of the best vector
 * @param dmin            score of the incoming best vector
 * @param src_index       passed through to the comparison function
 * @param ref_index       passed through to the comparison function
 * @param penalty_factor  weight of the motion vector penalty
 * @param size            selects me_cmp[size] (luma), me_cmp[size+1] (chroma)
 * @param h               passed through to the comparison function
 * @param flags           comparison flags; FLAG_QPEL selects the vector scale
 * @param dia_size        initial radius; 0 performs no search
 * @return the score of the final best vector
 */
static int hex_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags, int dia_size)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int x,y,d;
    /* non-zero when the initial radius is not a power of two; evaluated
       once, so the shrink rule stays the same for the whole search */
    const int dec= dia_size & (dia_size-1);

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
        do{
            x= best[0];
            y= best[1];

            CHECK_CLIPPED_MV(x  -dia_size    , y);
            CHECK_CLIPPED_MV(x+  dia_size    , y);
            CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
            CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
            if(dia_size>1){
                /* -dia_size>>1 relies on an arithmetic right shift */
                CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
                CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
            }
        }while(best[0] != x || best[1] != y);
    }

    return dmin;
}

/**
 * Large-to-small diamond search around best[].
 *
 * An 8-point diamond of radius 2 (table hex[]), scaled by the current
 * dia_size, is tested repeatedly until best[] stops moving. The scale is
 * then reduced (halved if the initial value is a power of two, decremented
 * otherwise) down to 0. Finally the four direct neighbours of the resulting
 * position are tested once.
 *
 * The initial scale is the low 8 bits of c->dia_size.
 *
 * CHECK_CLIPPED_MV is a macro defined outside this function; presumably it
 * evaluates the candidate and updates d, dmin and best[] -- confirm against
 * its definition.
 *
 * @param s     encoder context; s->me and s->dsp.me_cmp are read
 * @param best  in/out: x/y of the best motion vector
 * @param dmin  score associated with best[] on entry
 * @param size  index into s->dsp.me_cmp[] (size+1 is used for chroma)
 * @return the value of dmin when the search terminates
 */
static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int x,y,i,d;
    int dia_size= c->dia_size&0xFF;
    // non-zero iff the initial dia_size is not a power of two
    const int dec= dia_size & (dia_size-1);
    static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
                                { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
        // repeat at this scale until the best position stops moving
        do{
            x= best[0];
            y= best[1];
            for(i=0; i<8; i++){
                CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
            }
        }while(best[0] != x || best[1] != y);
    }

    // final pass over the four direct neighbours
    x= best[0];
    y= best[1];
    CHECK_CLIPPED_MV(x+1, y);
    CHECK_CLIPPED_MV(x, y+1);
    CHECK_CLIPPED_MV(x-1, y);
    CHECK_CLIPPED_MV(x, y-1);

    return dmin;
}

732 733 734 735 736 737 738 739 740 741
/**
 * Multi-stage search: cross scan, 5x5 window, scaled 16-point pattern,
 * then hex_search() with radius 2.
 *
 * Stages, with dia_size = c->dia_size & 0xFE:
 *  1. horizontal scan through the starting position, every second x in
 *     [x-dia_size+1, x+dia_size-1] clipped to [xmin, xmax];
 *  2. vertical scan through the same starting position, every second y in
 *     [y-dia_size/2+1, y+dia_size/2-1] clipped to [ymin, ymax];
 *  3. exhaustive 5x5 window around the best position found so far;
 *  4. the 16-point pattern hex[] scaled by j = 1 .. dia_size/4 around that
 *     same centre;
 *  5. hex_search() starting from best[] with radius 2.
 *
 * CHECK_MV / CHECK_CLIPPED_MV are macros defined outside this function;
 * presumably they evaluate the candidate and update d, dmin and best[] --
 * confirm against their definitions.
 *
 * @param s     encoder context; s->me and s->dsp.me_cmp are read
 * @param best  in/out: x/y of the best motion vector
 * @param dmin  score associated with best[] on entry
 * @param size  index into s->dsp.me_cmp[] (size+1 is used for chroma)
 * @return the value returned by the final hex_search()
 */
static int umh_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int x,y,x2,y2, i, j, d;
    const int dia_size= c->dia_size&0xFE;
    static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
                                 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
                                 {-2, 3}, { 0, 4}, { 2, 3},
                                 {-2,-3}, { 0,-4}, { 2,-3},};

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    // stages 1 and 2: cross scan; x and y stay at the starting position
    x= best[0];
    y= best[1];
    for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
        CHECK_MV(x2, y);
    }
    for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
        CHECK_MV(x, y2);
    }

    // stage 3: 5x5 window around the best position so far
    x= best[0];
    y= best[1];
    for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
        for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
            CHECK_MV(x2, y2);
        }
    }

//FIXME prevent the CLIP stuff

    // stage 4: growing 16-point pattern, still centred on (x, y) from stage 3
    for(j=1; j<=dia_size/4; j++){
        for(i=0; i<16; i++){
            CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
        }
    }

    // stage 5: local hexagon refinement
    return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
}

Michael Niedermayer's avatar
Michael Niedermayer committed
779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813
/**
 * Exhaustive search over a square window.
 *
 * Every position (x, y) with -dia_size <= x,y <= dia_size, clipped to
 * [xmin, xmax] x [ymin, ymax], is tested, where dia_size is the low 8 bits
 * of c->dia_size. The window is in absolute MV coordinates, not relative to
 * the incoming best[]. Afterwards the best position and its four direct
 * neighbours are passed through CHECK_CLIPPED_MV once more.
 *
 * CHECK_MV / CHECK_CLIPPED_MV are macros defined outside this function;
 * presumably they evaluate the candidate and update d, dmin and best[] --
 * confirm against their definitions.
 *
 * NOTE(review): after the final neighbour checks, best[] is reset to the
 * position found by the window scan and the function returns d rather than
 * dmin. If one of those checks computes a new score, d no longer equals the
 * score of best[]. Behaviour is kept as is; confirm whether it is intended.
 *
 * @param s     encoder context; s->me and s->dsp.me_cmp are read
 * @param best  in/out: x/y of the best motion vector
 * @param dmin  score associated with best[] on entry
 * @param size  index into s->dsp.me_cmp[] (size+1 is used for chroma)
 * @return d as left by the last candidate evaluation (see note above)
 */
static int full_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int x,y, d;
    const int dia_size= c->dia_size&0xFF;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    // exhaustive scan of the clipped window
    for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
        for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
            CHECK_MV(x, y);
        }
    }

    // revisit the winner and its direct neighbours
    x= best[0];
    y= best[1];
    d= dmin;
    CHECK_CLIPPED_MV(x  , y);
    CHECK_CLIPPED_MV(x+1, y);
    CHECK_CLIPPED_MV(x, y+1);
    CHECK_CLIPPED_MV(x-1, y);
    CHECK_CLIPPED_MV(x, y-1);
    best[0]= x;
    best[1]= y;

    return d;
}

814 815 816 817 818 819
/**
 * Evaluate candidate (ax, ay) for sab_diamond_search().
 *
 * If the candidate is not yet recorded in map[] for the current
 * map_generation, its score is computed with cmp(), stored in
 * map[]/score_map[], and the MV penalty is added. If the resulting score is
 * lower than the worst entry of minima[], the candidate is inserted at its
 * sorted position (dropping the last entry), the caller's loop index i is
 * set to -1 and `continue` restarts the caller's loop.
 *
 * Deliberately NOT wrapped in do{}while(0): the `continue` must act on the
 * loop surrounding the macro invocation. The inner `j` shadows any `j` of
 * the caller.
 *
 * The shifts that build key and index are done on unsigned values so that a
 * negative ay does not cause a left shift of a negative number; the
 * resulting bit patterns are unchanged.
 *
 * Expects these names in scope: s, map, score_map, map_generation, d, i,
 * minima, minima_count, mv_penalty, pred_x, pred_y, shift, penalty_factor,
 * size, h, ref_index, src_index, cmpf, chroma_cmpf, flags.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const unsigned key= ((unsigned)(ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
    const int index= (((unsigned)(ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
        if(d < minima[minima_count-1].height){\
            int j=0;\
            \
            while(d >= minima[j].height) j++;\
\
            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
\
            minima[j].checked= 0;\
            minima[j].height= d;\
            minima[j].x= ax;\
            minima[j].y= ay;\
            \
            i=-1;\
            continue;\
        }\
    }\
}

843
#define MAX_SAB_SIZE ME_MAP_SIZE
844 845 846
static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
847
{
848
    MotionEstContext * const c= &s->me;
849
    me_cmp_func cmpf, chroma_cmpf;
850
    Minima minima[MAX_SAB_SIZE];
851
    const int minima_count= FFABS(c->dia_size);
852
    int i, j;
853
    LOAD_COMMON
854
    LOAD_COMMON2
855
    int map_generation= c->map_generation;
856

857 858
    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];
859

860 861 862 863
    /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
      become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
     */
    for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
864 865 866
        uint32_t key= map[i];

        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
867

868
        if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
869

870 871 872 873 874
        minima[j].height= score_map[i];
        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
875 876 877 878 879 880

        // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
        if(   minima[j].x > xmax || minima[j].x < xmin
           || minima[j].y > ymax || minima[j].y < ymin)
            continue;

881 882 883
        minima[j].checked=0;
        if(minima[j].x || minima[j].y)
            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
884

885 886
        j++;
    }
887

888
    qsort(minima, j, sizeof(Minima), minima_cmp);
889

890 891 892 893 894
    for(; j<minima_count; j++){
        minima[j].height=256*256*256*64;
        minima[j].checked=0;
        minima[j].x= minima[j].y=0;
    }
895

896 897 898 899
    for(i=0; i<minima_count; i++){
        const int x= minima[i].x;
        const int y= minima[i].y;
        int d;
900

901
        if(minima[i].checked) continue;
902

903 904 905 906 907 908 909 910
        if(   x >= xmax || x <= xmin
           || y >= ymax || y <= ymin)
           continue;

        SAB_CHECK_MV(x-1, y)
        SAB_CHECK_MV(x+1, y)
        SAB_CHECK_MV(x  , y-1)
        SAB_CHECK_MV(x  , y+1)
911

912 913
        minima[i].checked= 1;
    }
914

915 916 917
    best[0]= minima[0].x;
    best[1]= minima[0].y;
    dmin= minima[0].height;
918

919 920 921 922 923 924 925 926 927
    if(   best[0] < xmax && best[0] > xmin
       && best[1] < ymax && best[1] > ymin){
        int d;
        //ensure that the refernece samples for hpel refinement are in the map
        CHECK_MV(best[0]-1, best[1])
        CHECK_MV(best[0]+1, best[1])
        CHECK_MV(best[0], best[1]-1)
        CHECK_MV(best[0], best[1]+1)
    }
928
    return dmin;
929 930
}

931 932 933
/**
 * Diamond search with a growing radius.
 *
 * For dia_size = 1 .. c->dia_size the four edges of the diamond of that
 * radius around best[] are tested; the start/end bounds of each edge walk
 * keep the candidates inside [xmin, xmax] x [ymin, ymax]. As soon as a
 * round moves best[], the radius restarts at 1 around the new position.
 * The search ends when a full sweep up to c->dia_size brings no improvement.
 *
 * CHECK_MV is a macro defined outside this function; presumably it
 * evaluates the candidate and updates d, dmin and best[] -- confirm against
 * its definition.
 *
 * @param s     encoder context; s->me and s->dsp.me_cmp are read
 * @param best  in/out: x/y of the best motion vector
 * @param dmin  score associated with best[] on entry
 * @param size  index into s->dsp.me_cmp[] (size+1 is used for chroma)
 * @return the value of dmin when the search terminates
 */
static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(dia_size=1; dia_size<=c->dia_size; dia_size++){
        int dir, start, end;
        const int x= best[0];
        const int y= best[1];

        // edge from (x, y+dia_size) towards (x+dia_size, y)
        start= FFMAX(0, y + dia_size - ymax);
        end  = FFMIN(dia_size, xmax - x + 1);
        for(dir= start; dir<end; dir++){
            int d;

            CHECK_MV(x + dir           , y + dia_size - dir);
        }

        // edge from (x+dia_size, y) towards (x, y-dia_size)
        start= FFMAX(0, x + dia_size - xmax);
        end  = FFMIN(dia_size, y - ymin + 1);
        for(dir= start; dir<end; dir++){
            int d;

            CHECK_MV(x + dia_size - dir, y - dir           );
        }

        // edge from (x, y-dia_size) towards (x-dia_size, y)
        start= FFMAX(0, -y + dia_size + ymin );
        end  = FFMIN(dia_size, x - xmin + 1);
        for(dir= start; dir<end; dir++){
            int d;

            CHECK_MV(x - dir           , y - dia_size + dir);
        }

        // edge from (x-dia_size, y) towards (x, y+dia_size)
        start= FFMAX(0, -x + dia_size + xmin );
        end  = FFMIN(dia_size, ymax - y + 1);
        for(dir= start; dir<end; dir++){
            int d;

            CHECK_MV(x - dia_size + dir, y + dir           );
        }

        // best moved: restart with radius 1 (the loop increment makes it 1)
        if(x!=best[0] || y!=best[1])
            dia_size=0;
    }
    return dmin;
}

1008
/**
 * Dispatch to the search routine selected by s->me.dia_size.
 *
 *   dia_size == -1        funny_diamond_search()
 *   dia_size <  -1        sab_diamond_search()
 *   dia_size 0 .. 1       small_diamond_search()
 *   dia_size 2 .. 256     var_diamond_search()
 *   dia_size 257 .. 512   l2s_dia_search()
 *   dia_size 513 .. 768   hex_search() with radius dia_size & 0xFF
 *   dia_size 769 .. 1024  umh_search()
 *   dia_size >  1024      full_search()
 *
 * All arguments are forwarded unchanged to the selected routine.
 *
 * @return the value returned by the selected search routine
 */
static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags){
    const int dia_size= s->me.dia_size;

    if(dia_size==-1)
        return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia_size<-1)
        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia_size<2)
        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia_size>1024)
        return          full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia_size>768)
        return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia_size>512)
        return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, dia_size&0xFF);
    if(dia_size>256)
        return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
}

1030
/*!
1031
   \param P[10][2] a list of candidate mvs to check before starting the
1032 1033 1034 1035
   iterative search. If one of the candidates is close to the optimal mv, then
   it takes fewer iterations. And it increases the chance that we find the
   optimal mv.
 */
1036
static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1037
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1038
                             int ref_mv_scale, int flags, int size, int h)
Michael Niedermayer's avatar
Michael Niedermayer committed
1039
{
1040
    MotionEstContext * const c= &s->me;
1041 1042
    int best[2]={0, 0};      /*!< x and y coordinates of the best motion vector.
                               i.e. the difference between the position of the
Loren Merritt's avatar
Loren Merritt committed
1043
                               block currently being encoded and the position of
1044 1045 1046 1047
                               the block chosen to predict it from. */
    int d;                   ///< the score (cmp + penalty) of any given mv
    int dmin;                /*!< the best value of d, i.e. the score
                               corresponding to the mv stored in best[]. */
Michael Niedermayer's avatar
Michael Niedermayer committed
1048
    int map_generation;
Michael Niedermayer's avatar
Michael Niedermayer committed
1049
    int penalty_factor;
1050 1051
    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
1052
    me_cmp_func cmpf, chroma_cmpf;
1053

1054
    LOAD_COMMON
1055
    LOAD_COMMON2
1056

Michael Niedermayer's avatar
Michael Niedermayer committed
1057 1058 1059 1060 1061 1062 1063 1064 1065
    if(c->pre_pass){
        penalty_factor= c->pre_penalty_factor;
        cmpf= s->dsp.me_pre_cmp[size];
        chroma_cmpf= s->dsp.me_pre_cmp[size+1];
    }else{
        penalty_factor= c->penalty_factor;
        cmpf= s->dsp.me_cmp[size];
        chroma_cmpf= s->dsp.me_cmp[size+1];
    }
1066

1067
    map_generation= update_map_generation(c);
Michael Niedermayer's avatar
Michael Niedermayer committed
1068

1069
    assert(cmpf);
1070
    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
Michael Niedermayer's avatar
Michael Niedermayer committed
1071 1072
    map[0]= map_generation;
    score_map[0]= dmin;
1073 1074

    //FIXME precalc first term below?
1075
    if((s->pict_type == FF_B_TYPE && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
1076
        dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
Michael Niedermayer's avatar
Michael Niedermayer committed
1077 1078

    /* first line */
1079
    if (s->first_slice_line) {
Michael Niedermayer's avatar
Michael Niedermayer committed
1080
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1081
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1082
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
Michael Niedermayer's avatar
Michael Niedermayer committed
1083
    }else{
1084 1085
        if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
                    && ( P_LEFT[0]    |P_LEFT[1]
Michael Niedermayer's avatar
Michael Niedermayer committed
1086
                        |P_TOP[0]     |P_TOP[1]
1087
                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
Michael Niedermayer's avatar
Michael Niedermayer committed
1088 1089
            *mx_ptr= 0;
            *my_ptr= 0;
1090
            c->skip=1;
Michael Niedermayer's avatar
Michael Niedermayer committed
1091 1092
            return dmin;
        }
1093
        CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
1094 1095 1096 1097 1098
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
Michael Niedermayer's avatar
Michael Niedermayer committed
1099 1100 1101 1102
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
        CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
        CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
Michael Niedermayer's avatar
Michael Niedermayer committed
1103
    }
1104
    if(dmin>h*h*4){
1105
        if(c->pre_pass){
1106
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
1107
                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
1108
            if(!s->first_slice_line)
1109
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1110
                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1111
        }else{
1112
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1113
                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1114
            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1115
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1116
                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1117
        }
Michael Niedermayer's avatar
Michael Niedermayer committed
1118
    }
1119

1120 1121
    if(c->avctx->last_predictor_count){
        const int count= c->avctx->last_predictor_count;
1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136
        const int xstart= FFMAX(0, s->mb_x - count);
        const int ystart= FFMAX(0, s->mb_y - count);
        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
        int mb_y;

        for(mb_y=ystart; mb_y<yend; mb_y++){
            int mb_x;
            for(mb_x=xstart; mb_x<xend; mb_x++){
                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;

                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
                CHECK_MV(mx,my)
Michael Niedermayer's avatar
Michael Niedermayer committed
1137 1138 1139
            }
        }
    }
1140

Michael Niedermayer's avatar
Michael Niedermayer committed
1141
//check(best[0],best[1],0, b0)
1142
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
Michael Niedermayer's avatar
Michael Niedermayer committed
1143 1144 1145

//check(best[0],best[1],0, b1)
    *mx_ptr= best[0];
1146
    *my_ptr= best[1];
Michael Niedermayer's avatar
Michael Niedermayer committed
1147 1148 1149 1150 1151

//    printf("%d %d %d \n", best[0], best[1], dmin);
    return dmin;
}

1152
//this function is dedicated to the braindamaged gcc
1153
inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1154
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1155
                             int ref_mv_scale, int size, int h)
1156
{
1157
    MotionEstContext * const c= &s->me;
1158
//FIXME convert other functions in the same way if faster
1159 1160
    if(c->flags==0 && h==16 && size==0){
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
1161 1162
//    case FLAG_QPEL:
//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
1163 1164
    }else{
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
1165 1166 1167 1168 1169
    }
}

static int epzs_motion_search4(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int P[10][2],
1170
                             int src_index, int ref_index, int16_t (*last_mv)[2],
1171
                             int ref_mv_scale)
Michael Niedermayer's avatar
Michael Niedermayer committed
1172
{
1173
    MotionEstContext * const c= &s->me;
Michael Niedermayer's avatar
Michael Niedermayer committed
1174
    int best[2]={0, 0};
1175
    int d, dmin;
Michael Niedermayer's avatar
Michael Niedermayer committed
1176
    int map_generation;
1177
    const int penalty_factor= c->penalty_factor;
Michael Niedermayer's avatar
Michael Niedermayer committed
1178
    const int size=1;
1179
    const int h=8;
1180 1181
    const int ref_mv_stride= s->mb_stride;
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1182
    me_cmp_func cmpf, chroma_cmpf;
1183
    LOAD_COMMON
1184
    int flags= c->flags;
1185
    LOAD_COMMON2
1186

1187 1188
    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];
Michael Niedermayer's avatar
Michael Niedermayer committed
1189

1190
    map_generation= update_map_generation(c);
Michael Niedermayer's avatar
Michael Niedermayer committed
1191 1192

    dmin = 1000000;
1193
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
Michael Niedermayer's avatar
Michael Niedermayer committed
1194
    /* first line */
1195
    if (s->first_slice_line) {
1196
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1197
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1198
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
Michael Niedermayer's avatar
Michael Niedermayer committed
1199 1200 1201 1202
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    }else{
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
        //FIXME try some early stop
Michael Niedermayer's avatar
Michael Niedermayer committed
1203 1204 1205 1206
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1207
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
Michael Niedermayer's avatar
Michael Niedermayer committed
1208
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
Michael Niedermayer's avatar
Michael Niedermayer committed
1209 1210
    }
    if(dmin>64*4){
1211
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1212
                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1213
        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1214
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1215
                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
Michael Niedermayer's avatar
Michael Niedermayer committed
1216 1217
    }

1218
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1219 1220

    *mx_ptr= best[0];
1221
    *my_ptr= best[1];
1222 1223 1224 1225 1226 1227

//    printf("%d %d %d \n", best[0], best[1], dmin);
    return dmin;
}

//try to merge with above FIXME (needs PSNR test)
1228 1229
static int epzs_motion_search2(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int P[10][2],
1230
                             int src_index, int ref_index, int16_t (*last_mv)[2],
1231
                             int ref_mv_scale)
1232
{
1233
    MotionEstContext * const c= &s->me;
1234
    int best[2]={0, 0};
1235
    int d, dmin;
1236
    int map_generation;
1237
    const int penalty_factor= c->penalty_factor;
1238 1239 1240 1241
    const int size=0; //FIXME pass as arg
    const int h=8;
    const int ref_mv_stride= s->mb_stride;
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1242
    me_cmp_func cmpf, chroma_cmpf;
1243
    LOAD_COMMON
1244
    int flags= c->flags;
1245
    LOAD_COMMON2
1246

1247 1248
    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];
1249

1250
    map_generation= update_map_generation(c);
1251 1252

    dmin = 1000000;
1253
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1254
    /* first line */
1255
    if (s->first_slice_line) {
1256
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1257
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1258 1259 1260 1261 1262
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    }else{
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
        //FIXME try some early stop
Michael Niedermayer's avatar
Michael Niedermayer committed
1263 1264 1265 1266
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1267
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
Michael Niedermayer's avatar
Michael Niedermayer committed
1268
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1269 1270
    }
    if(dmin>64*4){
1271
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1272
                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1273
        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1274
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1275
                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1276 1277
    }

1278
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1279

Michael Niedermayer's avatar
Michael Niedermayer committed
1280
    *mx_ptr= best[0];
1281
    *my_ptr= best[1];
Michael Niedermayer's avatar
Michael Niedermayer committed
1282 1283 1284 1285

//    printf("%d %d %d \n", best[0], best[1], dmin);
    return dmin;
}