/*
 * Copyright (c) 2000,2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <string.h>

#include "libavutil/avassert.h"
#include "libavutil/internal.h"
#include "avcodec.h"
#include "h261.h"
#include "mpegutils.h"
#include "mpegvideo.h"
#include "mjpegenc.h"
#include "msmpeg4.h"
#include "qpeldsp.h"
#include "wmv2.h"
#include <limits.h>

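/* Global motion compensation for the single-warping-point case: the sprite
 * offset is a pure translation, so each plane is predicted with one
 * fractional-pel block copy. */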
static void gmc1_motion(MpegEncContext *s,
                        uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                        uint8_t **ref_picture)
{
    uint8_t *ptr;
    int src_x, src_y, motion_x, motion_y;
    ptrdiff_t offset, linesize, uvlinesize;
    int emu = 0;

    motion_x   = s->sprite_offset[0][0];
    motion_y   = s->sprite_offset[0][1];
    src_x      = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy + 1));
    src_y      = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy + 1));
    motion_x <<= (3 - s->sprite_warping_accuracy);
    motion_y <<= (3 - s->sprite_warping_accuracy);
    src_x      = av_clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x = 0;
    src_y = av_clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y = 0;

    linesize   = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + src_y * linesize + src_x;

    if ((unsigned)src_x >= FFMAX(s->h_edge_pos - 17, 0) ||
        (unsigned)src_y >= FFMAX(s->v_edge_pos - 17, 0)) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                 linesize, linesize,
                                 17, 17,
                                 src_x, src_y,
                                 s->h_edge_pos, s->v_edge_pos);
        ptr = s->sc.edge_emu_buffer;
    }

    if ((motion_x | motion_y) & 7) {
        s->mdsp.gmc1(dest_y, ptr, linesize, 16,
                     motion_x & 15, motion_y & 15, 128 - s->no_rounding);
        s->mdsp.gmc1(dest_y + 8, ptr + 8, linesize, 16,
                     motion_x & 15, motion_y & 15, 128 - s->no_rounding);
    } else {
        int dxy;

        dxy = ((motion_x >> 3) & 1) | ((motion_y >> 2) & 2);
        if (s->no_rounding) {
            s->hdsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        } else {
            s->hdsp.put_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
        return;

    motion_x   = s->sprite_offset[1][0];
    motion_y   = s->sprite_offset[1][1];
    src_x      = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy + 1));
    src_y      = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy + 1));
    motion_x <<= (3 - s->sprite_warping_accuracy);
    motion_y <<= (3 - s->sprite_warping_accuracy);
    src_x      = av_clip(src_x, -8, s->width >> 1);
    if (src_x == s->width >> 1)
        motion_x = 0;
    src_y = av_clip(src_y, -8, s->height >> 1);
    if (src_y == s->height >> 1)
        motion_y = 0;

    offset = (src_y * uvlinesize) + src_x;
    ptr    = ref_picture[1] + offset;
    if ((unsigned)src_x >= FFMAX((s->h_edge_pos >> 1) - 9, 0) ||
        (unsigned)src_y >= FFMAX((s->v_edge_pos >> 1) - 9, 0)) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                 uvlinesize, uvlinesize,
                                 9, 9,
                                 src_x, src_y,
                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
        ptr = s->sc.edge_emu_buffer;
        emu = 1;
    }
    s->mdsp.gmc1(dest_cb, ptr, uvlinesize, 8,
                 motion_x & 15, motion_y & 15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if (emu) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                 uvlinesize, uvlinesize,
                                 9, 9,
                                 src_x, src_y,
                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
        ptr = s->sc.edge_emu_buffer;
    }
    s->mdsp.gmc1(dest_cr, ptr, uvlinesize, 8,
                 motion_x & 15, motion_y & 15, 128 - s->no_rounding);
}

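/* General global motion compensation: the per-pixel displacement follows the
 * affine sprite_delta transform and is evaluated by mdsp.gmc() for each
 * plane. */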
static void gmc_motion(MpegEncContext *s,
                       uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                       uint8_t **ref_picture)
{
    uint8_t *ptr;
    int linesize, uvlinesize;
    const int a = s->sprite_warping_accuracy;
    int ox, oy;

    linesize   = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0];

    ox = s->sprite_offset[0][0] + s->sprite_delta[0][0] * s->mb_x * 16 +
         s->sprite_delta[0][1] * s->mb_y * 16;
    oy = s->sprite_offset[0][1] + s->sprite_delta[1][0] * s->mb_x * 16 +
         s->sprite_delta[1][1] * s->mb_y * 16;

    s->mdsp.gmc(dest_y, ptr, linesize, 16,
                ox, oy,
                s->sprite_delta[0][0], s->sprite_delta[0][1],
                s->sprite_delta[1][0], s->sprite_delta[1][1],
                a + 1, (1 << (2 * a + 1)) - s->no_rounding,
                s->h_edge_pos, s->v_edge_pos);
    s->mdsp.gmc(dest_y + 8, ptr, linesize, 16,
                ox + s->sprite_delta[0][0] * 8,
                oy + s->sprite_delta[1][0] * 8,
                s->sprite_delta[0][0], s->sprite_delta[0][1],
                s->sprite_delta[1][0], s->sprite_delta[1][1],
                a + 1, (1 << (2 * a + 1)) - s->no_rounding,
                s->h_edge_pos, s->v_edge_pos);

    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
        return;

    ox = s->sprite_offset[1][0] + s->sprite_delta[0][0] * s->mb_x * 8 +
         s->sprite_delta[0][1] * s->mb_y * 8;
    oy = s->sprite_offset[1][1] + s->sprite_delta[1][0] * s->mb_x * 8 +
         s->sprite_delta[1][1] * s->mb_y * 8;

    ptr = ref_picture[1];
    s->mdsp.gmc(dest_cb, ptr, uvlinesize, 8,
                ox, oy,
                s->sprite_delta[0][0], s->sprite_delta[0][1],
                s->sprite_delta[1][0], s->sprite_delta[1][1],
                a + 1, (1 << (2 * a + 1)) - s->no_rounding,
                (s->h_edge_pos + 1) >> 1, (s->v_edge_pos + 1) >> 1);

    ptr = ref_picture[2];
    s->mdsp.gmc(dest_cr, ptr, uvlinesize, 8,
                ox, oy,
                s->sprite_delta[0][0], s->sprite_delta[0][1],
                s->sprite_delta[1][0], s->sprite_delta[1][1],
                a + 1, (1 << (2 * a + 1)) - s->no_rounding,
                (s->h_edge_pos + 1) >> 1, (s->v_edge_pos + 1) >> 1);
}

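/* Half-pel motion compensation of a single 8x8 block; returns nonzero if edge
 * emulation was needed. */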
static inline int hpel_motion(MpegEncContext *s,
                              uint8_t *dest, uint8_t *src,
                              int src_x, int src_y,
                              op_pixels_func *pix_op,
                              int motion_x, int motion_y)
{
    int dxy = 0;
    int emu = 0;

    src_x += motion_x >> 1;
    src_y += motion_y >> 1;

    /* WARNING: do not forget half pels */
    src_x = av_clip(src_x, -16, s->width); // FIXME unneeded for emu?
    if (src_x != s->width)
        dxy |= motion_x & 1;
    src_y = av_clip(src_y, -16, s->height);
    if (src_y != s->height)
        dxy |= (motion_y & 1) << 1;
    src += src_y * s->linesize + src_x;

    if ((unsigned)src_x >= FFMAX(s->h_edge_pos - (motion_x & 1) - 7, 0) ||
        (unsigned)src_y >= FFMAX(s->v_edge_pos - (motion_y & 1) - 7, 0)) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, src,
                                 s->linesize, s->linesize,
                                 9, 9,
                                 src_x, src_y,
                                 s->h_edge_pos, s->v_edge_pos);
        src = s->sc.edge_emu_buffer;
        emu = 1;
    }
    pix_op[dxy](dest, src, s->linesize, 8);
    return emu;
}

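/* Core MPEG-style motion compensation of one 16-pixel-wide luma block of
 * height h plus its chroma; field_based/bottom_field/field_select handle
 * interlaced prediction, and a constant is_mpeg12 lets the compiler drop the
 * H.261/H.263 special cases. */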
static av_always_inline
void mpeg_motion_internal(MpegEncContext *s,
                          uint8_t *dest_y,
                          uint8_t *dest_cb,
                          uint8_t *dest_cr,
                          int field_based,
                          int bottom_field,
                          int field_select,
                          uint8_t **ref_picture,
                          op_pixels_func (*pix_op)[4],
                          int motion_x,
                          int motion_y,
                          int h,
                          int is_mpeg12,
                          int mb_y)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y,
        uvsrc_x, uvsrc_y, v_edge_pos;
    ptrdiff_t uvlinesize, linesize;

#if 0
    if (s->quarter_sample) {
        motion_x >>= 1;
        motion_y >>= 1;
    }
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.f->linesize[0] << field_based;
    uvlinesize = s->current_picture.f->linesize[1] << field_based;

    dxy   = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x * 16 + (motion_x >> 1);
    src_y = (mb_y << (4 - field_based)) + (motion_y >> 1);

    if (!is_mpeg12 && s->out_format == FMT_H263) {
        if ((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based) {
            mx      = (motion_x >> 1) | (motion_x & 1);
            my      = motion_y >> 1;
            uvdxy   = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x * 8 + (mx >> 1);
            uvsrc_y = (mb_y << (3 - field_based)) + (my >> 1);
        } else {
            uvdxy   = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x >> 1;
            uvsrc_y = src_y >> 1;
        }
    // Even chroma MVs are full-pel in H.261
    } else if (!is_mpeg12 && s->out_format == FMT_H261) {
        mx      = motion_x / 4;
        my      = motion_y / 4;
        uvdxy   = 0;
        uvsrc_x = s->mb_x * 8 + mx;
        uvsrc_y = mb_y * 8 + my;
    } else {
        if (s->chroma_y_shift) {
            mx      = motion_x / 2;
            my      = motion_y / 2;
            uvdxy   = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x * 8 + (mx >> 1);
            uvsrc_y = (mb_y << (3 - field_based)) + (my >> 1);
        } else {
            if (s->chroma_x_shift) {
                // Chroma422
                mx      = motion_x / 2;
                uvdxy   = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x * 8 + (mx >> 1);
                uvsrc_y = src_y;
            } else {
                // Chroma444
                uvdxy   = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if ((unsigned)src_x >= FFMAX(s->h_edge_pos - (motion_x & 1) - 15   , 0) ||
        (unsigned)src_y >= FFMAX(   v_edge_pos - (motion_y & 1) - h + 1, 0)) {
        if (is_mpeg12 ||
            s->codec_id == AV_CODEC_ID_MPEG2VIDEO ||
            s->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
            av_log(s->avctx, AV_LOG_DEBUG,
                   "MPEG motion vector out of boundary (%d %d)\n", src_x,
                   src_y);
            return;
        }
        src_y = (unsigned)src_y << field_based;
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr_y,
                                 s->linesize, s->linesize,
                                 17, 17 + field_based,
                                 src_x, src_y,
                                 s->h_edge_pos, s->v_edge_pos);
        ptr_y = s->sc.edge_emu_buffer;
        if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
            uint8_t *ubuf = s->sc.edge_emu_buffer + 18 * s->linesize;
            uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
            uvsrc_y = (unsigned)uvsrc_y << field_based;
            s->vdsp.emulated_edge_mc(ubuf, ptr_cb,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y,
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            s->vdsp.emulated_edge_mc(vbuf, ptr_cr,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y,
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            ptr_cb = ubuf;
            ptr_cr = vbuf;
        }
    }

    /* FIXME use this for field pix too instead of the obnoxious hack which
     * changes picture.data */
    if (bottom_field) {
        dest_y  += s->linesize;
        dest_cb += s->uvlinesize;
        dest_cr += s->uvlinesize;
    }

    if (field_select) {
        ptr_y  += s->linesize;
        ptr_cb += s->uvlinesize;
        ptr_cr += s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
        pix_op[s->chroma_x_shift][uvdxy]
            (dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy]
            (dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
    if (!is_mpeg12 && (CONFIG_H261_ENCODER || CONFIG_H261_DECODER) &&
        s->out_format == FMT_H261) {
        ff_h261_loop_filter(s);
    }
}
/* apply one mpeg motion vector to the three components */
static void mpeg_motion(MpegEncContext *s,
                        uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                        int field_select, uint8_t **ref_picture,
                        op_pixels_func (*pix_op)[4],
                        int motion_x, int motion_y, int h, int mb_y)
{
#if !CONFIG_SMALL
    if (s->out_format == FMT_MPEG1)
        mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 0, 0,
                             field_select, ref_picture, pix_op,
                             motion_x, motion_y, h, 1, mb_y);
    else
#endif
        mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 0, 0,
                             field_select, ref_picture, pix_op,
                             motion_x, motion_y, h, 0, mb_y);
}

static void mpeg_motion_field(MpegEncContext *s, uint8_t *dest_y,
                              uint8_t *dest_cb, uint8_t *dest_cr,
                              int bottom_field, int field_select,
                              uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4],
                              int motion_x, int motion_y, int h, int mb_y)
{
#if !CONFIG_SMALL
    if (s->out_format == FMT_MPEG1)
        mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 1,
                             bottom_field, field_select, ref_picture, pix_op,
                             motion_x, motion_y, h, 1, mb_y);
    else
#endif
        mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 1,
                             bottom_field, field_select, ref_picture, pix_op,
                             motion_x, motion_y, h, 0, mb_y);
}

// FIXME: SIMDify, avg variant, 16x16 version
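// Blend the five 8x8 predictions (mid, top, left, right, bottom) with the
// OBMC window weights; the weights for each pixel sum to 8.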
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride)
{
    int x;
    uint8_t *const top    = src[1];
    uint8_t *const left   = src[2];
    uint8_t *const mid    = src[0];
    uint8_t *const right  = src[3];
    uint8_t *const bottom = src[4];
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    x = 0;
    OBMC_FILTER (x    , 2, 2, 4, 0, 0);
    OBMC_FILTER (x + 1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x + 2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x + 4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x + 6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x + 7, 2, 0, 4, 2, 0);
    x += stride;
    OBMC_FILTER (x    , 1, 2, 5, 0, 0);
    OBMC_FILTER (x + 1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x + 6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x + 7, 1, 0, 5, 2, 0);
    x += stride;
    OBMC_FILTER4(x    , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x + 2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x + 4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x + 6, 1, 0, 5, 2, 0);
    x += 2 * stride;
    OBMC_FILTER4(x    , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x + 2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x + 4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x + 6, 0, 0, 5, 2, 1);
    x += 2*stride;
    OBMC_FILTER (x    , 0, 2, 5, 0, 1);
    OBMC_FILTER (x + 1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x + 2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x + 4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x + 6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x + 7, 0, 0, 5, 2, 1);
    x += stride;
    OBMC_FILTER (x    , 0, 2, 4, 0, 2);
    OBMC_FILTER (x + 1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x + 6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x + 7, 0, 0, 4, 2, 2);
}

/* obmc for 1 8x8 luma block */
static inline void obmc_motion(MpegEncContext *s,
                               uint8_t *dest, uint8_t *src,
                               int src_x, int src_y,
                               op_pixels_func *pix_op,
                               int16_t mv[5][2] /* mid top left right bottom */)
#define MID    0
{
    int i;
    uint8_t *ptr[5];

    av_assert2(s->quarter_sample == 0);

    for (i = 0; i < 5; i++) {
        if (i && mv[i][0] == mv[MID][0] && mv[i][1] == mv[MID][1]) {
            ptr[i] = ptr[MID];
        } else {
            ptr[i] = s->sc.obmc_scratchpad + 8 * (i & 1) +
                     s->linesize * 8 * (i >> 1);
            hpel_motion(s, ptr[i], src, src_x, src_y, pix_op,
                        mv[i][0], mv[i][1]);
        }
    }

    put_obmc(dest, ptr, s->linesize);
}

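/* Quarter-pel motion compensation: luma uses qpix_op at 1/4-pel accuracy,
 * while chroma is derived at half-pel accuracy (with the optional rounding
 * bug workarounds) and uses pix_op. */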
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y,
                               uint8_t *dest_cb,
                               uint8_t *dest_cr,
                               int field_based, int bottom_field,
                               int field_select, uint8_t **ref_picture,
                               op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos;
    ptrdiff_t linesize, uvlinesize;

    dxy   = ((motion_y & 3) << 2) | (motion_x & 3);

    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->linesize   << field_based;
    uvlinesize = s->uvlinesize << field_based;

    if (field_based) {
        mx = motion_x / 2;
        my = motion_y >> 1;
    } else if (s->workaround_bugs & FF_BUG_QPEL_CHROMA2) {
        static const int rtab[8] = { 0, 0, 1, 1, 0, 0, 0, 1 };
        mx = (motion_x >> 1) + rtab[motion_x & 7];
        my = (motion_y >> 1) + rtab[motion_y & 7];
    } else if (s->workaround_bugs & FF_BUG_QPEL_CHROMA) {
        mx = (motion_x >> 1) | (motion_x & 1);
        my = (motion_y >> 1) | (motion_y & 1);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
    }
    mx = (mx >> 1) | (mx & 1);
    my = (my >> 1) | (my & 1);

    uvdxy = (mx & 1) | ((my & 1) << 1);
    mx  >>= 1;
    my  >>= 1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] + src_y   * linesize   + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if ((unsigned)src_x >= FFMAX(s->h_edge_pos - (motion_x & 3) - 15   , 0) ||
        (unsigned)src_y >= FFMAX(   v_edge_pos - (motion_y & 3) - h + 1, 0)) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr_y,
                                 s->linesize, s->linesize,
                                 17, 17 + field_based,
                                 src_x, src_y << field_based,
                                 s->h_edge_pos, s->v_edge_pos);
        ptr_y = s->sc.edge_emu_buffer;
        if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
            uint8_t *ubuf = s->sc.edge_emu_buffer + 18 * s->linesize;
            uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
            s->vdsp.emulated_edge_mc(ubuf, ptr_cb,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y << field_based,
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            s->vdsp.emulated_edge_mc(vbuf, ptr_cr,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y << field_based,
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            ptr_cb = ubuf;
            ptr_cr = vbuf;
        }
    }

    if (!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else {
        if (bottom_field) {
            dest_y  += s->linesize;
            dest_cb += s->uvlinesize;
            dest_cr += s->uvlinesize;
        }

        if (field_select) {
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        // damn interlaced mode
        // FIXME boundary mirroring is not exactly correct here
        qpix_op[1][dxy](dest_y, ptr_y, linesize);
        qpix_op[1][dxy](dest_y + 8, ptr_y + 8, linesize);
    }
    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}

/**
 * H.263 chroma 4MV motion compensation.
 */
static void chroma_4mv_motion(MpegEncContext *s,
                              uint8_t *dest_cb, uint8_t *dest_cr,
                              uint8_t **ref_picture,
                              op_pixels_func *pix_op,
                              int mx, int my)
{
    uint8_t *ptr;
    int src_x, src_y, dxy, emu = 0;
    ptrdiff_t offset;

    /* In case of 8X8, we construct a single chroma motion vector
     * with a special rounding */
    mx = ff_h263_round_chroma(mx);
    my = ff_h263_round_chroma(my);

    dxy  = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    src_x = av_clip(src_x, -8, (s->width >> 1));
    if (src_x == (s->width >> 1))
        dxy &= ~1;
    src_y = av_clip(src_y, -8, (s->height >> 1));
    if (src_y == (s->height >> 1))
        dxy &= ~2;

    offset = src_y * s->uvlinesize + src_x;
    ptr    = ref_picture[1] + offset;
    if ((unsigned)src_x >= FFMAX((s->h_edge_pos >> 1) - (dxy  & 1) - 7, 0) ||
        (unsigned)src_y >= FFMAX((s->v_edge_pos >> 1) - (dxy >> 1) - 7, 0)) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                 s->uvlinesize, s->uvlinesize,
                                 9, 9, src_x, src_y,
                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
        ptr = s->sc.edge_emu_buffer;
        emu = 1;
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    ptr = ref_picture[2] + offset;
    if (emu) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                 s->uvlinesize, s->uvlinesize,
                                 9, 9, src_x, src_y,
                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
        ptr = s->sc.edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}

static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir)
{
    /* fetch pixels for the estimated MV 4 macroblocks ahead;
     * optimized for 64-byte cache lines */
    const int shift = s->quarter_sample ? 2 : 1;
    const int mx    = (s->mv[dir][0][0] >> shift) + 16 * s->mb_x + 8;
    const int my    = (s->mv[dir][0][1] >> shift) + 16 * s->mb_y;
    int off         = mx + (my + (s->mb_x & 3) * 4) * s->linesize + 64;

    s->vdsp.prefetch(pix[0] + off, s->linesize, 4);
    off = (mx >> 1) + ((my >> 1) + (s->mb_x & 7)) * s->uvlinesize + 64;
    s->vdsp.prefetch(pix[1] + off, pix[2] - pix[1], 2);
}

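/* H.263-style OBMC: gather the 8x8 motion vectors of the current and
 * neighbouring blocks, predict each luma 8x8 block from five overlapping
 * candidates, then predict chroma from the accumulated vector. */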
static inline void apply_obmc(MpegEncContext *s,
                              uint8_t *dest_y,
                              uint8_t *dest_cb,
                              uint8_t *dest_cr,
                              uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4])
{
    LOCAL_ALIGNED_8(int16_t, mv_cache, [4], [4][2]);
    Picture *cur_frame   = &s->current_picture;
    int mb_x = s->mb_x;
    int mb_y = s->mb_y;
    const int xy         = mb_x + mb_y * s->mb_stride;
    const int mot_stride = s->b8_stride;
    const int mot_xy     = mb_x * 2 + mb_y * 2 * mot_stride;
    int mx, my, i;

    av_assert2(!s->mb_skipped);

    AV_COPY32(mv_cache[1][1], cur_frame->motion_val[0][mot_xy]);
    AV_COPY32(mv_cache[1][2], cur_frame->motion_val[0][mot_xy + 1]);

    AV_COPY32(mv_cache[2][1],
              cur_frame->motion_val[0][mot_xy + mot_stride]);
    AV_COPY32(mv_cache[2][2],
              cur_frame->motion_val[0][mot_xy + mot_stride + 1]);

    AV_COPY32(mv_cache[3][1],
              cur_frame->motion_val[0][mot_xy + mot_stride]);
    AV_COPY32(mv_cache[3][2],
              cur_frame->motion_val[0][mot_xy + mot_stride + 1]);

    if (mb_y == 0 || IS_INTRA(cur_frame->mb_type[xy - s->mb_stride])) {
        AV_COPY32(mv_cache[0][1], mv_cache[1][1]);
        AV_COPY32(mv_cache[0][2], mv_cache[1][2]);
    } else {
        AV_COPY32(mv_cache[0][1],
                  cur_frame->motion_val[0][mot_xy - mot_stride]);
        AV_COPY32(mv_cache[0][2],
                  cur_frame->motion_val[0][mot_xy - mot_stride + 1]);
    }

    if (mb_x == 0 || IS_INTRA(cur_frame->mb_type[xy - 1])) {
        AV_COPY32(mv_cache[1][0], mv_cache[1][1]);
        AV_COPY32(mv_cache[2][0], mv_cache[2][1]);
    } else {
        AV_COPY32(mv_cache[1][0], cur_frame->motion_val[0][mot_xy - 1]);
        AV_COPY32(mv_cache[2][0],
                  cur_frame->motion_val[0][mot_xy - 1 + mot_stride]);
    }

    if (mb_x + 1 >= s->mb_width || IS_INTRA(cur_frame->mb_type[xy + 1])) {
        AV_COPY32(mv_cache[1][3], mv_cache[1][2]);
        AV_COPY32(mv_cache[2][3], mv_cache[2][2]);
    } else {
        AV_COPY32(mv_cache[1][3], cur_frame->motion_val[0][mot_xy + 2]);
        AV_COPY32(mv_cache[2][3],
                  cur_frame->motion_val[0][mot_xy + 2 + mot_stride]);
    }

    mx = 0;
    my = 0;
    for (i = 0; i < 4; i++) {
        const int x      = (i & 1) + 1;
        const int y      = (i >> 1) + 1;
        int16_t mv[5][2] = {
            { mv_cache[y][x][0],     mv_cache[y][x][1]         },
            { mv_cache[y - 1][x][0], mv_cache[y - 1][x][1]     },
            { mv_cache[y][x - 1][0], mv_cache[y][x - 1][1]     },
            { mv_cache[y][x + 1][0], mv_cache[y][x + 1][1]     },
            { mv_cache[y + 1][x][0], mv_cache[y + 1][x][1]     }
        };
        // FIXME cleanup
        obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                    ref_picture[0],
                    mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8,
                    pix_op[1],
                    mv);

        mx += mv[0][0];
        my += mv[0][1];
    }
    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
        chroma_4mv_motion(s, dest_cb, dest_cr,
                          ref_picture, pix_op[1],
                          mx, my);
}

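/* 4MV prediction: one motion vector per 8x8 luma block (quarter- or half-pel);
 * chroma is predicted from the accumulated vector via chroma_4mv_motion(). */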
static inline void apply_8x8(MpegEncContext *s,
                             uint8_t *dest_y,
                             uint8_t *dest_cb,
                             uint8_t *dest_cr,
                             int dir,
                             uint8_t **ref_picture,
                             qpel_mc_func (*qpix_op)[16],
                             op_pixels_func (*pix_op)[4])
{
    int dxy, mx, my, src_x, src_y;
    int i;
    int mb_x = s->mb_x;
    int mb_y = s->mb_y;
    uint8_t *ptr, *dest;

    mx = 0;
    my = 0;
    if (s->quarter_sample) {
        for (i = 0; i < 4; i++) {
            int motion_x = s->mv[dir][i][0];
            int motion_y = s->mv[dir][i][1];

            dxy   = ((motion_y & 3) << 2) | (motion_x & 3);
            src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
            src_y = mb_y * 16 + (motion_y >> 2) + (i >> 1) * 8;

            /* WARNING: do not forget half pels */
            src_x = av_clip(src_x, -16, s->width);
            if (src_x == s->width)
                dxy &= ~3;
            src_y = av_clip(src_y, -16, s->height);
            if (src_y == s->height)
                dxy &= ~12;

            ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
            if ((unsigned)src_x >= FFMAX(s->h_edge_pos - (motion_x & 3) - 7, 0) ||
                (unsigned)src_y >= FFMAX(s->v_edge_pos - (motion_y & 3) - 7, 0)) {
                s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                         s->linesize, s->linesize,
                                         9, 9,
                                         src_x, src_y,
                                         s->h_edge_pos,
                                         s->v_edge_pos);
                ptr = s->sc.edge_emu_buffer;
            }
            dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
            qpix_op[1][dxy](dest, ptr, s->linesize);

            mx += s->mv[dir][i][0] / 2;
            my += s->mv[dir][i][1] / 2;
        }
    } else {
        for (i = 0; i < 4; i++) {
            hpel_motion(s,
                        dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8,
                        mb_y * 16 + (i >> 1) * 8,
                        pix_op[1],
                        s->mv[dir][i][0],
                        s->mv[dir][i][1]);

            mx += s->mv[dir][i][0];
            my += s->mv[dir][i][1];
        }
    }

    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
        chroma_4mv_motion(s, dest_cb, dest_cr,
                          ref_picture, pix_op[1], mx, my);
}

/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static av_always_inline void mpv_motion_internal(MpegEncContext *s,
                                                 uint8_t *dest_y,
                                                 uint8_t *dest_cb,
                                                 uint8_t *dest_cr,
                                                 int dir,
                                                 uint8_t **ref_picture,
                                                 op_pixels_func (*pix_op)[4],
                                                 qpel_mc_func (*qpix_op)[16],
                                                 int is_mpeg12)
{
    int i;
    int mb_y = s->mb_y;

    prefetch_motion(s, ref_picture, dir);

    if (!is_mpeg12 && s->obmc && s->pict_type != AV_PICTURE_TYPE_B) {
        apply_obmc(s, dest_y, dest_cb, dest_cr, ref_picture, pix_op);
        return;
    }

    switch (s->mv_type) {
    case MV_TYPE_16X16:
        if (s->mcsel) {
            if (s->real_sprite_warping_points == 1) {
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            } else {
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                           ref_picture);
            }
        } else if (!is_mpeg12 && s->quarter_sample) {
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        } else if (!is_mpeg12 && (CONFIG_WMV2_DECODER || CONFIG_WMV2_ENCODER) &&
                   s->mspel && s->codec_id == AV_CODEC_ID_WMV2) {
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 16);
        } else {
            mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16, mb_y);
        }
        break;
    case MV_TYPE_8X8:
        if (!is_mpeg12)
            apply_8x8(s, dest_y, dest_cb, dest_cr,
                      dir, ref_picture, qpix_op, pix_op);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if (!is_mpeg12 && s->quarter_sample) {
                for (i = 0; i < 2; i++)
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
            } else {
                /* top field */
                mpeg_motion_field(s, dest_y, dest_cb, dest_cr,
                                  0, s->field_select[dir][0],
                                  ref_picture, pix_op,
                                  s->mv[dir][0][0], s->mv[dir][0][1], 8, mb_y);
                /* bottom field */
                mpeg_motion_field(s, dest_y, dest_cb, dest_cr,
                                  1, s->field_select[dir][1],
                                  ref_picture, pix_op,
                                  s->mv[dir][1][0], s->mv[dir][1][1], 8, mb_y);
            }
        } else {
            if (   s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != AV_PICTURE_TYPE_B && !s->first_field
                || !ref_picture[0]) {
                ref_picture = s->current_picture_ptr->f->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16, mb_y >> 1);
        }
        break;
    case MV_TYPE_16X8:
        for (i = 0; i < 2; i++) {
            uint8_t **ref2picture;

            if ((s->picture_structure == s->field_select[dir][i] + 1
                || s->pict_type == AV_PICTURE_TYPE_B || s->first_field) && ref_picture[0]) {
                ref2picture = ref_picture;
            } else {
                ref2picture = s->current_picture_ptr->f->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16 * i,
                        8, mb_y >> 1);

            dest_y  += 16 * s->linesize;
            dest_cb += (16 >> s->chroma_y_shift) * s->uvlinesize;
            dest_cr += (16 >> s->chroma_y_shift) * s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        if (s->picture_structure == PICT_FRAME) {
            for (i = 0; i < 2; i++) {
                int j;
                for (j = 0; j < 2; j++)
                    mpeg_motion_field(s, dest_y, dest_cb, dest_cr,
                                      j, j ^ i, ref_picture, pix_op,
                                      s->mv[dir][2 * i + j][0],
                                      s->mv[dir][2 * i + j][1], 8, mb_y);
                pix_op = s->hdsp.avg_pixels_tab;
            }
        } else {
            if (!ref_picture[0]) {
                ref_picture = s->current_picture_ptr->f->data;
            }
            for (i = 0; i < 2; i++) {
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            s->picture_structure != i + 1,
                            ref_picture, pix_op,
                            s->mv[dir][2 * i][0], s->mv[dir][2 * i][1],
                            16, mb_y >> 1);

                // after put we make avg of the same block
                pix_op = s->hdsp.avg_pixels_tab;

                /* opposite parity is always in the same frame if this is
                 * second field */
                if (!s->first_field) {
                    ref_picture = s->current_picture_ptr->f->data;
                }
            }
        }
        break;
    default: av_assert2(0);
    }
}

void ff_mpv_motion(MpegEncContext *s,
                   uint8_t *dest_y, uint8_t *dest_cb,
                   uint8_t *dest_cr, int dir,
                   uint8_t **ref_picture,
                   op_pixels_func (*pix_op)[4],
                   qpel_mc_func (*qpix_op)[16])
{
#if !CONFIG_SMALL
    if (s->out_format == FMT_MPEG1)
        mpv_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
                            ref_picture, pix_op, qpix_op, 1);
    else
#endif
        mpv_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
                            ref_picture, pix_op, qpix_op, 0);
}