/*
 * Copyright (c) 2000,2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <string.h>

#include "libavutil/avassert.h"
#include "libavutil/internal.h"
#include "avcodec.h"
#include "h261.h"
#include "mpegutils.h"
#include "mpegvideo.h"
#include "mjpegenc.h"
#include "msmpeg4.h"
#include "qpeldsp.h"
#include "wmv2.h"
#include <limits.h>

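/* Global motion compensation for the single-warping-point case:
 * apply one translational sprite offset to the luma and chroma
 * planes via the gmc1 DSP routines. */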
static void gmc1_motion(MpegEncContext *s,
                        uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                        uint8_t **ref_picture)
{
    uint8_t *ptr;
    int src_x, src_y, motion_x, motion_y;
    ptrdiff_t offset, linesize, uvlinesize;
    int emu = 0;

    motion_x   = s->sprite_offset[0][0];
    motion_y   = s->sprite_offset[0][1];
    src_x      = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy + 1));
    src_y      = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy + 1));
    motion_x *= 1 << (3 - s->sprite_warping_accuracy);
    motion_y *= 1 << (3 - s->sprite_warping_accuracy);
    src_x      = av_clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x = 0;
    src_y = av_clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y = 0;

    linesize   = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + src_y * linesize + src_x;

    if ((unsigned)src_x >= FFMAX(s->h_edge_pos - 17, 0) ||
        (unsigned)src_y >= FFMAX(s->v_edge_pos - 17, 0)) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                 linesize, linesize,
                                 17, 17,
                                 src_x, src_y,
                                 s->h_edge_pos, s->v_edge_pos);
        ptr = s->sc.edge_emu_buffer;
    }

    if ((motion_x | motion_y) & 7) {
        s->mdsp.gmc1(dest_y, ptr, linesize, 16,
                     motion_x & 15, motion_y & 15, 128 - s->no_rounding);
        s->mdsp.gmc1(dest_y + 8, ptr + 8, linesize, 16,
                     motion_x & 15, motion_y & 15, 128 - s->no_rounding);
    } else {
        int dxy;

        dxy = ((motion_x >> 3) & 1) | ((motion_y >> 2) & 2);
        if (s->no_rounding) {
            s->hdsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        } else {
            s->hdsp.put_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
        return;

    motion_x   = s->sprite_offset[1][0];
    motion_y   = s->sprite_offset[1][1];
    src_x      = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy + 1));
    src_y      = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy + 1));
    motion_x  *= 1 << (3 - s->sprite_warping_accuracy);
    motion_y  *= 1 << (3 - s->sprite_warping_accuracy);
    src_x      = av_clip(src_x, -8, s->width >> 1);
    if (src_x == s->width >> 1)
        motion_x = 0;
    src_y = av_clip(src_y, -8, s->height >> 1);
    if (src_y == s->height >> 1)
        motion_y = 0;

    offset = (src_y * uvlinesize) + src_x;
    ptr    = ref_picture[1] + offset;
    if ((unsigned)src_x >= FFMAX((s->h_edge_pos >> 1) - 9, 0) ||
        (unsigned)src_y >= FFMAX((s->v_edge_pos >> 1) - 9, 0)) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                 uvlinesize, uvlinesize,
                                 9, 9,
                                 src_x, src_y,
                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
        ptr = s->sc.edge_emu_buffer;
        emu = 1;
    }
    s->mdsp.gmc1(dest_cb, ptr, uvlinesize, 8,
                 motion_x & 15, motion_y & 15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if (emu) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                 uvlinesize, uvlinesize,
                                 9, 9,
                                 src_x, src_y,
                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
        ptr = s->sc.edge_emu_buffer;
    }
    s->mdsp.gmc1(dest_cr, ptr, uvlinesize, 8,
                 motion_x & 15, motion_y & 15, 128 - s->no_rounding);
}

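/* Global motion compensation for the general case: apply the affine warp
 * described by sprite_offset/sprite_delta via the gmc DSP routine. */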
static void gmc_motion(MpegEncContext *s,
                       uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                       uint8_t **ref_picture)
{
    uint8_t *ptr;
    int linesize, uvlinesize;
    const int a = s->sprite_warping_accuracy;
    int ox, oy;

    linesize   = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0];

    ox = s->sprite_offset[0][0] + s->sprite_delta[0][0] * s->mb_x * 16 +
         s->sprite_delta[0][1] * s->mb_y * 16;
    oy = s->sprite_offset[0][1] + s->sprite_delta[1][0] * s->mb_x * 16 +
         s->sprite_delta[1][1] * s->mb_y * 16;

    s->mdsp.gmc(dest_y, ptr, linesize, 16,
                ox, oy,
                s->sprite_delta[0][0], s->sprite_delta[0][1],
                s->sprite_delta[1][0], s->sprite_delta[1][1],
                a + 1, (1 << (2 * a + 1)) - s->no_rounding,
                s->h_edge_pos, s->v_edge_pos);
    s->mdsp.gmc(dest_y + 8, ptr, linesize, 16,
                ox + s->sprite_delta[0][0] * 8,
                oy + s->sprite_delta[1][0] * 8,
                s->sprite_delta[0][0], s->sprite_delta[0][1],
                s->sprite_delta[1][0], s->sprite_delta[1][1],
                a + 1, (1 << (2 * a + 1)) - s->no_rounding,
                s->h_edge_pos, s->v_edge_pos);

    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
        return;

    ox = s->sprite_offset[1][0] + s->sprite_delta[0][0] * s->mb_x * 8 +
         s->sprite_delta[0][1] * s->mb_y * 8;
    oy = s->sprite_offset[1][1] + s->sprite_delta[1][0] * s->mb_x * 8 +
         s->sprite_delta[1][1] * s->mb_y * 8;

    ptr = ref_picture[1];
    s->mdsp.gmc(dest_cb, ptr, uvlinesize, 8,
                ox, oy,
                s->sprite_delta[0][0], s->sprite_delta[0][1],
                s->sprite_delta[1][0], s->sprite_delta[1][1],
                a + 1, (1 << (2 * a + 1)) - s->no_rounding,
                (s->h_edge_pos + 1) >> 1, (s->v_edge_pos + 1) >> 1);

    ptr = ref_picture[2];
    s->mdsp.gmc(dest_cr, ptr, uvlinesize, 8,
                ox, oy,
                s->sprite_delta[0][0], s->sprite_delta[0][1],
                s->sprite_delta[1][0], s->sprite_delta[1][1],
                a + 1, (1 << (2 * a + 1)) - s->no_rounding,
                (s->h_edge_pos + 1) >> 1, (s->v_edge_pos + 1) >> 1);
}

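/* Half-pel motion compensation of one 8x8 luma block.
 * Returns 1 if the emulated edge buffer had to be used. */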
static inline int hpel_motion(MpegEncContext *s,
                              uint8_t *dest, uint8_t *src,
                              int src_x, int src_y,
                              op_pixels_func *pix_op,
                              int motion_x, int motion_y)
{
    int dxy = 0;
    int emu = 0;

    src_x += motion_x >> 1;
    src_y += motion_y >> 1;

    /* WARNING: do not forget half pels */
    src_x = av_clip(src_x, -16, s->width); // FIXME unneeded for emu?
    if (src_x != s->width)
        dxy |= motion_x & 1;
    src_y = av_clip(src_y, -16, s->height);
    if (src_y != s->height)
        dxy |= (motion_y & 1) << 1;
    src += src_y * s->linesize + src_x;

    if ((unsigned)src_x >= FFMAX(s->h_edge_pos - (motion_x & 1) - 7, 0) ||
        (unsigned)src_y >= FFMAX(s->v_edge_pos - (motion_y & 1) - 7, 0)) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, src,
                                 s->linesize, s->linesize,
                                 9, 9,
                                 src_x, src_y,
                                 s->h_edge_pos, s->v_edge_pos);
        src = s->sc.edge_emu_buffer;
        emu = 1;
    }
    pix_op[dxy](dest, src, s->linesize, 8);
    return emu;
}

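/* Motion compensation with one motion vector applied to the luma and both
 * chroma planes; handles frame- and field-based prediction as well as the
 * 4:2:0, 4:2:2 and 4:4:4 chroma layouts. */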
static av_always_inline
void mpeg_motion_internal(MpegEncContext *s,
                          uint8_t *dest_y,
                          uint8_t *dest_cb,
                          uint8_t *dest_cr,
                          int field_based,
                          int bottom_field,
                          int field_select,
                          uint8_t **ref_picture,
                          op_pixels_func (*pix_op)[4],
                          int motion_x,
                          int motion_y,
                          int h,
                          int is_mpeg12,
                          int mb_y)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y,
        uvsrc_x, uvsrc_y, v_edge_pos;
    ptrdiff_t uvlinesize, linesize;

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.f->linesize[0] << field_based;
    uvlinesize = s->current_picture.f->linesize[1] << field_based;

    dxy   = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x * 16 + (motion_x >> 1);
    src_y = (mb_y << (4 - field_based)) + (motion_y >> 1);

    if (!is_mpeg12 && s->out_format == FMT_H263) {
        if ((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based) {
            mx      = (motion_x >> 1) | (motion_x & 1);
            my      = motion_y >> 1;
            uvdxy   = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x * 8 + (mx >> 1);
            uvsrc_y = (mb_y << (3 - field_based)) + (my >> 1);
        } else {
            uvdxy   = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x >> 1;
            uvsrc_y = src_y >> 1;
        }
    // Even chroma MVs are full-pel in H.261
    } else if (!is_mpeg12 && s->out_format == FMT_H261) {
        mx      = motion_x / 4;
        my      = motion_y / 4;
        uvdxy   = 0;
        uvsrc_x = s->mb_x * 8 + mx;
        uvsrc_y = mb_y * 8 + my;
    } else {
        if (s->chroma_y_shift) {
            mx      = motion_x / 2;
            my      = motion_y / 2;
            uvdxy   = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x * 8 + (mx >> 1);
            uvsrc_y = (mb_y << (3 - field_based)) + (my >> 1);
        } else {
            if (s->chroma_x_shift) {
                // Chroma422
                mx      = motion_x / 2;
                uvdxy   = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x * 8 + (mx >> 1);
                uvsrc_y = src_y;
            } else {
                // Chroma444
                uvdxy   = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if ((unsigned)src_x >= FFMAX(s->h_edge_pos - (motion_x & 1) - 15   , 0) ||
        (unsigned)src_y >= FFMAX(   v_edge_pos - (motion_y & 1) - h + 1, 0)) {
        if (is_mpeg12 ||
            s->codec_id == AV_CODEC_ID_MPEG2VIDEO ||
            s->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
            av_log(s->avctx, AV_LOG_DEBUG,
                   "MPEG motion vector out of boundary (%d %d)\n", src_x,
                   src_y);
            return;
        }
        src_y = (unsigned)src_y << field_based;
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr_y,
                                 s->linesize, s->linesize,
                                 17, 17 + field_based,
                                 src_x, src_y,
                                 s->h_edge_pos, s->v_edge_pos);
        ptr_y = s->sc.edge_emu_buffer;
        if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
            uint8_t *ubuf = s->sc.edge_emu_buffer + 18 * s->linesize;
            uint8_t *vbuf = ubuf + 10 * s->uvlinesize;
            if (s->workaround_bugs & FF_BUG_IEDGE)
                vbuf -= s->uvlinesize;
            uvsrc_y = (unsigned)uvsrc_y << field_based;
            s->vdsp.emulated_edge_mc(ubuf, ptr_cb,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y,
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            s->vdsp.emulated_edge_mc(vbuf, ptr_cr,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y,
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            ptr_cb = ubuf;
            ptr_cr = vbuf;
        }
    }

    /* FIXME use this for field pix too instead of the obnoxious hack which
     * changes picture.data */
    if (bottom_field) {
        dest_y  += s->linesize;
        dest_cb += s->uvlinesize;
        dest_cr += s->uvlinesize;
    }

    if (field_select) {
        ptr_y  += s->linesize;
        ptr_cb += s->uvlinesize;
        ptr_cr += s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
        pix_op[s->chroma_x_shift][uvdxy]
            (dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy]
            (dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
    if (!is_mpeg12 && (CONFIG_H261_ENCODER || CONFIG_H261_DECODER) &&
        s->out_format == FMT_H261) {
        ff_h261_loop_filter(s);
    }
}
/* apply one mpeg motion vector to the three components */
static void mpeg_motion(MpegEncContext *s,
                        uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                        int field_select, uint8_t **ref_picture,
                        op_pixels_func (*pix_op)[4],
                        int motion_x, int motion_y, int h, int mb_y)
{
#if !CONFIG_SMALL
    if (s->out_format == FMT_MPEG1)
        mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 0, 0,
                             field_select, ref_picture, pix_op,
                             motion_x, motion_y, h, 1, mb_y);
    else
#endif
        mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 0, 0,
                             field_select, ref_picture, pix_op,
                             motion_x, motion_y, h, 0, mb_y);
}

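/* as mpeg_motion(), but for one field of a field-based prediction */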
static void mpeg_motion_field(MpegEncContext *s, uint8_t *dest_y,
                              uint8_t *dest_cb, uint8_t *dest_cr,
                              int bottom_field, int field_select,
                              uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4],
                              int motion_x, int motion_y, int h, int mb_y)
{
#if !CONFIG_SMALL
    if (s->out_format == FMT_MPEG1)
        mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 1,
                             bottom_field, field_select, ref_picture, pix_op,
                             motion_x, motion_y, h, 1, mb_y);
    else
#endif
        mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 1,
                             bottom_field, field_select, ref_picture, pix_op,
                             motion_x, motion_y, h, 0, mb_y);
}

// FIXME: SIMDify, avg variant, 16x16 version
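/* Blend the centre 8x8 prediction in src[0] with its top/left/right/bottom
 * neighbours using the fixed OBMC window weights. */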
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride)
{
    int x;
    uint8_t *const top    = src[1];
    uint8_t *const left   = src[2];
    uint8_t *const mid    = src[0];
    uint8_t *const right  = src[3];
    uint8_t *const bottom = src[4];
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    x = 0;
    OBMC_FILTER (x    , 2, 2, 4, 0, 0);
    OBMC_FILTER (x + 1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x + 2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x + 4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x + 6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x + 7, 2, 0, 4, 2, 0);
    x += stride;
    OBMC_FILTER (x    , 1, 2, 5, 0, 0);
    OBMC_FILTER (x + 1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x + 6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x + 7, 1, 0, 5, 2, 0);
    x += stride;
    OBMC_FILTER4(x    , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x + 2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x + 4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x + 6, 1, 0, 5, 2, 0);
    x += 2 * stride;
    OBMC_FILTER4(x    , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x + 2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x + 4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x + 6, 0, 0, 5, 2, 1);
    x += 2*stride;
    OBMC_FILTER (x    , 0, 2, 5, 0, 1);
    OBMC_FILTER (x + 1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x + 2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x + 4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x + 6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x + 7, 0, 0, 5, 2, 1);
    x += stride;
    OBMC_FILTER (x    , 0, 2, 4, 0, 2);
    OBMC_FILTER (x + 1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x + 6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x + 7, 0, 0, 4, 2, 2);
}

/* obmc for 1 8x8 luma block */
static inline void obmc_motion(MpegEncContext *s,
                               uint8_t *dest, uint8_t *src,
                               int src_x, int src_y,
                               op_pixels_func *pix_op,
                               int16_t mv[5][2] /* mid top left right bottom */)
#define MID    0
{
    int i;
    uint8_t *ptr[5];

    av_assert2(s->quarter_sample == 0);

    for (i = 0; i < 5; i++) {
        if (i && mv[i][0] == mv[MID][0] && mv[i][1] == mv[MID][1]) {
            ptr[i] = ptr[MID];
        } else {
            ptr[i] = s->sc.obmc_scratchpad + 8 * (i & 1) +
                     s->linesize * 8 * (i >> 1);
            hpel_motion(s, ptr[i], src, src_x, src_y, pix_op,
                        mv[i][0], mv[i][1]);
        }
    }

    put_obmc(dest, ptr, s->linesize);
}

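/* Quarter-pel motion compensation of one macroblock (or one field of it),
 * including the chroma rounding workarounds for buggy encoders. */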
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y,
                               uint8_t *dest_cb,
                               uint8_t *dest_cr,
                               int field_based, int bottom_field,
                               int field_select, uint8_t **ref_picture,
                               op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos;
    ptrdiff_t linesize, uvlinesize;

    dxy   = ((motion_y & 3) << 2) | (motion_x & 3);

    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->linesize   << field_based;
    uvlinesize = s->uvlinesize << field_based;

    if (field_based) {
        mx = motion_x / 2;
        my = motion_y >> 1;
    } else if (s->workaround_bugs & FF_BUG_QPEL_CHROMA2) {
        static const int rtab[8] = { 0, 0, 1, 1, 0, 0, 0, 1 };
        mx = (motion_x >> 1) + rtab[motion_x & 7];
        my = (motion_y >> 1) + rtab[motion_y & 7];
    } else if (s->workaround_bugs & FF_BUG_QPEL_CHROMA) {
        mx = (motion_x >> 1) | (motion_x & 1);
        my = (motion_y >> 1) | (motion_y & 1);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
    }
    mx = (mx >> 1) | (mx & 1);
    my = (my >> 1) | (my & 1);

    uvdxy = (mx & 1) | ((my & 1) << 1);
    mx  >>= 1;
    my  >>= 1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] + src_y   * linesize   + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if ((unsigned)src_x >= FFMAX(s->h_edge_pos - (motion_x & 3) - 15   , 0) ||
        (unsigned)src_y >= FFMAX(   v_edge_pos - (motion_y & 3) - h + 1, 0)) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr_y,
                                 s->linesize, s->linesize,
                                 17, 17 + field_based,
                                 src_x, src_y * (1 << field_based),
                                 s->h_edge_pos, s->v_edge_pos);
        ptr_y = s->sc.edge_emu_buffer;
        if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
            uint8_t *ubuf = s->sc.edge_emu_buffer + 18 * s->linesize;
            uint8_t *vbuf = ubuf + 10 * s->uvlinesize;
            if (s->workaround_bugs & FF_BUG_IEDGE)
                vbuf -= s->uvlinesize;
            s->vdsp.emulated_edge_mc(ubuf, ptr_cb,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y * (1 << field_based),
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            s->vdsp.emulated_edge_mc(vbuf, ptr_cr,
                                     s->uvlinesize, s->uvlinesize,
                                     9, 9 + field_based,
                                     uvsrc_x, uvsrc_y * (1 << field_based),
                                     s->h_edge_pos >> 1, s->v_edge_pos >> 1);
            ptr_cb = ubuf;
            ptr_cr = vbuf;
        }
    }

    if (!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else {
        if (bottom_field) {
            dest_y  += s->linesize;
            dest_cb += s->uvlinesize;
            dest_cr += s->uvlinesize;
        }

        if (field_select) {
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        // damn interlaced mode
        // FIXME boundary mirroring is not exactly correct here
        qpix_op[1][dxy](dest_y, ptr_y, linesize);
        qpix_op[1][dxy](dest_y + 8, ptr_y + 8, linesize);
    }
    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}

/**
 * H.263 chroma 4mv motion compensation.
 */
static void chroma_4mv_motion(MpegEncContext *s,
                              uint8_t *dest_cb, uint8_t *dest_cr,
                              uint8_t **ref_picture,
                              op_pixels_func *pix_op,
                              int mx, int my)
{
    uint8_t *ptr;
    int src_x, src_y, dxy, emu = 0;
    ptrdiff_t offset;

    /* In case of 8X8, we construct a single chroma motion vector
     * with a special rounding */
    mx = ff_h263_round_chroma(mx);
    my = ff_h263_round_chroma(my);

    dxy  = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    src_x = av_clip(src_x, -8, (s->width >> 1));
    if (src_x == (s->width >> 1))
        dxy &= ~1;
    src_y = av_clip(src_y, -8, (s->height >> 1));
    if (src_y == (s->height >> 1))
        dxy &= ~2;

    offset = src_y * s->uvlinesize + src_x;
    ptr    = ref_picture[1] + offset;
    if ((unsigned)src_x >= FFMAX((s->h_edge_pos >> 1) - (dxy  & 1) - 7, 0) ||
        (unsigned)src_y >= FFMAX((s->v_edge_pos >> 1) - (dxy >> 1) - 7, 0)) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                 s->uvlinesize, s->uvlinesize,
                                 9, 9, src_x, src_y,
                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
        ptr = s->sc.edge_emu_buffer;
        emu = 1;
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    ptr = ref_picture[2] + offset;
    if (emu) {
        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                 s->uvlinesize, s->uvlinesize,
                                 9, 9, src_x, src_y,
                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
        ptr = s->sc.edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}

static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir)
{
    /* fetch pixels for the estimated MV 4 macroblocks ahead;
     * optimized for 64-byte cache lines */
    const int shift = s->quarter_sample ? 2 : 1;
    const int mx    = (s->mv[dir][0][0] >> shift) + 16 * s->mb_x + 8;
    const int my    = (s->mv[dir][0][1] >> shift) + 16 * s->mb_y;
    int off         = mx + (my + (s->mb_x & 3) * 4) * s->linesize + 64;

    s->vdsp.prefetch(pix[0] + off, s->linesize, 4);
    off = (mx >> 1) + ((my >> 1) + (s->mb_x & 7)) * s->uvlinesize + 64;
    s->vdsp.prefetch(pix[1] + off, pix[2] - pix[1], 2);
}

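/* Overlapped block motion compensation of one macroblock: gather the
 * neighbouring 8x8 motion vectors, run obmc_motion() on each luma block
 * and derive the chroma prediction from the four luma vectors. */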
static inline void apply_obmc(MpegEncContext *s,
                              uint8_t *dest_y,
                              uint8_t *dest_cb,
                              uint8_t *dest_cr,
                              uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4])
{
    LOCAL_ALIGNED_8(int16_t, mv_cache, [4], [4][2]);
    Picture *cur_frame   = &s->current_picture;
    int mb_x = s->mb_x;
    int mb_y = s->mb_y;
    const int xy         = mb_x + mb_y * s->mb_stride;
    const int mot_stride = s->b8_stride;
    const int mot_xy     = mb_x * 2 + mb_y * 2 * mot_stride;
    int mx, my, i;

    av_assert2(!s->mb_skipped);

    AV_COPY32(mv_cache[1][1], cur_frame->motion_val[0][mot_xy]);
    AV_COPY32(mv_cache[1][2], cur_frame->motion_val[0][mot_xy + 1]);

    AV_COPY32(mv_cache[2][1],
              cur_frame->motion_val[0][mot_xy + mot_stride]);
    AV_COPY32(mv_cache[2][2],
              cur_frame->motion_val[0][mot_xy + mot_stride + 1]);

    AV_COPY32(mv_cache[3][1],
              cur_frame->motion_val[0][mot_xy + mot_stride]);
    AV_COPY32(mv_cache[3][2],
              cur_frame->motion_val[0][mot_xy + mot_stride + 1]);

    if (mb_y == 0 || IS_INTRA(cur_frame->mb_type[xy - s->mb_stride])) {
        AV_COPY32(mv_cache[0][1], mv_cache[1][1]);
        AV_COPY32(mv_cache[0][2], mv_cache[1][2]);
    } else {
        AV_COPY32(mv_cache[0][1],
                  cur_frame->motion_val[0][mot_xy - mot_stride]);
        AV_COPY32(mv_cache[0][2],
                  cur_frame->motion_val[0][mot_xy - mot_stride + 1]);
    }

    if (mb_x == 0 || IS_INTRA(cur_frame->mb_type[xy - 1])) {
        AV_COPY32(mv_cache[1][0], mv_cache[1][1]);
        AV_COPY32(mv_cache[2][0], mv_cache[2][1]);
    } else {
        AV_COPY32(mv_cache[1][0], cur_frame->motion_val[0][mot_xy - 1]);
        AV_COPY32(mv_cache[2][0],
                  cur_frame->motion_val[0][mot_xy - 1 + mot_stride]);
    }

    if (mb_x + 1 >= s->mb_width || IS_INTRA(cur_frame->mb_type[xy + 1])) {
        AV_COPY32(mv_cache[1][3], mv_cache[1][2]);
        AV_COPY32(mv_cache[2][3], mv_cache[2][2]);
    } else {
        AV_COPY32(mv_cache[1][3], cur_frame->motion_val[0][mot_xy + 2]);
        AV_COPY32(mv_cache[2][3],
                  cur_frame->motion_val[0][mot_xy + 2 + mot_stride]);
    }

    mx = 0;
    my = 0;
    for (i = 0; i < 4; i++) {
        const int x      = (i & 1) + 1;
        const int y      = (i >> 1) + 1;
        int16_t mv[5][2] = {
            { mv_cache[y][x][0],     mv_cache[y][x][1]         },
            { mv_cache[y - 1][x][0], mv_cache[y - 1][x][1]     },
            { mv_cache[y][x - 1][0], mv_cache[y][x - 1][1]     },
            { mv_cache[y][x + 1][0], mv_cache[y][x + 1][1]     },
            { mv_cache[y + 1][x][0], mv_cache[y + 1][x][1]     }
        };
        // FIXME cleanup
        obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                    ref_picture[0],
                    mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8,
                    pix_op[1],
                    mv);

        mx += mv[0][0];
        my += mv[0][1];
    }
    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
        chroma_4mv_motion(s, dest_cb, dest_cr,
                          ref_picture, pix_op[1],
                          mx, my);
}

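/* 4MV mode: compensate the four 8x8 luma blocks separately (half- or
 * quarter-pel) and derive the chroma prediction from their vectors. */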
static inline void apply_8x8(MpegEncContext *s,
                             uint8_t *dest_y,
                             uint8_t *dest_cb,
                             uint8_t *dest_cr,
                             int dir,
                             uint8_t **ref_picture,
                             qpel_mc_func (*qpix_op)[16],
                             op_pixels_func (*pix_op)[4])
{
    int dxy, mx, my, src_x, src_y;
    int i;
    int mb_x = s->mb_x;
    int mb_y = s->mb_y;
    uint8_t *ptr, *dest;

    mx = 0;
    my = 0;
    if (s->quarter_sample) {
        for (i = 0; i < 4; i++) {
            int motion_x = s->mv[dir][i][0];
            int motion_y = s->mv[dir][i][1];

            dxy   = ((motion_y & 3) << 2) | (motion_x & 3);
            src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
            src_y = mb_y * 16 + (motion_y >> 2) + (i >> 1) * 8;

            /* WARNING: do not forget half pels */
            src_x = av_clip(src_x, -16, s->width);
            if (src_x == s->width)
                dxy &= ~3;
            src_y = av_clip(src_y, -16, s->height);
            if (src_y == s->height)
                dxy &= ~12;

            ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
            if ((unsigned)src_x >= FFMAX(s->h_edge_pos - (motion_x & 3) - 7, 0) ||
                (unsigned)src_y >= FFMAX(s->v_edge_pos - (motion_y & 3) - 7, 0)) {
                s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
                                         s->linesize, s->linesize,
                                         9, 9,
                                         src_x, src_y,
                                         s->h_edge_pos,
                                         s->v_edge_pos);
                ptr = s->sc.edge_emu_buffer;
            }
            dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
            qpix_op[1][dxy](dest, ptr, s->linesize);

            mx += s->mv[dir][i][0] / 2;
            my += s->mv[dir][i][1] / 2;
        }
    } else {
        for (i = 0; i < 4; i++) {
            hpel_motion(s,
                        dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8,
                        mb_y * 16 + (i >> 1) * 8,
                        pix_op[1],
                        s->mv[dir][i][0],
                        s->mv[dir][i][1]);

            mx += s->mv[dir][i][0];
            my += s->mv[dir][i][1];
        }
    }

    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
        chroma_4mv_motion(s, dest_cb, dest_cr,
                          ref_picture, pix_op[1], mx, my);
}

/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static av_always_inline void mpv_motion_internal(MpegEncContext *s,
                                                 uint8_t *dest_y,
                                                 uint8_t *dest_cb,
                                                 uint8_t *dest_cr,
                                                 int dir,
                                                 uint8_t **ref_picture,
                                                 op_pixels_func (*pix_op)[4],
                                                 qpel_mc_func (*qpix_op)[16],
                                                 int is_mpeg12)
{
    int i;
    int mb_y = s->mb_y;

    prefetch_motion(s, ref_picture, dir);

    if (!is_mpeg12 && s->obmc && s->pict_type != AV_PICTURE_TYPE_B) {
        apply_obmc(s, dest_y, dest_cb, dest_cr, ref_picture, pix_op);
        return;
    }

    switch (s->mv_type) {
    case MV_TYPE_16X16:
        if (s->mcsel) {
            if (s->real_sprite_warping_points == 1) {
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            } else {
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                           ref_picture);
            }
        } else if (!is_mpeg12 && s->quarter_sample) {
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        } else if (!is_mpeg12 && (CONFIG_WMV2_DECODER || CONFIG_WMV2_ENCODER) &&
                   s->mspel && s->codec_id == AV_CODEC_ID_WMV2) {
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 16);
        } else {
            mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16, mb_y);
        }
        break;
    case MV_TYPE_8X8:
        if (!is_mpeg12)
            apply_8x8(s, dest_y, dest_cb, dest_cr,
                      dir, ref_picture, qpix_op, pix_op);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if (!is_mpeg12 && s->quarter_sample) {
                for (i = 0; i < 2; i++)
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
            } else {
                /* top field */
                mpeg_motion_field(s, dest_y, dest_cb, dest_cr,
                                  0, s->field_select[dir][0],
                                  ref_picture, pix_op,
                                  s->mv[dir][0][0], s->mv[dir][0][1], 8, mb_y);
                /* bottom field */
                mpeg_motion_field(s, dest_y, dest_cb, dest_cr,
                                  1, s->field_select[dir][1],
                                  ref_picture, pix_op,
                                  s->mv[dir][1][0], s->mv[dir][1][1], 8, mb_y);
            }
        } else {
            if (   s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != AV_PICTURE_TYPE_B && !s->first_field
                || !ref_picture[0]) {
                ref_picture = s->current_picture_ptr->f->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16, mb_y >> 1);
        }
        break;
    case MV_TYPE_16X8:
914 915
        for (i = 0; i < 2; i++) {
            uint8_t **ref2picture;

            if ((s->picture_structure == s->field_select[dir][i] + 1
                || s->pict_type == AV_PICTURE_TYPE_B || s->first_field) && ref_picture[0]) {
                ref2picture = ref_picture;
            } else {
                ref2picture = s->current_picture_ptr->f->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16 * i,
                        8, mb_y >> 1);

            dest_y  += 16 * s->linesize;
            dest_cb += (16 >> s->chroma_y_shift) * s->uvlinesize;
            dest_cr += (16 >> s->chroma_y_shift) * s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        if (s->picture_structure == PICT_FRAME) {
            for (i = 0; i < 2; i++) {
                int j;
                for (j = 0; j < 2; j++)
                    mpeg_motion_field(s, dest_y, dest_cb, dest_cr,
                                      j, j ^ i, ref_picture, pix_op,
                                      s->mv[dir][2 * i + j][0],
                                      s->mv[dir][2 * i + j][1], 8, mb_y);
                pix_op = s->hdsp.avg_pixels_tab;
            }
        } else {
            if (!ref_picture[0]) {
                ref_picture = s->current_picture_ptr->f->data;
            }
            for (i = 0; i < 2; i++) {
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            s->picture_structure != i + 1,
                            ref_picture, pix_op,
                            s->mv[dir][2 * i][0], s->mv[dir][2 * i][1],
                            16, mb_y >> 1);

                // after the put, subsequent predictions are averaged into the same block
                pix_op = s->hdsp.avg_pixels_tab;

                /* opposite parity is always in the same frame if this is
                 * the second field */
                if (!s->first_field) {
                    ref_picture = s->current_picture_ptr->f->data;
                }
            }
        }
        break;
    default: av_assert2(0);
    }
}

void ff_mpv_motion(MpegEncContext *s,
                   uint8_t *dest_y, uint8_t *dest_cb,
                   uint8_t *dest_cr, int dir,
                   uint8_t **ref_picture,
                   op_pixels_func (*pix_op)[4],
                   qpel_mc_func (*qpix_op)[16])
{
#if !CONFIG_SMALL
    if (s->out_format == FMT_MPEG1)
        mpv_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
                            ref_picture, pix_op, qpix_op, 1);
    else
#endif
        mpv_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
                            ref_picture, pix_op, qpix_op, 0);
}