/*
 * DXVA2 WMV3/VC-1 HW acceleration.
 *
 * copyright (c) 2010 Laurent Aimar
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "dxva2_internal.h"
#include "mpegutils.h"
#include "vc1.h"
#include "vc1data.h"

#define MAX_SLICES 1024
James Almer's avatar
James Almer committed
29

30 31
struct dxva2_picture_context {
    DXVA_PictureParameters pp;
32 33
    unsigned               slice_count;
    DXVA_SliceInfo         slice[MAX_SLICES];
34 35 36 37 38 39

    const uint8_t          *bitstream;
    unsigned               bitstream_size;
};

static void fill_picture_parameters(AVCodecContext *avctx,
40
                                    AVDXVAContext *ctx, const VC1Context *v,
41 42 43 44
                                    DXVA_PictureParameters *pp)
{
    const MpegEncContext *s = &v->s;
    const Picture *current_picture = s->current_picture_ptr;
45 46 47 48 49
    int intcomp = 0;

    // determine if intensity compensation is needed
    if (s->pict_type == AV_PICTURE_TYPE_P) {
      if ((v->fcm == ILACE_FRAME && v->intcomp) || (v->fcm != ILACE_FRAME && v->mv_mode == MV_PMODE_INTENSITY_COMP)) {
50
        if (v->lumscale != 32 || v->lumshift != 0 || (s->picture_structure != PICT_FRAME && (v->lumscale2 != 32 || v->lumshift2 != 0)))
51 52 53
          intcomp = 1;
      }
    }
54 55 56

    memset(pp, 0, sizeof(*pp));
    pp->wDecodedPictureIndex    =
57
    pp->wDeblockedPictureIndex  = ff_dxva2_get_surface_index(avctx, ctx, current_picture->f);
58
    if (s->pict_type != AV_PICTURE_TYPE_I && !v->bi_type)
59
        pp->wForwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->last_picture.f);
60 61
    else
        pp->wForwardRefPictureIndex = 0xffff;
62
    if (s->pict_type == AV_PICTURE_TYPE_B && !v->bi_type)
63
        pp->wBackwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->next_picture.f);
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
    else
        pp->wBackwardRefPictureIndex = 0xffff;
    if (v->profile == PROFILE_ADVANCED) {
        /* It is the cropped width/height -1 of the frame */
        pp->wPicWidthInMBminus1 = avctx->width  - 1;
        pp->wPicHeightInMBminus1= avctx->height - 1;
    } else {
        /* It is the coded width/height in macroblock -1 of the frame */
        pp->wPicWidthInMBminus1 = s->mb_width  - 1;
        pp->wPicHeightInMBminus1= s->mb_height - 1;
    }
    pp->bMacroblockWidthMinus1  = 15;
    pp->bMacroblockHeightMinus1 = 15;
    pp->bBlockWidthMinus1       = 7;
    pp->bBlockHeightMinus1      = 7;
    pp->bBPPminus1              = 7;
    if (s->picture_structure & PICT_TOP_FIELD)
        pp->bPicStructure      |= 0x01;
    if (s->picture_structure & PICT_BOTTOM_FIELD)
        pp->bPicStructure      |= 0x02;
84
    pp->bSecondField            = v->interlace && v->fcm == ILACE_FIELD && v->second_field;
85 86
    pp->bPicIntra               = s->pict_type == AV_PICTURE_TYPE_I || v->bi_type;
    pp->bPicBackwardPrediction  = s->pict_type == AV_PICTURE_TYPE_B && !v->bi_type;
87
    pp->bBidirectionalAveragingMode = (1                                           << 7) |
88 89
                                      ((DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) != 0) << 6) |
                                      ((DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) != 0) << 5) |
90
                                      (intcomp                                     << 4) |
91 92 93 94 95 96
                                      ((v->profile == PROFILE_ADVANCED)            << 3);
    pp->bMVprecisionAndChromaRelation = ((v->mv_mode == MV_PMODE_1MV_HPEL_BILIN) << 3) |
                                        (1                                       << 2) |
                                        (0                                       << 1) |
                                        (!s->quarter_sample                          );
    pp->bChromaFormat           = v->chromaformat;
97 98 99 100 101
    DXVA_CONTEXT_REPORT_ID(avctx, ctx)++;
    if (DXVA_CONTEXT_REPORT_ID(avctx, ctx) >= (1 << 16))
        DXVA_CONTEXT_REPORT_ID(avctx, ctx) = 1;
    pp->bPicScanFixed           = DXVA_CONTEXT_REPORT_ID(avctx, ctx) >> 8;
    pp->bPicScanMethod          = DXVA_CONTEXT_REPORT_ID(avctx, ctx) & 0xff;
102 103 104 105 106 107 108 109 110 111 112
    pp->bPicReadbackRequests    = 0;
    pp->bRcontrol               = v->rnd;
    pp->bPicSpatialResid8       = (v->panscanflag  << 7) |
                                  (v->refdist_flag << 6) |
                                  (s->loop_filter  << 5) |
                                  (v->fastuvmc     << 4) |
                                  (v->extended_mv  << 3) |
                                  (v->dquant       << 1) |
                                  (v->vstransform      );
    pp->bPicOverflowBlocks      = (v->quantizer_mode << 6) |
                                  (v->multires       << 5) |
113
                                  (v->resync_marker  << 4) |
114 115
                                  (v->rangered       << 3) |
                                  (s->max_b_frames       );
116
    pp->bPicExtrapolation       = (!v->interlace || v->fcm == PROGRESSIVE) ? 1 : 2;
117 118
    pp->bPicDeblocked           = ((!pp->bPicBackwardPrediction && v->overlap)        << 6) |
                                  ((v->profile != PROFILE_ADVANCED && v->rangeredfrm) << 5) |
119 120 121 122 123 124
                                  (s->loop_filter                                     << 1);
    pp->bPicDeblockConfined     = (v->postprocflag             << 7) |
                                  (v->broadcast                << 6) |
                                  (v->interlace                << 5) |
                                  (v->tfcntrflag               << 4) |
                                  (v->finterpflag              << 3) |
125
                                  ((s->pict_type != AV_PICTURE_TYPE_B) << 2) |
126 127
                                  (v->psf                      << 1) |
                                  (v->extended_dmv                 );
128
    if (s->pict_type != AV_PICTURE_TYPE_I)
129 130 131 132 133 134 135 136 137
        pp->bPic4MVallowed      = v->mv_mode == MV_PMODE_MIXED_MV ||
                                  (v->mv_mode == MV_PMODE_INTENSITY_COMP &&
                                   v->mv_mode2 == MV_PMODE_MIXED_MV);
    if (v->profile == PROFILE_ADVANCED)
        pp->bPicOBMC            = (v->range_mapy_flag  << 7) |
                                  (v->range_mapy       << 4) |
                                  (v->range_mapuv_flag << 3) |
                                  (v->range_mapuv          );
    pp->bPicBinPB               = 0;
138
    pp->bMV_RPS                 = (v->fcm == ILACE_FIELD && pp->bPicBackwardPrediction) ? v->refdist + 9 : 0;
139
    pp->bReservedBits           = v->pq;
140
    if (s->picture_structure == PICT_FRAME) {
141 142 143 144 145 146 147
        if (intcomp) {
            pp->wBitstreamFcodes      = v->lumscale;
            pp->wBitstreamPCEelements = v->lumshift;
        } else {
            pp->wBitstreamFcodes      = 32;
            pp->wBitstreamPCEelements = 0;
        }
148 149
    } else {
        /* Syntax: (top_field_param << 8) | bottom_field_param */
150 151 152 153 154 155 156
        if (intcomp) {
            pp->wBitstreamFcodes      = (v->lumscale << 8) | v->lumscale2;
            pp->wBitstreamPCEelements = (v->lumshift << 8) | v->lumshift2;
        } else {
            pp->wBitstreamFcodes      = (32 << 8) | 32;
            pp->wBitstreamPCEelements = 0;
        }
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
    }
    pp->bBitstreamConcealmentNeed   = 0;
    pp->bBitstreamConcealmentMethod = 0;
}

/**
 * Initialize one DXVA slice descriptor from the current decoder state.
 *
 * @param avctx    codec context whose priv_data is the VC1Context
 * @param slice    descriptor to fill; zeroed first
 * @param position value stored as dwSliceDataLocation
 * @param size     slice size in bytes; stored as a bit count
 */
static void fill_slice(AVCodecContext *avctx, DXVA_SliceInfo *slice,
                       unsigned position, unsigned size)
{
    const VC1Context *v = avctx->priv_data;
    const MpegEncContext *s = &v->s;

    memset(slice, 0, sizeof(*slice));
    slice->wHorizontalPosition = 0;
    slice->wVerticalPosition   = s->mb_y;
    slice->dwSliceBitsInBuffer = 8 * size;
    slice->dwSliceDataLocation = position;
    slice->bStartCodeBitOffset = 0;
    slice->bReservedBits       = (s->pict_type == AV_PICTURE_TYPE_B && !v->bi_type) ? v->bfraction_lut_index + 9 : 0;
    /* 0xffff for a skipped P frame; otherwise the current bit position of
     * the reader, plus 32 bits for AV_CODEC_ID_VC1. */
    slice->wMBbitOffset        = v->p_frame_skipped ? 0xffff : get_bits_count(&s->gb) + (avctx->codec_id == AV_CODEC_ID_VC1 ? 32 : 0);
    /* XXX We store the index of the first MB and it will be fixed later */
    slice->wNumberMBsInSlice   = (s->mb_y >> v->field_mode) * s->mb_width + s->mb_x;
    slice->wQuantizerScaleCode = v->pq;
    slice->wBadSliceChopping   = 0;
}

/**
 * Copy the collected slice data into the decoder's bitstream buffer and
 * commit both the bitstream and the slice-control buffers.
 *
 * For AV_CODEC_ID_VC1 a 4-byte start code is written in front of every
 * slice. Each slice descriptor is updated in place: its data location
 * becomes the offset inside the decoder buffer and its macroblock count is
 * turned from "index of first MB" into an actual count.
 *
 * @param avctx codec context
 * @param bs    descriptor filled in for the bitstream buffer
 * @param sc    descriptor passed on to ff_dxva2_commit_buffer() for the
 *              slice-control buffer
 * @return -1 on failure, otherwise the result of ff_dxva2_commit_buffer()
 */
static int commit_bitstream_and_slice_buffer(AVCodecContext *avctx,
                                             DECODER_BUFFER_DESC *bs,
                                             DECODER_BUFFER_DESC *sc)
{
    const VC1Context *v = avctx->priv_data;
    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
    const MpegEncContext *s = &v->s;
    struct dxva2_picture_context *ctx_pic = s->current_picture_ptr->hwaccel_picture_private;

    static const uint8_t start_code[] = { 0, 0, 1, 0x0d };
    const unsigned start_code_size = avctx->codec_id == AV_CODEC_ID_VC1 ? sizeof(start_code) : 0;
    const unsigned mb_count = s->mb_width * (s->mb_height >> v->field_mode);
    DXVA_SliceInfo *slice = NULL;
    /* Initialized so that a build/run where neither backend branch below
     * is taken fails cleanly instead of reading indeterminate values. */
    void     *dxva_data_ptr = NULL;
    uint8_t  *dxva_data, *current, *end;
    unsigned dxva_size = 0;
    unsigned padding;
    unsigned i;
    unsigned type = 0;

#if CONFIG_D3D11VA
    if (ff_dxva2_is_d3d11(avctx)) {
        type = D3D11_VIDEO_DECODER_BUFFER_BITSTREAM;
        if (FAILED(ID3D11VideoContext_GetDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
                                                       D3D11VA_CONTEXT(ctx)->decoder,
                                                       type,
                                                       &dxva_size, &dxva_data_ptr)))
            return -1;
    }
#endif
#if CONFIG_DXVA2
    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
        type = DXVA2_BitStreamDateBufferType;
        if (FAILED(IDirectXVideoDecoder_GetBuffer(DXVA2_CONTEXT(ctx)->decoder,
                                                  type,
                                                  &dxva_data_ptr, &dxva_size)))
            return -1;
    }
#endif

    /* No backend handed out a buffer: nothing to fill and nothing to release. */
    if (!dxva_data_ptr)
        return -1;

    dxva_data = dxva_data_ptr;
    current = dxva_data;
    end = dxva_data + dxva_size;

    for (i = 0; i < ctx_pic->slice_count; i++) {
        unsigned position, size;
        slice    = &ctx_pic->slice[i];
        position = slice->dwSliceDataLocation;
        size     = slice->dwSliceBitsInBuffer / 8;
        /* Stop early when the slice does not fit; i < slice_count is
         * reported as an error after the buffer has been released. */
        if (start_code_size + size > end - current) {
            av_log(avctx, AV_LOG_ERROR, "Failed to build bitstream\n");
            break;
        }
        slice->dwSliceDataLocation = current - dxva_data;

        /* fill_slice() stored the index of the first MB; convert it to a
         * count using the next slice's first MB (or the picture total). */
        if (i < ctx_pic->slice_count - 1)
            slice->wNumberMBsInSlice =
                slice[1].wNumberMBsInSlice - slice[0].wNumberMBsInSlice;
        else
            slice->wNumberMBsInSlice =
                mb_count - slice[0].wNumberMBsInSlice;

        /* write the appropriate frame, field or slice start code */
        if (start_code_size) {
            memcpy(current, start_code, start_code_size);
            if (i == 0 && v->second_field)
                current[3] = 0x0c;
            else if (i > 0)
                current[3] = 0x0b;

            current += start_code_size;
            slice->dwSliceBitsInBuffer += start_code_size * 8;
        }

        memcpy(current, &ctx_pic->bitstream[position], size);
        current += size;
    }
    /* Zero-pad to the next multiple of 128 bytes (a full 128 bytes when
     * already aligned), limited by the space left, and account for the
     * padding in the last processed slice. */
    padding = FFMIN(128 - ((current - dxva_data) & 127), end - current);
    if (slice && padding > 0) {
        memset(current, 0, padding);
        current += padding;
        slice->dwSliceBitsInBuffer += padding * 8;
    }

#if CONFIG_D3D11VA
    if (ff_dxva2_is_d3d11(avctx))
        if (FAILED(ID3D11VideoContext_ReleaseDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context, D3D11VA_CONTEXT(ctx)->decoder, type)))
            return -1;
#endif
#if CONFIG_DXVA2
    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
        if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(DXVA2_CONTEXT(ctx)->decoder, type)))
            return -1;
#endif
    /* The copy loop bailed out before handling every slice. */
    if (i < ctx_pic->slice_count)
        return -1;

    /* Describe the bitstream buffer, then switch 'type' to the
     * slice-control buffer type for the commit below. */
#if CONFIG_D3D11VA
    if (ff_dxva2_is_d3d11(avctx)) {
        D3D11_VIDEO_DECODER_BUFFER_DESC *dsc11 = bs;
        memset(dsc11, 0, sizeof(*dsc11));
        dsc11->BufferType           = type;
        dsc11->DataSize             = current - dxva_data;
        dsc11->NumMBsInBuffer       = mb_count;

        type = D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL;
    }
#endif
#if CONFIG_DXVA2
    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
        DXVA2_DecodeBufferDesc *dsc2 = bs;
        memset(dsc2, 0, sizeof(*dsc2));
        dsc2->CompressedBufferType = type;
        dsc2->DataSize             = current - dxva_data;
        dsc2->NumMBsInBuffer       = mb_count;

        type = DXVA2_SliceControlBufferType;
    }
#endif

    return ff_dxva2_commit_buffer(avctx, ctx, sc,
                                  type,
                                  ctx_pic->slice,
                                  ctx_pic->slice_count * sizeof(*ctx_pic->slice),
                                  mb_count);
}

/**
 * start_frame callback: fill the picture parameters for the current picture
 * and reset its slice/bitstream bookkeeping.
 *
 * @param avctx  codec context
 * @param buffer unused
 * @param size   unused
 * @return 0 on success, -1 when the DXVA context is not valid
 */
static int dxva2_vc1_start_frame(AVCodecContext *avctx,
                                 av_unused const uint8_t *buffer,
                                 av_unused uint32_t size)
{
    const VC1Context *v = avctx->priv_data;
    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
    struct dxva2_picture_context *ctx_pic = v->s.current_picture_ptr->hwaccel_picture_private;

    if (!DXVA_CONTEXT_VALID(avctx, ctx))
        return -1;
    assert(ctx_pic);

    fill_picture_parameters(avctx, ctx, v, &ctx_pic->pp);

    /* No slice has been submitted for this picture yet. */
    ctx_pic->slice_count    = 0;
    ctx_pic->bitstream_size = 0;
    ctx_pic->bitstream      = NULL;
    return 0;
}

/**
 * decode_slice callback: record one slice of the current picture.
 *
 * For AV_CODEC_ID_VC1 a leading 4-byte marker (as detected by IS_MARKER) is
 * skipped. The first slice's data pointer becomes the picture's bitstream
 * base; each slice is stored with its offset from that base.
 *
 * @param avctx  codec context
 * @param buffer slice data
 * @param size   slice data size in bytes
 * @return 0 on success, -1 when the slice table is already full
 */
static int dxva2_vc1_decode_slice(AVCodecContext *avctx,
                                  const uint8_t *buffer,
                                  uint32_t size)
{
    const VC1Context *v = avctx->priv_data;
    const Picture *current_picture = v->s.current_picture_ptr;
    struct dxva2_picture_context *ctx_pic = current_picture->hwaccel_picture_private;
    unsigned position;

    if (ctx_pic->slice_count >= MAX_SLICES) {
        /* slice_count is unsigned, so it is printed with %u. */
        avpriv_request_sample(avctx, "%u slices in dxva2",
                              ctx_pic->slice_count);
        return -1;
    }

    if (avctx->codec_id == AV_CODEC_ID_VC1 &&
        size >= 4 && IS_MARKER(AV_RB32(buffer))) {
        buffer += 4;
        size   -= 4;
    }

    if (!ctx_pic->bitstream)
        ctx_pic->bitstream = buffer;
    ctx_pic->bitstream_size += size;

    /* NOTE(review): this offset presumes every slice of a picture lies in
     * the same buffer as the first one - confirm against the caller. */
    position = buffer - ctx_pic->bitstream;
    fill_slice(avctx, &ctx_pic->slice[ctx_pic->slice_count++], position, size);
    return 0;
}

/**
 * end_frame callback: submit the collected picture to the decoder.
 *
 * @param avctx codec context
 * @return -1 when no slice data was collected, otherwise the result of
 *         ff_dxva2_common_end_frame()
 */
static int dxva2_vc1_end_frame(AVCodecContext *avctx)
{
    VC1Context *v = avctx->priv_data;
    struct dxva2_picture_context *ctx_pic = v->s.current_picture_ptr->hwaccel_picture_private;
    int ret;

    /* Both counters are unsigned, so "nothing collected" means zero. */
    if (!ctx_pic->slice_count || !ctx_pic->bitstream_size)
        return -1;

    ret = ff_dxva2_common_end_frame(avctx, v->s.current_picture_ptr->f,
                                    &ctx_pic->pp, sizeof(ctx_pic->pp),
                                    NULL, 0,
                                    commit_bitstream_and_slice_buffer);
    /* On success, signal the whole picture height as drawn. */
    if (!ret)
        ff_mpeg_draw_horiz_band(&v->s, 0, avctx->height);
    return ret;
}

#if CONFIG_WMV3_DXVA2_HWACCEL
/* WMV3 hwaccel descriptor for the AV_PIX_FMT_DXVA2_VLD pixel format. */
const AVHWAccel ff_wmv3_dxva2_hwaccel = {
    .name           = "wmv3_dxva2",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_WMV3,
    .pix_fmt        = AV_PIX_FMT_DXVA2_VLD,
    .init           = ff_dxva2_decode_init,
    .uninit         = ff_dxva2_decode_uninit,
    .start_frame    = dxva2_vc1_start_frame,
    .decode_slice   = dxva2_vc1_decode_slice,
    .end_frame      = dxva2_vc1_end_frame,
    .frame_params   = ff_dxva2_common_frame_params,
    .frame_priv_data_size = sizeof(struct dxva2_picture_context),
    .priv_data_size = sizeof(FFDXVASharedContext),
};
#endif

#if CONFIG_VC1_DXVA2_HWACCEL
/* VC-1 hwaccel descriptor for the AV_PIX_FMT_DXVA2_VLD pixel format. */
const AVHWAccel ff_vc1_dxva2_hwaccel = {
    .name           = "vc1_dxva2",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VC1,
    .pix_fmt        = AV_PIX_FMT_DXVA2_VLD,
    .init           = ff_dxva2_decode_init,
    .uninit         = ff_dxva2_decode_uninit,
    .start_frame    = dxva2_vc1_start_frame,
    .decode_slice   = dxva2_vc1_decode_slice,
    .end_frame      = dxva2_vc1_end_frame,
    .frame_params   = ff_dxva2_common_frame_params,
    .frame_priv_data_size = sizeof(struct dxva2_picture_context),
    .priv_data_size = sizeof(FFDXVASharedContext),
};
#endif

#if CONFIG_WMV3_D3D11VA_HWACCEL
/* WMV3 hwaccel descriptor for the AV_PIX_FMT_D3D11VA_VLD pixel format. */
const AVHWAccel ff_wmv3_d3d11va_hwaccel = {
    .name           = "wmv3_d3d11va",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_WMV3,
    .pix_fmt        = AV_PIX_FMT_D3D11VA_VLD,
    .init           = ff_dxva2_decode_init,
    .uninit         = ff_dxva2_decode_uninit,
    .start_frame    = dxva2_vc1_start_frame,
    .decode_slice   = dxva2_vc1_decode_slice,
    .end_frame      = dxva2_vc1_end_frame,
    .frame_params   = ff_dxva2_common_frame_params,
    .frame_priv_data_size = sizeof(struct dxva2_picture_context),
    .priv_data_size = sizeof(FFDXVASharedContext),
};
#endif

#if CONFIG_WMV3_D3D11VA2_HWACCEL
/* WMV3 hwaccel descriptor for the AV_PIX_FMT_D3D11 pixel format. */
const AVHWAccel ff_wmv3_d3d11va2_hwaccel = {
    .name           = "wmv3_d3d11va2",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_WMV3,
    .pix_fmt        = AV_PIX_FMT_D3D11,
    .init           = ff_dxva2_decode_init,
    .uninit         = ff_dxva2_decode_uninit,
    .start_frame    = dxva2_vc1_start_frame,
    .decode_slice   = dxva2_vc1_decode_slice,
    .end_frame      = dxva2_vc1_end_frame,
    .frame_params   = ff_dxva2_common_frame_params,
    .frame_priv_data_size = sizeof(struct dxva2_picture_context),
    .priv_data_size = sizeof(FFDXVASharedContext),
};
#endif

#if CONFIG_VC1_D3D11VA_HWACCEL
/* VC-1 hwaccel descriptor for the AV_PIX_FMT_D3D11VA_VLD pixel format. */
const AVHWAccel ff_vc1_d3d11va_hwaccel = {
    .name           = "vc1_d3d11va",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VC1,
    .pix_fmt        = AV_PIX_FMT_D3D11VA_VLD,
    .init           = ff_dxva2_decode_init,
    .uninit         = ff_dxva2_decode_uninit,
    .start_frame    = dxva2_vc1_start_frame,
    .decode_slice   = dxva2_vc1_decode_slice,
    .end_frame      = dxva2_vc1_end_frame,
    .frame_params   = ff_dxva2_common_frame_params,
    .frame_priv_data_size = sizeof(struct dxva2_picture_context),
    .priv_data_size = sizeof(FFDXVASharedContext),
};
#endif

#if CONFIG_VC1_D3D11VA2_HWACCEL
/* VC-1 hwaccel descriptor for the AV_PIX_FMT_D3D11 pixel format. */
const AVHWAccel ff_vc1_d3d11va2_hwaccel = {
    .name           = "vc1_d3d11va2",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VC1,
    .pix_fmt        = AV_PIX_FMT_D3D11,
    .init           = ff_dxva2_decode_init,
    .uninit         = ff_dxva2_decode_uninit,
    .start_frame    = dxva2_vc1_start_frame,
    .decode_slice   = dxva2_vc1_decode_slice,
    .end_frame      = dxva2_vc1_end_frame,
    .frame_params   = ff_dxva2_common_frame_params,
    .frame_priv_data_size = sizeof(struct dxva2_picture_context),
    .priv_data_size = sizeof(FFDXVASharedContext),
};
#endif