cuviddec.c 38.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * Nvidia CUVID decoder
 * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

22 23
#include "compat/cuda/dynlink_loader.h"

24 25 26
#include "libavutil/buffer.h"
#include "libavutil/mathematics.h"
#include "libavutil/hwcontext.h"
27
#include "libavutil/hwcontext_cuda_internal.h"
28 29
#include "libavutil/fifo.h"
#include "libavutil/log.h"
30
#include "libavutil/opt.h"
31
#include "libavutil/pixdesc.h"
32 33

#include "avcodec.h"
James Almer's avatar
James Almer committed
34
#include "decode.h"
35
#include "hwaccel.h"
36 37 38 39
#include "internal.h"

typedef struct CuvidContext
{
40 41
    AVClass *avclass;

42 43 44
    CUvideodecoder cudecoder;
    CUvideoparser cuparser;

45
    char *cu_gpu;
46
    int nb_surfaces;
47
    int drop_second_field;
48 49 50 51 52 53 54 55 56 57 58 59 60 61
    char *crop_expr;
    char *resize_expr;

    struct {
        int left;
        int top;
        int right;
        int bottom;
    } crop;

    struct {
        int width;
        int height;
    } resize;
62

63 64 65 66 67 68 69
    AVBufferRef *hwdevice;
    AVBufferRef *hwframe;

    AVBSFContext *bsf;

    AVFifoBuffer *frame_queue;

70
    int deint_mode;
71
    int deint_mode_current;
72 73
    int64_t prev_pts;

74
    int internal_error;
75
    int decoder_flushing;
76 77 78

    cudaVideoCodec codec_type;
    cudaVideoChromaFormat chroma_format;
79

80 81
    CUVIDDECODECAPS caps8, caps10, caps12;

82 83
    CUVIDPARSERPARAMS cuparseinfo;
    CUVIDEOFORMATEX cuparse_ext;
84 85 86

    CudaFunctions *cudl;
    CuvidFunctions *cvdl;
87 88
} CuvidContext;

89 90 91 92 93 94 95
/* One entry of the frame queue: a picture the parser reported as ready for display. */
typedef struct CuvidParsedFrame
{
    CUVIDPARSERDISPINFO dispinfo;   // copy of the display info handed to the display callback
    int second_field;               // 1 on the additional entry queued for a deinterlaced picture
    int is_deinterlacing;           // 1 when queued while the current deinterlace mode is not Weave
} CuvidParsedFrame;

96 97
/**
 * Log a driver call and translate its result into an FFmpeg error code.
 *
 * @param avctx logging context; its priv_data must be a CuvidContext whose
 *              cudl function table is available
 * @param err   result code returned by the driver call
 * @param func  textual form of the call, used in the log messages
 * @return 0 if err is CUDA_SUCCESS, AVERROR_EXTERNAL otherwise
 */
static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
{
    CuvidContext *ctx = avctx->priv_data;
    // Start from NULL so that a lookup which does not fill in its output can
    // never make the check below read an uninitialized pointer.
    const char *err_name = NULL;
    const char *err_string = NULL;

    av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);

    if (err == CUDA_SUCCESS)
        return 0;

    ctx->cudl->cuGetErrorName(err, &err_name);
    ctx->cudl->cuGetErrorString(err, &err_string);

    av_log(avctx, AV_LOG_ERROR, "%s failed", func);
    if (err_name && err_string)
        av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
    av_log(avctx, AV_LOG_ERROR, "\n");

    return AVERROR_EXTERNAL;
}

// Run a driver call through check_cu(); requires a local named avctx in scope.
#define CHECK_CU(x) check_cu(avctx, (x), #x)

/**
 * Parser sequence callback ("pfnSequenceCallback" in the trace log).
 *
 * Updates the codec context from the reported stream format (dimensions,
 * pixel format, aspect ratio, colour properties, bitrate, frame rate) and
 * creates the decoder, or re-creates it when the geometry, chroma format or
 * codec differ from what the existing decoder was created for.
 *
 * @param opaque the AVCodecContext
 * @param format stream format reported by the parser
 * @return 1 on success, 0 on failure; on failure ctx->internal_error holds
 *         the error code
 */
static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
{
    AVCodecContext *avctx = opaque;
    CuvidContext *ctx = avctx->priv_data;
    AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
    CUVIDDECODECAPS *caps = NULL;
    CUVIDDECODECREATEINFO cuinfo;
    int surface_fmt;

    // Snapshot the geometry the current decoder was set up with. The coded
    // size has to be captured here as well: it is overwritten right below, so
    // comparing it against 'format' afterwards could never detect a change.
    int old_width = avctx->width;
    int old_height = avctx->height;
    int old_coded_width = avctx->coded_width;
    int old_coded_height = avctx->coded_height;

    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
                                       AV_PIX_FMT_NONE,  // Will be updated below
                                       AV_PIX_FMT_NONE };

    av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);

    memset(&cuinfo, 0, sizeof(cuinfo));

    ctx->internal_error = 0;

    avctx->coded_width = cuinfo.ulWidth = format->coded_width;
    avctx->coded_height = cuinfo.ulHeight = format->coded_height;

    // apply cropping
    cuinfo.display_area.left = format->display_area.left + ctx->crop.left;
    cuinfo.display_area.top = format->display_area.top + ctx->crop.top;
    cuinfo.display_area.right = format->display_area.right - ctx->crop.right;
    cuinfo.display_area.bottom = format->display_area.bottom - ctx->crop.bottom;

    // width and height need to be set before calling ff_get_format
    if (ctx->resize_expr) {
        avctx->width = ctx->resize.width;
        avctx->height = ctx->resize.height;
    } else {
        avctx->width = cuinfo.display_area.right - cuinfo.display_area.left;
        avctx->height = cuinfo.display_area.bottom - cuinfo.display_area.top;
    }

    // target width/height need to be multiples of two
    cuinfo.ulTargetWidth = avctx->width = (avctx->width + 1) & ~1;
    cuinfo.ulTargetHeight = avctx->height = (avctx->height + 1) & ~1;

    // aspect ratio conversion, 1:1, depends on scaled resolution
    cuinfo.target_rect.left = 0;
    cuinfo.target_rect.top = 0;
    cuinfo.target_rect.right = cuinfo.ulTargetWidth;
    cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;

    // Pick the software pixel format and the matching capability record
    // from the luma bit depth.
    switch (format->bit_depth_luma_minus8) {
    case 0: // 8-bit
        pix_fmts[1] = AV_PIX_FMT_NV12;
        caps = &ctx->caps8;
        break;
    case 2: // 10-bit
        pix_fmts[1] = AV_PIX_FMT_P010;
        caps = &ctx->caps10;
        break;
    case 4: // 12-bit
        pix_fmts[1] = AV_PIX_FMT_P016;
        caps = &ctx->caps12;
        break;
    default:
        break;
    }

    if (!caps || !caps->bIsSupported) {
        av_log(avctx, AV_LOG_ERROR, "unsupported bit depth: %d\n",
               format->bit_depth_luma_minus8 + 8);
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    surface_fmt = ff_get_format(avctx, pix_fmts);
    if (surface_fmt < 0) {
        av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt);
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n",
           av_get_pix_fmt_name(avctx->pix_fmt),
           av_get_pix_fmt_name(surface_fmt),
           av_get_pix_fmt_name(avctx->sw_pix_fmt));

    avctx->pix_fmt = surface_fmt;

    // Update our hwframe ctx, as the get_format callback might have refreshed it!
    if (avctx->hw_frames_ctx) {
        av_buffer_unref(&ctx->hwframe);

        ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
        if (!ctx->hwframe) {
            ctx->internal_error = AVERROR(ENOMEM);
            return 0;
        }

        hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
    }

    // Sample aspect ratio = display aspect ratio / (width : height).
    ff_set_sar(avctx, av_div_q(
        (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
        (AVRational){ avctx->width, avctx->height }));

    // Progressive content is never deinterlaced, whatever was requested.
    ctx->deint_mode_current = format->progressive_sequence
                              ? cudaVideoDeinterlaceMode_Weave
                              : ctx->deint_mode;

    if (!format->progressive_sequence && ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave)
        avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
    else
        avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;

    if (format->video_signal_description.video_full_range_flag)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    avctx->color_primaries = format->video_signal_description.color_primaries;
    avctx->color_trc = format->video_signal_description.transfer_characteristics;
    avctx->colorspace = format->video_signal_description.matrix_coefficients;

    if (format->bitrate)
        avctx->bit_rate = format->bitrate;

    if (format->frame_rate.numerator && format->frame_rate.denominator) {
        avctx->framerate.num = format->frame_rate.numerator;
        avctx->framerate.den = format->frame_rate.denominator;
    }

    // Nothing the decoder depends on has changed: keep using it.
    if (ctx->cudecoder
            && old_coded_width == format->coded_width
            && old_coded_height == format->coded_height
            && avctx->width == old_width
            && avctx->height == old_height
            && ctx->chroma_format == format->chroma_format
            && ctx->codec_type == format->codec)
        return 1;

    if (ctx->cudecoder) {
        av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
        ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder));
        if (ctx->internal_error < 0)
            return 0;
        ctx->cudecoder = NULL;
    }

    // A frames context whose pool already exists must be able to hold the
    // frames we are about to produce.
    if (hwframe_ctx->pool && (
            hwframe_ctx->width < avctx->width ||
            hwframe_ctx->height < avctx->height ||
            hwframe_ctx->format != AV_PIX_FMT_CUDA ||
            hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
        av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
        av_log(avctx, AV_LOG_DEBUG, "width: %d <-> %d\n", hwframe_ctx->width, avctx->width);
        av_log(avctx, AV_LOG_DEBUG, "height: %d <-> %d\n", hwframe_ctx->height, avctx->height);
        av_log(avctx, AV_LOG_DEBUG, "format: %s <-> cuda\n", av_get_pix_fmt_name(hwframe_ctx->format));
        av_log(avctx, AV_LOG_DEBUG, "sw_format: %s <-> %s\n",
               av_get_pix_fmt_name(hwframe_ctx->sw_format), av_get_pix_fmt_name(avctx->sw_pix_fmt));
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    if (format->chroma_format != cudaVideoChromaFormat_420) {
        av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    ctx->chroma_format = format->chroma_format;

    cuinfo.CodecType = ctx->codec_type = format->codec;
    cuinfo.ChromaFormat = format->chroma_format;

    switch (avctx->sw_pix_fmt) {
    case AV_PIX_FMT_NV12:
        cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
        break;
    case AV_PIX_FMT_P010:
    case AV_PIX_FMT_P016:
        cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
    cuinfo.ulNumOutputSurfaces = 1;
    cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
    cuinfo.DeinterlaceMode = ctx->deint_mode_current;

    // Deinterlacing without dropping the second field emits two frames per
    // decoded picture, so the output frame rate doubles.
    if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
        avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});

    ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
    if (ctx->internal_error < 0)
        return 0;

    // No pool yet: fill in the frames context and initialize it ourselves.
    if (!hwframe_ctx->pool) {
        hwframe_ctx->format = AV_PIX_FMT_CUDA;
        hwframe_ctx->sw_format = avctx->sw_pix_fmt;
        hwframe_ctx->width = avctx->width;
        hwframe_ctx->height = avctx->height;

        if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
            av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
            return 0;
        }
    }

    return 1;
}

/**
 * Parser decode callback ("pfnDecodePicture" in the trace log): submits the
 * picture parameters to the decoder.
 *
 * @param opaque    the AVCodecContext
 * @param picparams picture parameters supplied by the parser
 * @return 1 on success, 0 on failure; the result of the decode call is
 *         stored in ctx->internal_error either way
 */
static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams)
{
    AVCodecContext *avctx = opaque;
    CuvidContext *ctx = avctx->priv_data;

    av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");

    ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
    if (ctx->internal_error < 0)
        return 0;

    return 1;
}

/**
 * Parser display callback: queues a picture for output.
 *
 * In Weave mode one queue entry is written per picture. In any other
 * deinterlace mode the entry is flagged as deinterlaced and, unless
 * drop_second_field is set, a second entry with second_field set is queued
 * for the same picture.
 *
 * @param opaque   the AVCodecContext
 * @param dispinfo display information supplied by the parser; copied into
 *                 the queue entry
 * @return always 1; ctx->internal_error is reset to 0
 */
static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINFO* dispinfo)
{
    AVCodecContext *avctx = opaque;
    CuvidContext *ctx = avctx->priv_data;
    CuvidParsedFrame parsed_frame = { { 0 } };

    parsed_frame.dispinfo = *dispinfo;
    ctx->internal_error = 0;

    if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
        av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
    } else {
        parsed_frame.is_deinterlacing = 1;
        av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
        if (!ctx->drop_second_field) {
            // Same picture again, marked as its second field.
            parsed_frame.second_field = 1;
            av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
        }
    }

    return 1;
}

James Almer's avatar
James Almer committed
373 374 375 376 377 378 379
/**
 * Tell whether the frame queue is too full to accept another packet.
 *
 * @return non-zero when the number of queued frames plus two exceeds the
 *         configured number of surfaces, 0 otherwise
 */
static int cuvid_is_buffer_full(AVCodecContext *avctx)
{
    CuvidContext *ctx = avctx->priv_data;
    // Bytes in the FIFO converted to a count of whole queue entries.
    size_t queued_frames = av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame);

    return queued_frames + 2 > ctx->nb_surfaces;
}

380
/**
 * Feed one packet to the parser, or signal end of stream.
 *
 * A NULL or empty packet is sent to the parser as end-of-stream and switches
 * the context into flushing mode. When a bitstream filter is configured,
 * non-empty packets are passed through it first.
 *
 * @param avctx codec context
 * @param avpkt packet to decode; NULL or size 0 to flush
 * @return 0 on success;
 *         AVERROR_EOF if the decoder was already flushing when called;
 *         AVERROR(EAGAIN) if the frame queue is full and the packet carries
 *         data;
 *         another negative error code on failure
 */
static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
{
    CuvidContext *ctx = avctx->priv_data;
    AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
    AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
    CUVIDSOURCEDATAPACKET cupkt;
    AVPacket filter_packet = { 0 };
    AVPacket filtered_packet = { 0 };
    // Flushing state as it was on entry; this very call may turn it on below.
    int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;

    av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");

    if (is_flush && avpkt && avpkt->size)
        return AVERROR_EOF;

    if (cuvid_is_buffer_full(avctx) && avpkt && avpkt->size)
        return AVERROR(EAGAIN);

    if (ctx->bsf && avpkt && avpkt->size) {
        if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
            av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
            return ret;
        }

        if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) {
            av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
            av_packet_unref(&filter_packet);
            return ret;
        }

        if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) {
            av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
            return ret;
        }

        // From here on work on the filtered data.
        avpkt = &filtered_packet;
    }

    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0) {
        av_packet_unref(&filtered_packet);
        return ret;
    }

    memset(&cupkt, 0, sizeof(cupkt));

    if (avpkt && avpkt->size) {
        cupkt.payload_size = avpkt->size;
        cupkt.payload = avpkt->data;

        if (avpkt->pts != AV_NOPTS_VALUE) {
            cupkt.flags = CUVID_PKT_TIMESTAMP;
            // With a known packet time base the timestamp is rescaled to a
            // 1/10000000 time base, otherwise it is passed through as is.
            if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
                cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000});
            else
                cupkt.timestamp = avpkt->pts;
        }
    } else {
        cupkt.flags = CUVID_PKT_ENDOFSTREAM;
        ctx->decoder_flushing = 1;
    }

    ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));

    av_packet_unref(&filtered_packet);

    // cuvidParseVideoData doesn't return an error just because stuff failed...
    if (ret >= 0 && ctx->internal_error) {
        av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n");
        ret = ctx->internal_error;
    }

    // The pushed context is popped on every path that reaches this point.
    eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));

    if (eret < 0)
        return eret;
    else if (ret < 0)
        return ret;
    else if (is_flush)
        return AVERROR_EOF;
    else
        return 0;
}

/**
 * Produce one output frame.
 *
 * Pulls a packet from the decode layer and feeds it to the parser when the
 * frame queue has room, then takes the oldest queued picture, maps it and
 * either copies it into a CUDA frame or downloads it into a software frame,
 * depending on avctx->pix_fmt. Finally the timestamps and interlacing flags
 * of the frame are filled in.
 *
 * @param avctx codec context
 * @param frame frame to fill
 * @return 0 when a frame was produced;
 *         AVERROR(EAGAIN) when the queue is empty and the decoder is not
 *         flushing;
 *         AVERROR_EOF when the queue is empty and the decoder is flushing;
 *         another negative error code on failure
 */
static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
{
    CuvidContext *ctx = avctx->priv_data;
    AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
    AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
    CUdeviceptr mapped_frame = 0;
    int ret = 0, eret = 0, pop_ret = 0;

    av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n");

    if (ctx->decoder_flushing) {
        ret = cuvid_decode_packet(avctx, NULL);
        if (ret < 0 && ret != AVERROR_EOF)
            return ret;
    }

    if (!cuvid_is_buffer_full(avctx)) {
        AVPacket pkt = {0};
        ret = ff_decode_get_packet(avctx, &pkt);
        if (ret < 0 && ret != AVERROR_EOF)
            return ret;
        ret = cuvid_decode_packet(avctx, &pkt);
        av_packet_unref(&pkt);
        // cuvid_is_buffer_full() should avoid this.
        if (ret == AVERROR(EAGAIN))
            ret = AVERROR_EXTERNAL;
        if (ret < 0 && ret != AVERROR_EOF)
            return ret;
    }

    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        return ret;

    if (av_fifo_size(ctx->frame_queue)) {
        CuvidParsedFrame parsed_frame;
        CUVIDPROCPARAMS params;
        unsigned int pitch = 0;
        int offset = 0;
        int i;

        av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);

        memset(&params, 0, sizeof(params));
        params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
        params.second_field = parsed_frame.second_field;
        params.top_field_first = parsed_frame.dispinfo.top_field_first;

        ret = CHECK_CU(ctx->cvdl->cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
        if (ret < 0)
            goto error;

        if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
            ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n");
                goto error;
            }

            ret = ff_decode_frame_props(avctx, frame);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n");
                goto error;
            }

            // Two planes: plane 0 has the full height, plane 1 half of it and
            // starts 'height' rows into the mapped surface.
            for (i = 0; i < 2; i++) {
                CUDA_MEMCPY2D cpy = {
                    .srcMemoryType = CU_MEMORYTYPE_DEVICE,
                    .dstMemoryType = CU_MEMORYTYPE_DEVICE,
                    .srcDevice     = mapped_frame,
                    .dstDevice     = (CUdeviceptr)frame->data[i],
                    .srcPitch      = pitch,
                    .dstPitch      = frame->linesize[i],
                    .srcY          = offset,
                    .WidthInBytes  = FFMIN(pitch, frame->linesize[i]),
                    .Height        = avctx->height >> (i ? 1 : 0),
                };

                ret = CHECK_CU(ctx->cudl->cuMemcpy2D(&cpy));
                if (ret < 0)
                    goto error;

                offset += avctx->height;
            }
        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
                   avctx->pix_fmt == AV_PIX_FMT_P010 ||
                   avctx->pix_fmt == AV_PIX_FMT_P016) {
            // Wrap the mapped surface in a temporary CUDA frame and download
            // it into a regular frame.
            AVFrame *tmp_frame = av_frame_alloc();
            if (!tmp_frame) {
                av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
                ret = AVERROR(ENOMEM);
                goto error;
            }

            tmp_frame->format        = AV_PIX_FMT_CUDA;
            tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
            if (!tmp_frame->hw_frames_ctx) {
                av_log(avctx, AV_LOG_ERROR, "av_buffer_ref failed\n");
                av_frame_free(&tmp_frame);
                ret = AVERROR(ENOMEM);
                goto error;
            }
            tmp_frame->data[0]       = (uint8_t*)mapped_frame;
            tmp_frame->linesize[0]   = pitch;
            tmp_frame->data[1]       = (uint8_t*)(mapped_frame + avctx->height * pitch);
            tmp_frame->linesize[1]   = pitch;
            tmp_frame->width         = avctx->width;
            tmp_frame->height        = avctx->height;

            ret = ff_get_buffer(avctx, frame, 0);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
                av_frame_free(&tmp_frame);
                goto error;
            }

            ret = av_hwframe_transfer_data(frame, tmp_frame, 0);
            if (ret) {
                av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n");
                av_frame_free(&tmp_frame);
                goto error;
            }
            av_frame_free(&tmp_frame);
        } else {
            ret = AVERROR_BUG;
            goto error;
        }

        frame->width = avctx->width;
        frame->height = avctx->height;
        // Inverse of the conversion applied when the packet was submitted.
        if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
            frame->pts = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
        else
            frame->pts = parsed_frame.dispinfo.timestamp;

        if (parsed_frame.second_field) {
            if (ctx->prev_pts == INT64_MIN) {
                // No earlier second field to measure against: derive the
                // offset from time base and frame rate. 64 bit math keeps the
                // products from overflowing, and the offset is skipped when
                // either value is unset, which would divide by zero.
                int64_t field_num = (int64_t)avctx->pkt_timebase.den * avctx->framerate.den;
                int64_t field_den = (int64_t)avctx->pkt_timebase.num * avctx->framerate.num;

                ctx->prev_pts = frame->pts;
                if (field_den)
                    frame->pts += field_num / field_den;
            } else {
                // Half the distance to the previous second field; kept in
                // 64 bit so large pts differences are not truncated.
                int64_t pts_diff = (frame->pts - ctx->prev_pts) / 2;
                ctx->prev_pts = frame->pts;
                frame->pts += pts_diff;
            }
        }

        /* CUVIDs opaque reordering breaks the internal pkt logic.
         * So set pkt_pts and clear all the other pkt_ fields.
         */
#if FF_API_PKT_PTS
FF_DISABLE_DEPRECATION_WARNINGS
        frame->pkt_pts = frame->pts;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
        frame->pkt_pos = -1;
        frame->pkt_duration = 0;
        frame->pkt_size = -1;

        frame->interlaced_frame = !parsed_frame.is_deinterlacing && !parsed_frame.dispinfo.progressive_frame;

        if (frame->interlaced_frame)
            frame->top_field_first = parsed_frame.dispinfo.top_field_first;
    } else if (ctx->decoder_flushing) {
        ret = AVERROR_EOF;
    } else {
        ret = AVERROR(EAGAIN);
    }

error:
    if (mapped_frame)
        eret = CHECK_CU(ctx->cvdl->cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));

    // Pop unconditionally, but do not let its result hide an unmap failure.
    pop_ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
    if (eret >= 0)
        eret = pop_ret;

    if (eret < 0)
        return eret;
    else
        return ret;
}

645 646 647 648 649 650 651 652
/**
 * Decode entry point for the old one-packet-in/one-frame-out API.
 *
 * Rejected with AVERROR(EINVAL) whenever the current deinterlace mode is
 * not Weave.
 *
 * @param avctx     codec context
 * @param data      AVFrame to fill
 * @param got_frame set to 1 when a frame was produced, 0 otherwise
 * @param avpkt     input packet
 * @return 0 on success (with or without a frame), a negative error code on
 *         failure
 */
static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
{
    CuvidContext *ctx = avctx->priv_data;
    AVFrame *frame = data;
    int ret = 0;

    av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n");

    if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave) {
        av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the old API\n");
        return AVERROR(EINVAL);
    }

    // Once flushing has started no further packets are submitted from here.
    if (!ctx->decoder_flushing) {
        ret = cuvid_decode_packet(avctx, avpkt);
        if (ret < 0)
            return ret;
    }

    ret = cuvid_output_frame(avctx, frame);
    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
        // No frame available is not an error for this API.
        *got_frame = 0;
    } else if (ret < 0) {
        return ret;
    } else {
        *got_frame = 1;
    }

    return 0;
}

676 677 678 679 680 681 682 683 684 685
/**
 * Release everything held by the decoder context: frame queue, bitstream
 * filter, parser, decoder, hardware context references and the cuvid
 * function table.
 *
 * @return always 0
 */
static av_cold int cuvid_decode_end(AVCodecContext *avctx)
{
    CuvidContext *ctx = avctx->priv_data;

    av_fifo_freep(&ctx->frame_queue);

    if (ctx->bsf)
        av_bsf_free(&ctx->bsf);

    // Clear each handle once destroyed so nothing can reuse a stale one.
    if (ctx->cuparser) {
        ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
        ctx->cuparser = NULL;
    }

    if (ctx->cudecoder) {
        ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
        ctx->cudecoder = NULL;
    }

    ctx->cudl = NULL;

    av_buffer_unref(&ctx->hwframe);
    av_buffer_unref(&ctx->hwdevice);

    cuvid_free_functions(&ctx->cvdl);

    return 0;
}

701 702 703 704 705
/**
 * Query the decoder capabilities for 8, 10 and 12 bit 4:2:0 content of the
 * given codec and check the probed stream parameters against them.
 *
 * The results are stored in ctx->caps8, ctx->caps10 and ctx->caps12. When
 * cuvidGetDecoderCaps is not available, 8 and 10 bit are assumed to be
 * supported, 12 bit is marked unsupported and no checks are performed.
 *
 * @param avctx         codec context
 * @param cuparseinfo   parser parameters; only CodecType is read
 * @param probed_width  width to validate
 * @param probed_height height to validate
 * @param bit_depth     10 or 12 select the matching capability record, any
 *                      other value selects the 8 bit one
 * @return 0 if the parameters are supported (or could not be checked),
 *         a negative error code otherwise
 */
static int cuvid_test_capabilities(AVCodecContext *avctx,
                                   const CUVIDPARSERPARAMS *cuparseinfo,
                                   int probed_width,
                                   int probed_height,
                                   int bit_depth)
{
    CuvidContext *ctx = avctx->priv_data;
    CUVIDDECODECAPS *caps;
    int res8 = 0, res10 = 0, res12 = 0;

    if (!ctx->cvdl->cuvidGetDecoderCaps) {
        av_log(avctx, AV_LOG_WARNING, "Used Nvidia driver is too old to perform a capability check.\n");
        av_log(avctx, AV_LOG_WARNING, "The minimum required version is "
#if defined(_WIN32) || defined(__CYGWIN__)
            "378.66"
#else
            "378.13"
#endif
            ". Continuing blind.\n");
        ctx->caps8.bIsSupported = ctx->caps10.bIsSupported = 1;
        // 12 bit was not supported before the capability check was introduced, so disable it.
        ctx->caps12.bIsSupported = 0;
        return 0;
    }

    // Set up the three queries: same codec and chroma format, differing bit depth.
    ctx->caps8.eCodecType = ctx->caps10.eCodecType = ctx->caps12.eCodecType
        = cuparseinfo->CodecType;
    ctx->caps8.eChromaFormat = ctx->caps10.eChromaFormat = ctx->caps12.eChromaFormat
        = cudaVideoChromaFormat_420;

    ctx->caps8.nBitDepthMinus8 = 0;
    ctx->caps10.nBitDepthMinus8 = 2;
    ctx->caps12.nBitDepthMinus8 = 4;

    res8 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps8));
    res10 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps10));
    res12 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps12));

    av_log(avctx, AV_LOG_VERBOSE, "CUVID capabilities for %s:\n", avctx->codec->name);
    av_log(avctx, AV_LOG_VERBOSE, "8 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
           ctx->caps8.bIsSupported, ctx->caps8.nMinWidth, ctx->caps8.nMaxWidth, ctx->caps8.nMinHeight, ctx->caps8.nMaxHeight);
    av_log(avctx, AV_LOG_VERBOSE, "10 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
           ctx->caps10.bIsSupported, ctx->caps10.nMinWidth, ctx->caps10.nMaxWidth, ctx->caps10.nMinHeight, ctx->caps10.nMaxHeight);
    av_log(avctx, AV_LOG_VERBOSE, "12 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
           ctx->caps12.bIsSupported, ctx->caps12.nMinWidth, ctx->caps12.nMaxWidth, ctx->caps12.nMinHeight, ctx->caps12.nMaxHeight);

    // Only a failed query for the bit depth actually in use is fatal.
    switch (bit_depth) {
    case 10:
        caps = &ctx->caps10;
        if (res10 < 0)
            return res10;
        break;
    case 12:
        caps = &ctx->caps12;
        if (res12 < 0)
            return res12;
        break;
    default:
        caps = &ctx->caps8;
        if (res8 < 0)
            return res8;
    }

    // Lack of 8 bit support is reported as the codec being unsupported.
    if (!ctx->caps8.bIsSupported) {
        av_log(avctx, AV_LOG_ERROR, "Codec %s is not supported.\n", avctx->codec->name);
        return AVERROR(EINVAL);
    }

    if (!caps->bIsSupported) {
        av_log(avctx, AV_LOG_ERROR, "Bit depth %d is not supported.\n", bit_depth);
        return AVERROR(EINVAL);
    }

    if (probed_width > caps->nMaxWidth || probed_width < caps->nMinWidth) {
        av_log(avctx, AV_LOG_ERROR, "Video width %d not within range from %d to %d\n",
               probed_width, caps->nMinWidth, caps->nMaxWidth);
        return AVERROR(EINVAL);
    }

    if (probed_height > caps->nMaxHeight || probed_height < caps->nMinHeight) {
        av_log(avctx, AV_LOG_ERROR, "Video height %d not within range from %d to %d\n",
               probed_height, caps->nMinHeight, caps->nMaxHeight);
        return AVERROR(EINVAL);
    }

    return 0;
}

/**
 * Initialise a CUVID decoder instance.
 *
 * The function negotiates the output pixel format, parses the "resize" and
 * "crop" option strings, loads the nvcuvid entry points, allocates the frame
 * queue, obtains a CUDA device/frames context (taken from the codec context
 * when one is supplied, otherwise created from the "gpu" option), selects
 * the CUVID codec type, prepares the sequence header, checks the hardware
 * capabilities, creates the video parser and feeds it the sequence header.
 *
 * @param avctx codec context whose priv_data is a CuvidContext
 * @return 0 on success, a negative AVERROR code on failure. Once anything has
 *         been allocated, failures go through cuvid_decode_end() so that
 *         partially initialised state is released.
 */
static av_cold int cuvid_decode_init(AVCodecContext *avctx)
{
    CuvidContext *ctx = avctx->priv_data;
    AVCUDADeviceContext *device_hwctx;
    AVHWDeviceContext *device_ctx;
    AVHWFramesContext *hwframe_ctx;
    CUVIDSOURCEDATAPACKET seq_pkt;
    CUcontext cuda_ctx = NULL;
    CUcontext dummy;
    const AVBitStreamFilter *bsf;
    const uint8_t *extradata = NULL;
    int extradata_size = 0;
    int ctx_pushed = 0; // set while our CUDA context is pushed and must be popped
    int ret = 0;

    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
                                       AV_PIX_FMT_NV12,
                                       AV_PIX_FMT_NONE };

    // Fall back to 1280x720, 8 bit when the caller has not provided values yet.
    int probed_width = avctx->coded_width ? avctx->coded_width : 1280;
    int probed_height = avctx->coded_height ? avctx->coded_height : 720;
    int probed_bit_depth = 8;

    const AVPixFmtDescriptor *probe_desc = av_pix_fmt_desc_get(avctx->pix_fmt);
    if (probe_desc && probe_desc->nb_components)
        probed_bit_depth = probe_desc->comp[0].depth;

    // Accelerated transcoding scenarios with 'ffmpeg' require that the
    // pix_fmt be set to AV_PIX_FMT_CUDA early. The sw_pix_fmt, and the
    // pix_fmt for non-accelerated transcoding, do not need to be correct
    // but need to be set to something. We arbitrarily pick NV12.
    ret = ff_get_format(avctx, pix_fmts);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
        // Nothing has been allocated yet, so a plain return is sufficient.
        return ret;
    }
    avctx->pix_fmt = ret;

    if (ctx->resize_expr && sscanf(ctx->resize_expr, "%dx%d",
                                   &ctx->resize.width, &ctx->resize.height) != 2) {
        av_log(avctx, AV_LOG_ERROR, "Invalid resize expressions\n");
        ret = AVERROR(EINVAL);
        goto error;
    }

    if (ctx->crop_expr && sscanf(ctx->crop_expr, "%dx%dx%dx%d",
                                 &ctx->crop.top, &ctx->crop.bottom,
                                 &ctx->crop.left, &ctx->crop.right) != 4) {
        av_log(avctx, AV_LOG_ERROR, "Invalid cropping expressions\n");
        ret = AVERROR(EINVAL);
        goto error;
    }

    ret = cuvid_load_functions(&ctx->cvdl, avctx);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
        goto error;
    }

    ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
    if (!ctx->frame_queue) {
        ret = AVERROR(ENOMEM);
        goto error;
    }

    if (avctx->hw_frames_ctx) {
        // The caller supplied a frames context: reuse it and its device.
        ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
        if (!ctx->hwframe) {
            ret = AVERROR(ENOMEM);
            goto error;
        }

        hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;

        ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref);
        if (!ctx->hwdevice) {
            ret = AVERROR(ENOMEM);
            goto error;
        }
    } else {
        // No frames context: take the caller's device, or create one from
        // the "gpu" option, then allocate our own frames context on it.
        if (avctx->hw_device_ctx) {
            ctx->hwdevice = av_buffer_ref(avctx->hw_device_ctx);
            if (!ctx->hwdevice) {
                ret = AVERROR(ENOMEM);
                goto error;
            }
        } else {
            ret = av_hwdevice_ctx_create(&ctx->hwdevice, AV_HWDEVICE_TYPE_CUDA, ctx->cu_gpu, NULL, 0);
            if (ret < 0)
                goto error;
        }

        ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
        if (!ctx->hwframe) {
            av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
            ret = AVERROR(ENOMEM);
            goto error;
        }

        hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
    }

    device_ctx = hwframe_ctx->device_ctx;
    device_hwctx = device_ctx->hwctx;

    cuda_ctx = device_hwctx->cuda_ctx;
    ctx->cudl = device_hwctx->internal->cuda_dl;

    memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
    memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
    memset(&seq_pkt, 0, sizeof(seq_pkt));

    ctx->cuparseinfo.pExtVideoInfo = &ctx->cuparse_ext;

    switch (avctx->codec->id) {
#if CONFIG_H264_CUVID_DECODER
    case AV_CODEC_ID_H264:
        ctx->cuparseinfo.CodecType = cudaVideoCodec_H264;
        break;
#endif
#if CONFIG_HEVC_CUVID_DECODER
    case AV_CODEC_ID_HEVC:
        ctx->cuparseinfo.CodecType = cudaVideoCodec_HEVC;
        break;
#endif
#if CONFIG_MJPEG_CUVID_DECODER
    case AV_CODEC_ID_MJPEG:
        ctx->cuparseinfo.CodecType = cudaVideoCodec_JPEG;
        break;
#endif
#if CONFIG_MPEG1_CUVID_DECODER
    case AV_CODEC_ID_MPEG1VIDEO:
        ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG1;
        break;
#endif
#if CONFIG_MPEG2_CUVID_DECODER
    case AV_CODEC_ID_MPEG2VIDEO:
        ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG2;
        break;
#endif
#if CONFIG_MPEG4_CUVID_DECODER
    case AV_CODEC_ID_MPEG4:
        ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
        break;
#endif
#if CONFIG_VP8_CUVID_DECODER
    case AV_CODEC_ID_VP8:
        ctx->cuparseinfo.CodecType = cudaVideoCodec_VP8;
        break;
#endif
#if CONFIG_VP9_CUVID_DECODER
    case AV_CODEC_ID_VP9:
        ctx->cuparseinfo.CodecType = cudaVideoCodec_VP9;
        break;
#endif
#if CONFIG_VC1_CUVID_DECODER
    case AV_CODEC_ID_VC1:
        ctx->cuparseinfo.CodecType = cudaVideoCodec_VC1;
        break;
#endif
    default:
        av_log(avctx, AV_LOG_ERROR, "Invalid CUVID codec!\n");
        // The frame queue and hw contexts are already allocated at this
        // point, so leave through the cleanup path instead of returning.
        ret = AVERROR_BUG;
        goto error;
    }

    if (avctx->codec->id == AV_CODEC_ID_H264 || avctx->codec->id == AV_CODEC_ID_HEVC) {
        if (avctx->codec->id == AV_CODEC_ID_H264)
            bsf = av_bsf_get_by_name("h264_mp4toannexb");
        else
            bsf = av_bsf_get_by_name("hevc_mp4toannexb");

        if (!bsf) {
            ret = AVERROR_BSF_NOT_FOUND;
            goto error;
        }

        ret = av_bsf_alloc(bsf, &ctx->bsf);
        if (ret < 0)
            goto error;

        if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) {
            av_bsf_free(&ctx->bsf);
            goto error;
        }

        // For H.264/HEVC the sequence header is the filter's output extradata.
        extradata      = ctx->bsf->par_out->extradata;
        extradata_size = ctx->bsf->par_out->extradata_size;
    } else {
        extradata      = avctx->extradata;
        extradata_size = avctx->extradata_size;
    }

    if (extradata && extradata_size > 0) {
        const int seqhdr_capacity = sizeof(ctx->cuparse_ext.raw_seqhdr_data);
        const int seqhdr_size     = FFMIN(seqhdr_capacity, extradata_size);

        if (seqhdr_size < extradata_size)
            av_log(avctx, AV_LOG_WARNING,
                   "Extradata of %d bytes exceeds the %d byte sequence header buffer, truncating.\n",
                   extradata_size, seqhdr_capacity);

        // The advertised length must match what was actually copied;
        // otherwise a later reader of raw_seqhdr_data runs past its end.
        ctx->cuparse_ext.format.seqhdr_data_length = seqhdr_size;
        memcpy(ctx->cuparse_ext.raw_seqhdr_data, extradata, seqhdr_size);
    }

    ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
    ctx->cuparseinfo.ulMaxDisplayDelay = 4;
    ctx->cuparseinfo.pUserData = avctx;
    ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
    ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
    ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;

    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        goto error;
    ctx_pushed = 1;

    ret = cuvid_test_capabilities(avctx, &ctx->cuparseinfo,
                                  probed_width,
                                  probed_height,
                                  probed_bit_depth);
    if (ret < 0)
        goto error;

    ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
    if (ret < 0)
        goto error;

    seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
    seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;

    if (seq_pkt.payload && seq_pkt.payload_size) {
        ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
        if (ret < 0)
            goto error;
    }

    // Clear the flag first so a failing pop is not retried on the error path.
    ctx_pushed = 0;
    ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
    if (ret < 0)
        goto error;

    ctx->prev_pts = INT64_MIN;

    if (!avctx->pkt_timebase.num || !avctx->pkt_timebase.den)
        av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n");

    return 0;

error:
    // Undo the push so a failed init leaves the CUDA context stack as it
    // was on entry. The success path also pops before returning, so cleanup
    // runs in the same state in both cases.
    if (ctx_pushed)
        ctx->cudl->cuCtxPopCurrent(&dummy);
    cuvid_decode_end(avctx);
    return ret;
}

1027 1028 1029 1030 1031 1032
/**
 * Flush callback: discard all queued frames and rebuild the parser.
 *
 * The frame queue is freed and reallocated, any existing decoder and parser
 * are destroyed, a new parser is created from the stored parser parameters
 * and the stored sequence header is fed to it again. Failures are logged;
 * the function has no return value.
 *
 * @param avctx codec context whose priv_data is a CuvidContext
 */
static void cuvid_flush(AVCodecContext *avctx)
{
    CuvidContext *ctx = avctx->priv_data;
    AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
    AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
    CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
    int ret;

    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        goto error;

    av_fifo_freep(&ctx->frame_queue);

    ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
    if (!ctx->frame_queue) {
        av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
        // The context was pushed above; do not leave it on the stack.
        ctx->cudl->cuCtxPopCurrent(&dummy);
        return;
    }

    if (ctx->cudecoder) {
        ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
        ctx->cudecoder = NULL;
    }

    if (ctx->cuparser) {
        ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
        ctx->cuparser = NULL;
    }

    ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
    if (ret < 0)
        goto error_pop;

    // Replay the sequence header stored at init time into the new parser.
    seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
    seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;

    if (seq_pkt.payload && seq_pkt.payload_size) {
        ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
        if (ret < 0)
            goto error_pop;
    }

    ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
    if (ret < 0)
        goto error;

    ctx->prev_pts = INT64_MIN;
    ctx->decoder_flushing = 0;

    return;

 error_pop:
    // Reached only while the context is still pushed: balance the push
    // before reporting the failure.
    ctx->cudl->cuCtxPopCurrent(&dummy);
 error:
    av_log(avctx, AV_LOG_ERROR, "CUDA reinit on flush failed\n");
}

1083 1084 1085 1086 1087 1088 1089
/* Byte offset of an option's backing field inside CuvidContext. */
#define OFFSET(x) offsetof(CuvidContext, x)
/* Flags shared by every option below: video, decoding. Parenthesised so the
 * macro stays a single operand wherever it is expanded. */
#define VD (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)

/* Private options common to all CUVID decoder wrappers. */
static const AVOption options[] = {
    /* Deinterlacing mode and its named constants (unit "deint"). */
    { "deint",    "Set deinterlacing mode", OFFSET(deint_mode), AV_OPT_TYPE_INT,   { .i64 = cudaVideoDeinterlaceMode_Weave    }, cudaVideoDeinterlaceMode_Weave, cudaVideoDeinterlaceMode_Adaptive, VD, "deint" },
    { "weave",    "Weave deinterlacing (do nothing)",        0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Weave    }, 0, 0, VD, "deint" },
    { "bob",      "Bob deinterlacing",                       0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob      }, 0, 0, VD, "deint" },
    { "adaptive", "Adaptive deinterlacing",                  0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
    /* Device selection and surface pool size. */
    { "gpu",      "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
    { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
    { "drop_second_field", "Drop second field when deinterlacing", OFFSET(drop_second_field), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
    /* Geometry expressions; parsed with sscanf() during decoder init. */
    { "crop",     "Crop (top)x(bottom)x(left)x(right)", OFFSET(crop_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
    { "resize",   "Resize (width)x(height)", OFFSET(resize_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
    { NULL }
};

1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110
static const AVCodecHWConfigInternal *cuvid_hw_configs[] = {
    &(const AVCodecHWConfigInternal) {
        .public = {
            .pix_fmt     = AV_PIX_FMT_CUDA,
            .methods     = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX |
                           AV_CODEC_HW_CONFIG_METHOD_INTERNAL,
            .device_type = AV_HWDEVICE_TYPE_CUDA
        },
        .hwaccel = NULL,
    },
    NULL
};

1111
/*
 * Define one CUVID decoder wrapper.
 *
 * x is the lower-case prefix used for the generated identifiers and the
 * codec name ("<x>_cuvid"); X is the AV_CODEC_ID_ suffix and is also used
 * in the long name. The expansion emits an AVClass named x##_cuvid_class
 * and an AVCodec named ff_##x##_cuvid_decoder that share the callbacks,
 * options and hardware configuration defined in this file.
 *
 * Every line of the macro body must end in a backslash; anything placed
 * between the continuation lines terminates the definition early.
 */
#define DEFINE_CUVID_CODEC(x, X) \
    static const AVClass x##_cuvid_class = { \
        .class_name = #x "_cuvid", \
        .item_name = av_default_item_name, \
        .option = options, \
        .version = LIBAVUTIL_VERSION_INT, \
    }; \
    AVCodec ff_##x##_cuvid_decoder = { \
        .name           = #x "_cuvid", \
        .long_name      = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
        .type           = AVMEDIA_TYPE_VIDEO, \
        .id             = AV_CODEC_ID_##X, \
        .priv_data_size = sizeof(CuvidContext), \
        .priv_class     = &x##_cuvid_class, \
        .init           = cuvid_decode_init, \
        .close          = cuvid_decode_end, \
        .decode         = cuvid_decode_frame, \
        .receive_frame  = cuvid_output_frame, \
        .flush          = cuvid_flush, \
        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
        .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
                                                        AV_PIX_FMT_NV12, \
                                                        AV_PIX_FMT_P010, \
                                                        AV_PIX_FMT_P016, \
                                                        AV_PIX_FMT_NONE }, \
        .hw_configs     = cuvid_hw_configs, \
        .wrapper_name   = "cuvid", \
    };

/* One decoder wrapper per codec, each compiled in only when its
 * CONFIG_*_CUVID_DECODER switch is enabled. */
#if CONFIG_HEVC_CUVID_DECODER
DEFINE_CUVID_CODEC(hevc, HEVC)
#endif

#if CONFIG_H264_CUVID_DECODER
DEFINE_CUVID_CODEC(h264, H264)
#endif

#if CONFIG_MJPEG_CUVID_DECODER
DEFINE_CUVID_CODEC(mjpeg, MJPEG)
#endif

#if CONFIG_MPEG1_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO)
#endif

#if CONFIG_MPEG2_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO)
#endif

#if CONFIG_MPEG4_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg4, MPEG4)
#endif

#if CONFIG_VP8_CUVID_DECODER
DEFINE_CUVID_CODEC(vp8, VP8)
#endif

#if CONFIG_VP9_CUVID_DECODER
DEFINE_CUVID_CODEC(vp9, VP9)
#endif

#if CONFIG_VC1_CUVID_DECODER
DEFINE_CUVID_CODEC(vc1, VC1)
#endif