nvenc.c 55.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * H.264 hardware encoding using nvidia nvenc
 * Copyright (c) 2014 Timo Rothenpieler <timo@rothenpieler.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

22 23
#include "config.h"

24
#if defined(_WIN32)
25
#include <windows.h>
26 27 28 29 30 31 32 33 34 35 36

#define CUDA_LIBNAME TEXT("nvcuda.dll")
#if ARCH_X86_64
#define NVENC_LIBNAME TEXT("nvEncodeAPI64.dll")
#else
#define NVENC_LIBNAME TEXT("nvEncodeAPI.dll")
#endif

#define dlopen(filename, flags) LoadLibrary((filename))
#define dlsym(handle, symbol)   GetProcAddress(handle, symbol)
#define dlclose(handle)         FreeLibrary(handle)
37 38
#else
#include <dlfcn.h>
39 40 41

#define CUDA_LIBNAME "libcuda.so"
#define NVENC_LIBNAME "libnvidia-encode.so"
42 43
#endif

44
#include "libavutil/hwcontext.h"
45 46 47 48
#include "libavutil/imgutils.h"
#include "libavutil/avassert.h"
#include "libavutil/mem.h"
#include "internal.h"
49
#include "nvenc.h"
50

51
/* Minimum CUDA compute capability accepted for NVENC, packed the same way
 * nvenc_check_device() packs it: (major << 4) | minor, so 0x30 means SM 3.0. */
#define NVENC_CAP 0x30
52 53 54 55
/*
 * Non-zero when the rate-control mode `rc` is one of the modes this file
 * treats as constant bitrate: NV_ENC_PARAMS_RC_CBR,
 * NV_ENC_PARAMS_RC_2_PASS_QUALITY or NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP.
 *
 * The argument is parenthesized so compound expressions (e.g. `a & b` or
 * `c ? x : y`) expand with the intended precedence. It may be evaluated up
 * to three times, so it must not have side effects.
 */
#define IS_CBR(rc) ((rc) == NV_ENC_PARAMS_RC_CBR ||               \
                    (rc) == NV_ENC_PARAMS_RC_2_PASS_QUALITY ||    \
                    (rc) == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP)

56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
/*
 * Open the shared library `path` with dlopen() and store the handle in `l`.
 *
 * On failure this logs "Cannot load <path>" and RETURNS AVERROR_UNKNOWN from
 * the enclosing function. It therefore may only be used inside a function
 * that returns int and has a variable named `avctx` in scope (used as the
 * logging context).
 */
#define LOAD_LIBRARY(l, path)                   \
    do {                                        \
        if (!((l) = dlopen(path, RTLD_LAZY))) { \
            av_log(avctx, AV_LOG_ERROR,         \
                   "Cannot load %s\n",          \
                   path);                       \
            return AVERROR_UNKNOWN;             \
        }                                       \
    } while (0)

/*
 * Resolve `symbol` in the already opened library `lib` with dlsym() and
 * store the resulting pointer in `fun`.
 *
 * Same failure contract as LOAD_LIBRARY: logs "Cannot load <symbol>" through
 * the in-scope `avctx` and returns AVERROR_UNKNOWN from the enclosing
 * function. The library handle is not closed here.
 */
#define LOAD_SYMBOL(fun, lib, symbol)        \
    do {                                     \
        if (!((fun) = dlsym(lib, symbol))) { \
            av_log(avctx, AV_LOG_ERROR,      \
                   "Cannot load %s\n",       \
                   symbol);                  \
            return AVERROR_UNKNOWN;          \
        }                                    \
    } while (0)
75

76 77 78 79 80 81 82 83 84
/*
 * Pixel format list exported by this file, terminated by AV_PIX_FMT_NONE.
 * AV_PIX_FMT_CUDA is only listed when the build has CONFIG_CUDA enabled.
 * NOTE(review): presumably referenced by the encoder definitions as their
 * supported input formats -- confirm against the users of this symbol.
 */
const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_YUV444P,
#if CONFIG_CUDA
    AV_PIX_FMT_CUDA,
#endif
    AV_PIX_FMT_NONE
};
85

86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
/*
 * Lookup table used by nvenc_map_error(): each entry pairs an NVENC status
 * code with the AVERROR code returned to callers and a short human-readable
 * description used in log messages. Status codes missing from this table
 * are reported as AVERROR_UNKNOWN / "unknown error" by nvenc_map_error().
 */
static const struct {
    NVENCSTATUS nverr;   /* status code as returned by the NVENC API */
    int         averr;   /* corresponding AVERROR code (0 for success) */
    const char *desc;    /* description printed by nvenc_print_error() */
} nvenc_errors[] = {
    { NV_ENC_SUCCESS,                      0,                "success"                  },
    { NV_ENC_ERR_NO_ENCODE_DEVICE,         AVERROR(ENOENT),  "no encode device"         },
    { NV_ENC_ERR_UNSUPPORTED_DEVICE,       AVERROR(ENOSYS),  "unsupported device"       },
    { NV_ENC_ERR_INVALID_ENCODERDEVICE,    AVERROR(EINVAL),  "invalid encoder device"   },
    { NV_ENC_ERR_INVALID_DEVICE,           AVERROR(EINVAL),  "invalid device"           },
    { NV_ENC_ERR_DEVICE_NOT_EXIST,         AVERROR(EIO),     "device does not exist"    },
    { NV_ENC_ERR_INVALID_PTR,              AVERROR(EFAULT),  "invalid ptr"              },
    { NV_ENC_ERR_INVALID_EVENT,            AVERROR(EINVAL),  "invalid event"            },
    { NV_ENC_ERR_INVALID_PARAM,            AVERROR(EINVAL),  "invalid param"            },
    { NV_ENC_ERR_INVALID_CALL,             AVERROR(EINVAL),  "invalid call"             },
    { NV_ENC_ERR_OUT_OF_MEMORY,            AVERROR(ENOMEM),  "out of memory"            },
    { NV_ENC_ERR_ENCODER_NOT_INITIALIZED,  AVERROR(EINVAL),  "encoder not initialized"  },
    { NV_ENC_ERR_UNSUPPORTED_PARAM,        AVERROR(ENOSYS),  "unsupported param"        },
    { NV_ENC_ERR_LOCK_BUSY,                AVERROR(EAGAIN),  "lock busy"                },
    { NV_ENC_ERR_NOT_ENOUGH_BUFFER,        AVERROR(ENOBUFS), "not enough buffer"        },
    { NV_ENC_ERR_INVALID_VERSION,          AVERROR(EINVAL),  "invalid version"          },
    { NV_ENC_ERR_MAP_FAILED,               AVERROR(EIO),     "map failed"               },
    { NV_ENC_ERR_NEED_MORE_INPUT,          AVERROR(EAGAIN),  "need more input"          },
    { NV_ENC_ERR_ENCODER_BUSY,             AVERROR(EAGAIN),  "encoder busy"             },
    { NV_ENC_ERR_EVENT_NOT_REGISTERD,      AVERROR(EBADF),   "event not registered"     },
    { NV_ENC_ERR_GENERIC,                  AVERROR_UNKNOWN,  "generic error"            },
    { NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY,  AVERROR(EINVAL),  "incompatible client key"  },
    { NV_ENC_ERR_UNIMPLEMENTED,            AVERROR(ENOSYS),  "unimplemented"            },
    { NV_ENC_ERR_RESOURCE_REGISTER_FAILED, AVERROR(EIO),     "resource register failed" },
    { NV_ENC_ERR_RESOURCE_NOT_REGISTERED,  AVERROR(EBADF),   "resource not registered"  },
    { NV_ENC_ERR_RESOURCE_NOT_MAPPED,      AVERROR(EBADF),   "resource not mapped"      },
};

/**
 * Translate an NVENC status code into an AVERROR code.
 *
 * @param err  status code returned by an NVENC API call
 * @param desc if non-NULL, receives a pointer to a static description of
 *             the status ("unknown error" when the code is not in the
 *             nvenc_errors table)
 * @return the AVERROR code paired with err in nvenc_errors, or
 *         AVERROR_UNKNOWN when err is not listed
 */
static int nvenc_map_error(NVENCSTATUS err, const char **desc)
{
    /* Start from the "not found" answer and overwrite it on a table hit. */
    const char *text = "unknown error";
    int code = AVERROR_UNKNOWN;
    int idx = 0;

    while (idx < FF_ARRAY_ELEMS(nvenc_errors)) {
        if (nvenc_errors[idx].nverr == err) {
            text = nvenc_errors[idx].desc;
            code = nvenc_errors[idx].averr;
            break;
        }
        idx++;
    }

    if (desc)
        *desc = text;

    return code;
}

/**
 * Log an NVENC failure and convert it to an AVERROR code.
 *
 * The message has the form "<error_string>: <description> (<numeric status>)"
 * and is emitted at AV_LOG_ERROR level.
 *
 * @param log_ctx      logging context handed to av_log()
 * @param err          NVENC status code to report
 * @param error_string caller-supplied prefix describing the failed operation
 * @return the AVERROR code nvenc_map_error() associates with err
 */
static int nvenc_print_error(void *log_ctx, NVENCSTATUS err,
                                     const char *error_string)
{
    const char *description = NULL;
    const int averr = nvenc_map_error(err, &description);

    av_log(log_ctx, AV_LOG_ERROR, "%s: %s (%d)\n", error_string, description, err);

    return averr;
}

144
/**
 * Populate the dynamic-load function table of the encoder context.
 *
 * With CONFIG_CUDA the CUDA driver entry points are taken directly from the
 * linked symbols; otherwise the CUDA library is opened at run time and each
 * entry point is resolved by name. The NVENC library is always opened at run
 * time, and its NvEncodeAPICreateInstance entry point is used to fill
 * dl_fn->nvenc_funcs.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext
 * @return 0 on success, AVERROR_UNKNOWN if a library or symbol cannot be
 *         loaded (via LOAD_LIBRARY / LOAD_SYMBOL, which return from this
 *         function), or the mapped NVENC error if instance creation fails
 */
static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *fns = &ctx->nvenc_dload_funcs;
    PNVENCODEAPICREATEINSTANCE create_instance;
    NVENCSTATUS status;

#if CONFIG_CUDA
    /* CUDA is linked in: use the driver API symbols directly. */
    fns->cu_init                      = cuInit;
    fns->cu_device_get_count          = cuDeviceGetCount;
    fns->cu_device_get                = cuDeviceGet;
    fns->cu_device_get_name           = cuDeviceGetName;
    fns->cu_device_compute_capability = cuDeviceComputeCapability;
    fns->cu_ctx_create                = cuCtxCreate_v2;
    fns->cu_ctx_pop_current           = cuCtxPopCurrent_v2;
    fns->cu_ctx_destroy               = cuCtxDestroy_v2;
#else
    /* No link-time CUDA: open the driver library and resolve by name. */
    LOAD_LIBRARY(fns->cuda, CUDA_LIBNAME);

    LOAD_SYMBOL(fns->cu_init,             fns->cuda, "cuInit");
    LOAD_SYMBOL(fns->cu_device_get_count, fns->cuda, "cuDeviceGetCount");
    LOAD_SYMBOL(fns->cu_device_get,       fns->cuda, "cuDeviceGet");
    LOAD_SYMBOL(fns->cu_device_get_name,  fns->cuda, "cuDeviceGetName");
    LOAD_SYMBOL(fns->cu_device_compute_capability, fns->cuda,
                "cuDeviceComputeCapability");
    LOAD_SYMBOL(fns->cu_ctx_create,       fns->cuda, "cuCtxCreate_v2");
    LOAD_SYMBOL(fns->cu_ctx_pop_current,  fns->cuda, "cuCtxPopCurrent_v2");
    LOAD_SYMBOL(fns->cu_ctx_destroy,      fns->cuda, "cuCtxDestroy_v2");
#endif

    LOAD_LIBRARY(fns->nvenc, NVENC_LIBNAME);
    LOAD_SYMBOL(create_instance, fns->nvenc, "NvEncodeAPICreateInstance");

    /* The version field is set before the table is handed to the API. */
    fns->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;

    status = create_instance(&fns->nvenc_funcs);
    if (status == NV_ENC_SUCCESS) {
        av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");
        return 0;
    }

    return nvenc_print_error(avctx, status, "Failed to create nvenc instance");
}

190
/**
 * Open an NVENC encode session on the CUDA context stored in ctx->cu_context.
 *
 * On success the session handle is stored in ctx->nvencoder. On failure
 * ctx->nvencoder is reset to NULL and the error is logged.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext
 * @return 0 on success, otherwise the AVERROR code mapped from the NVENC
 *         status
 */
static av_cold int nvenc_open_session(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *api = &ctx->nvenc_dload_funcs.nvenc_funcs;
    /* All members not named here are zero-initialized. */
    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session = {
        .version    = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER,
        .apiVersion = NVENCAPI_VERSION,
        .device     = ctx->cu_context,
        .deviceType = NV_ENC_DEVICE_TYPE_CUDA,
    };
    NVENCSTATUS status = api->nvEncOpenEncodeSessionEx(&session, &ctx->nvencoder);

    if (status == NV_ENC_SUCCESS)
        return 0;

    ctx->nvencoder = NULL;
    return nvenc_print_error(avctx, status, "OpenEncodeSessionEx failed");
}

211
/**
 * Check whether the open encode session supports the requested codec.
 *
 * Queries the list of encode GUIDs from the session in ctx->nvencoder and
 * looks for ctx->init_encode_params.encodeGUID in it.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext with
 *              an open session and encodeGUID already set
 * @return 0 if the codec GUID is listed, AVERROR(ENOSYS) if it is not listed
 *         or a query fails, AVERROR(ENOMEM) if the GUID array cannot be
 *         allocated
 */
static int nvenc_check_codec_support(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    int i, ret, count = 0;
    GUID *guids = NULL;

    ret = p_nvenc->nvEncGetEncodeGUIDCount(ctx->nvencoder, &count);

    /* Reject any non-positive count, not only 0, before sizing a buffer. */
    if (ret != NV_ENC_SUCCESS || count <= 0)
        return AVERROR(ENOSYS);

    /* av_malloc_array() fails cleanly if count * sizeof(*guids) overflows. */
    guids = av_malloc_array(count, sizeof(*guids));
    if (!guids)
        return AVERROR(ENOMEM);

    ret = p_nvenc->nvEncGetEncodeGUIDs(ctx->nvencoder, guids, count, &count);
    if (ret != NV_ENC_SUCCESS) {
        ret = AVERROR(ENOSYS);
        goto fail;
    }

    /* Assume "not supported" until the requested GUID is found. */
    ret = AVERROR(ENOSYS);
    for (i = 0; i < count; i++) {
        if (!memcmp(&guids[i], &ctx->init_encode_params.encodeGUID, sizeof(*guids))) {
            ret = 0;
            break;
        }
    }

fail:
    av_free(guids);

    return ret;
}
246

247 248 249 250 251 252
/**
 * Query a single encoder capability for the selected codec.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext with
 *              an open session and encodeGUID already set
 * @param cap   capability to query
 * @return the capability value reported by nvEncGetEncodeCaps, or 0 when the
 *         query fails (a failed query is indistinguishable from a value of 0)
 */
static int nvenc_check_cap(AVCodecContext *avctx, NV_ENC_CAPS cap)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *api = &ctx->nvenc_dload_funcs.nvenc_funcs;
    NV_ENC_CAPS_PARAM query = { 0 };
    int value = 0;
    int status;

    query.version     = NV_ENC_CAPS_PARAM_VER;
    query.capsToQuery = cap;

    status = api->nvEncGetEncodeCaps(ctx->nvencoder,
                                     ctx->init_encode_params.encodeGUID,
                                     &query, &value);

    return status == NV_ENC_SUCCESS ? value : 0;
}
263

264 265 266 267
/**
 * Verify that the open encode session can handle the requested settings.
 *
 * Checks, in order: codec support, YUV444 encoding (only required when the
 * data pixel format is YUV444P), lossless encoding (only required for the
 * lossless presets), maximum width, maximum height, maximum B-frame count
 * and field encoding (only required when AV_CODEC_FLAG_INTERLACED_DCT is
 * set). The first failing check is logged at verbose level.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext with
 *              an open session
 * @return 0 if every check passes, the error from
 *         nvenc_check_codec_support() if the codec is unsupported, otherwise
 *         AVERROR(ENOSYS)
 */
static int nvenc_check_capabilities(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    int err, cap;

    err = nvenc_check_codec_support(avctx);
    if (err < 0) {
        av_log(avctx, AV_LOG_VERBOSE, "Codec not supported\n");
        return err;
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE);
    if (cap <= 0 && ctx->data_pix_fmt == AV_PIX_FMT_YUV444P) {
        av_log(avctx, AV_LOG_VERBOSE, "YUV444P not supported\n");
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE);
    if (cap <= 0 && ctx->preset >= PRESET_LOSSLESS_DEFAULT) {
        av_log(avctx, AV_LOG_VERBOSE, "Lossless encoding not supported\n");
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_WIDTH_MAX);
    if (avctx->width > cap) {
        av_log(avctx, AV_LOG_VERBOSE, "Width %d exceeds %d\n",
               avctx->width, cap);
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_HEIGHT_MAX);
    if (avctx->height > cap) {
        av_log(avctx, AV_LOG_VERBOSE, "Height %d exceeds %d\n",
               avctx->height, cap);
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_NUM_MAX_BFRAMES);
    if (avctx->max_b_frames > cap) {
        av_log(avctx, AV_LOG_VERBOSE, "Max B-frames %d exceed %d\n",
               avctx->max_b_frames, cap);
        return AVERROR(ENOSYS);
    }

    cap = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_FIELD_ENCODING);
    if ((avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) && cap < 1) {
        av_log(avctx, AV_LOG_VERBOSE,
               "Interlaced encoding is not supported. Supported level: %d\n",
               cap);
        return AVERROR(ENOSYS);
    }

    return 0;
}

320
/**
 * Probe one CUDA device and, if it is the requested one, keep it open.
 *
 * The device name and compute capability are logged (at info level when
 * listing devices, verbose otherwise). Devices below NVENC_CAP are rejected.
 * For capable devices a CUDA context and an NVENC session are created and
 * the encoder capabilities are checked; every device passing these checks
 * increments dl_fn->nvenc_device_count.
 *
 * The context and session are kept (ctx->cu_context, ctx->cu_context_internal
 * and ctx->nvencoder stay valid) only when this function returns 0. On any
 * other return they are destroyed and the corresponding fields reset to
 * NULL, including ctx->cu_context, so no handle to a destroyed CUDA context
 * is left behind.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext
 * @param idx   CUDA device ordinal to probe
 * @return 0 if the device is usable and matches ctx->device (or ctx->device
 *         is ANY_DEVICE); -1 if basic CUDA device queries fail;
 *         AVERROR(ENOSYS) if the device is unsuitable or is usable but not
 *         the one requested (this includes the LIST_DEVICES mode)
 */
static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
    char name[128] = { 0 };
    int major, minor;
    CUresult cu_res;
    CUdevice cu_device;
    CUcontext dummy;
    int loglevel = AV_LOG_VERBOSE;

    /* When only listing devices, make the per-device report visible. */
    if (ctx->device == LIST_DEVICES)
        loglevel = AV_LOG_INFO;

    cu_res = dl_fn->cu_device_get(&cu_device, idx);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR,
               "Cannot access the CUDA device %d\n",
               idx);
        return -1;
    }

    cu_res = dl_fn->cu_device_get_name(name, sizeof(name), cu_device);
    if (cu_res != CUDA_SUCCESS)
        return -1;

    cu_res = dl_fn->cu_device_compute_capability(&major, &minor, cu_device);
    if (cu_res != CUDA_SUCCESS)
        return -1;

    av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
    if (((major << 4) | minor) < NVENC_CAP) {
        av_log(avctx, loglevel, "does not support NVENC\n");
        goto fail;
    }

    cu_res = dl_fn->cu_ctx_create(&ctx->cu_context_internal, 0, cu_device);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
        goto fail;
    }

    ctx->cu_context = ctx->cu_context_internal;

    cu_res = dl_fn->cu_ctx_pop_current(&dummy);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
        goto fail2;
    }

    if (nvenc_open_session(avctx) < 0)
        goto fail2;

    if (nvenc_check_capabilities(avctx) < 0)
        goto fail3;

    av_log(avctx, loglevel, "supports NVENC\n");

    dl_fn->nvenc_device_count++;

    /* Keep the session only for the requested (or any) NVENC device. */
    if (ctx->device == dl_fn->nvenc_device_count - 1 || ctx->device == ANY_DEVICE)
        return 0;

    /* Usable device, but not the one asked for: release and report ENOSYS. */
fail3:
    p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
    ctx->nvencoder = NULL;

fail2:
    dl_fn->cu_ctx_destroy(ctx->cu_context_internal);
    ctx->cu_context_internal = NULL;
    /* cu_context aliased the context destroyed above; do not leave it dangling. */
    ctx->cu_context = NULL;

fail:
    return AVERROR(ENOSYS);
}

396
/**
 * Select the codec GUID and the device the encoder will run on.
 *
 * For AV_PIX_FMT_CUDA input the CUDA context is taken from
 * avctx->hw_frames_ctx (only available with CONFIG_CUDA); a session is
 * opened on it and its capabilities are checked. For every other pixel
 * format CUDA is initialized, all CUDA devices are probed with
 * nvenc_check_device(), and the first device accepted by it is used.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext with
 *              its function table already loaded
 * @return 0 on success; AVERROR_BUG for an unexpected codec id or for CUDA
 *         frames without CONFIG_CUDA; AVERROR(EINVAL) when hw_frames_ctx is
 *         missing or the requested GPU index is not available;
 *         AVERROR_UNKNOWN when CUDA cannot be initialized or enumerated;
 *         AVERROR_EXTERNAL when no CUDA or NVENC capable device exists;
 *         AVERROR_EXIT after listing devices; or an error from session
 *         opening / capability checking
 */
static av_cold int nvenc_setup_device(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    int dev, device_total = 0;

    if (avctx->codec->id == AV_CODEC_ID_H264)
        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
    else if (avctx->codec->id == AV_CODEC_ID_HEVC)
        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
    else
        return AVERROR_BUG;

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
#if CONFIG_CUDA
        /* Frames already live on a CUDA device: reuse the caller's context. */
        AVHWFramesContext   *hw_frames;
        AVCUDADeviceContext *cuda_dev;
        int err;

        if (!avctx->hw_frames_ctx)
            return AVERROR(EINVAL);

        hw_frames = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
        cuda_dev  = hw_frames->device_ctx->hwctx;

        ctx->cu_context = cuda_dev->cuda_ctx;

        err = nvenc_open_session(avctx);
        if (err < 0)
            return err;

        err = nvenc_check_capabilities(avctx);
        if (err < 0) {
            av_log(avctx, AV_LOG_FATAL, "Provided device doesn't support required NVENC features\n");
            return err;
        }

        return 0;
#else
        return AVERROR_BUG;
#endif
    }

    /* System-memory input: find a suitable device ourselves. */
    if ((dl_fn->cu_init(0)) != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR,
               "Cannot init CUDA\n");
        return AVERROR_UNKNOWN;
    }

    if ((dl_fn->cu_device_get_count(&device_total)) != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR,
               "Cannot enumerate the CUDA devices\n");
        return AVERROR_UNKNOWN;
    }

    if (!device_total) {
        av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
        return AVERROR_EXTERNAL;
    }

    av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", device_total);

    dl_fn->nvenc_device_count = 0;
    for (dev = 0; dev < device_total; ++dev) {
        if (nvenc_check_device(avctx, dev) < 0)
            continue;
        /* In list mode every device is probed; never stop early. */
        if (ctx->device != LIST_DEVICES)
            return 0;
    }

    if (ctx->device == LIST_DEVICES)
        return AVERROR_EXIT;

    if (!dl_fn->nvenc_device_count) {
        av_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
        return AVERROR_EXTERNAL;
    }

    av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->device, dl_fn->nvenc_device_count);
    return AVERROR(EINVAL);
}

481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508
/* Pairs an NVENC GUID with the NVENC_* flag bits this file associates with
 * it; used as the element type of the preset table in nvenc_map_preset(). */
typedef struct GUIDTuple {
    const GUID guid; /* NVENC GUID (a preset GUID in nvenc_map_preset()) */
    int flags;       /* NVENC_* flags copied into NvencContext.flags */
} GUIDTuple;

/**
 * Translate ctx->preset into an NVENC preset GUID and the matching flags.
 *
 * Writes ctx->init_encode_params.presetGUID and ctx->flags from the table
 * entry at index ctx->preset. Entries without an explicit flag value get
 * flags == 0.
 *
 * NOTE(review): ctx->preset is used as an array index without a range check
 * here; presumably the option definition bounds it -- confirm.
 *
 * @param ctx encoder private context
 */
static void nvenc_map_preset(NvencContext *ctx)
{
    /* Indexed by the numeric preset value; order must not change. */
    GUIDTuple table[] = {
        { NV_ENC_PRESET_DEFAULT_GUID },
        { NV_ENC_PRESET_HQ_GUID,                  NVENC_TWO_PASSES }, /* slow */
        { NV_ENC_PRESET_HQ_GUID,                  NVENC_ONE_PASS }, /* medium */
        { NV_ENC_PRESET_HP_GUID,                  NVENC_ONE_PASS }, /* fast */
        { NV_ENC_PRESET_HP_GUID },
        { NV_ENC_PRESET_HQ_GUID },
        { NV_ENC_PRESET_BD_GUID },
        { NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID, NVENC_LOWLATENCY },
        { NV_ENC_PRESET_LOW_LATENCY_HQ_GUID,      NVENC_LOWLATENCY },
        { NV_ENC_PRESET_LOW_LATENCY_HP_GUID,      NVENC_LOWLATENCY },
        { NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID,    NVENC_LOSSLESS },
        { NV_ENC_PRESET_LOSSLESS_HP_GUID,         NVENC_LOSSLESS },
    };
    const GUIDTuple *selected = table + ctx->preset;

    ctx->flags                         = selected->flags;
    ctx->init_encode_params.presetGUID = selected->guid;
}

509 510 511
/**
 * Configure constant-QP rate control.
 *
 * Uses avctx->global_quality as the QP for intra, P and B frames and resets
 * avctx->qmin / avctx->qmax to -1.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext
 */
static av_cold void set_constqp(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc_params = &ctx->encode_config.rcParams;
    const int qp = avctx->global_quality;

    rc_params->rateControlMode  = NV_ENC_PARAMS_RC_CONSTQP;
    rc_params->constQP.qpIntra  = qp;
    rc_params->constQP.qpInterP = qp;
    rc_params->constQP.qpInterB = qp;

    /* qmin/qmax are marked unset once constant QP is selected. */
    avctx->qmin = avctx->qmax = -1;
}

/**
 * Configure the QP bounds and initial QPs used by the VBR modes.
 *
 * - qmin >= 0 enables the minimum QP (all frame types set to qmin).
 * - qmin >= 0 and qmax >= 0 additionally enables the maximum QP.
 * - qmax alone (qmin < 0) has no effect here.
 *
 * The initial P-frame QP is (qmax + 3 * qmin) / 4 when both bounds are set,
 * qmin when only qmin is set, and 26 otherwise. The initial I/B QPs are
 * derived from it through the i/b quant factor and offset (clipped to
 * 0..51) when both factors are non-zero, else they equal the P-frame QP.
 *
 * This function does not touch rc->rateControlMode.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext
 */
static av_cold void set_vbr(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
    const int have_min = avctx->qmin >= 0;
    const int have_max = avctx->qmax >= 0;
    int base_qp = 26; // default to 26

    if (have_min) {
        rc->enableMinQP = 1;

        rc->minQP.qpIntra  = avctx->qmin;
        rc->minQP.qpInterP = avctx->qmin;
        rc->minQP.qpInterB = avctx->qmin;

        base_qp = avctx->qmin;
    }

    if (have_min && have_max) {
        rc->enableMaxQP = 1;

        rc->maxQP.qpIntra  = avctx->qmax;
        rc->maxQP.qpInterP = avctx->qmax;
        rc->maxQP.qpInterB = avctx->qmax;

        base_qp = (avctx->qmax + 3 * avctx->qmin) / 4; // biased towards Qmin
    }

    rc->enableInitialRCQP     = 1;
    rc->initialRCQP.qpInterP  = base_qp;

    if (avctx->i_quant_factor == 0.0 || avctx->b_quant_factor == 0.0) {
        /* No usable factors: start every frame type at the same QP. */
        rc->initialRCQP.qpIntra  = base_qp;
        rc->initialRCQP.qpInterB = base_qp;
    } else {
        rc->initialRCQP.qpIntra = av_clip(
            base_qp * fabs(avctx->i_quant_factor) + avctx->i_quant_offset, 0, 51);
        rc->initialRCQP.qpInterB = av_clip(
            base_qp * fabs(avctx->b_quant_factor) + avctx->b_quant_offset, 0, 51);
    }
}

/**
 * Configure rate control for lossless encoding.
 *
 * Selects constant-QP mode with a QP of 0 for intra, P and B frames and
 * resets avctx->qmin / avctx->qmax to -1.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext
 */
static av_cold void set_lossless(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc_params = &ctx->encode_config.rcParams;

    rc_params->rateControlMode  = NV_ENC_PARAMS_RC_CONSTQP;
    rc_params->constQP.qpIntra  = 0;
    rc_params->constQP.qpInterP = 0;
    rc_params->constQP.qpInterB = 0;

    avctx->qmin = avctx->qmax = -1;
}

/**
 * Apply the explicitly selected rate-control mode (ctx->rc).
 *
 * Behaviour per mode:
 * - CONSTQP: requires global_quality > 0; otherwise a warning is logged and
 *   nothing is changed. On success set_constqp() is applied.
 * - VBR / 2_PASS_VBR: if neither qmin nor qmax is set, a warning is logged
 *   and set_vbr() is applied. Otherwise these modes continue with the
 *   VBR_MINQP handling below.
 * - VBR_MINQP (and VBR modes continuing from above): if qmin is not set, a
 *   warning is logged and set_vbr() is applied; otherwise set_vbr() is
 *   applied and the mode is stored.
 * - any other mode: only the mode is stored.
 *
 * Note that every path that logs a warning returns WITHOUT writing
 * ctx->rc into rc->rateControlMode.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext
 */
static void nvenc_override_rate_control(AVCodecContext *avctx)
{
    NvencContext *ctx    = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
    const int is_plain_vbr = ctx->rc == NV_ENC_PARAMS_RC_2_PASS_VBR ||
                             ctx->rc == NV_ENC_PARAMS_RC_VBR;

    if (ctx->rc == NV_ENC_PARAMS_RC_CONSTQP) {
        if (avctx->global_quality <= 0) {
            av_log(avctx, AV_LOG_WARNING,
                   "The constant quality rate-control requires "
                   "the 'global_quality' option set.\n");
            return;
        }
        set_constqp(avctx);
        return;
    }

    /* Stage 1: only the plain VBR modes get the "qmin and/or qmax" check. */
    if (is_plain_vbr && avctx->qmin < 0 && avctx->qmax < 0) {
        av_log(avctx, AV_LOG_WARNING,
               "The variable bitrate rate-control requires "
               "the 'qmin' and/or 'qmax' option set.\n");
        set_vbr(avctx);
        return;
    }

    /* Stage 2: shared by VBR_MINQP and the plain VBR modes that passed stage 1. */
    if (is_plain_vbr || ctx->rc == NV_ENC_PARAMS_RC_VBR_MINQP) {
        if (avctx->qmin < 0) {
            av_log(avctx, AV_LOG_WARNING,
                   "The variable bitrate rate-control requires "
                   "the 'qmin' option set.\n");
            set_vbr(avctx);
            return;
        }
        set_vbr(avctx);
    }

    /* CBR-type and unlisted modes arrive here directly. */
    rc->rateControlMode = ctx->rc;
}

625
/**
 * Fill the rate-control parameters of the encoder configuration.
 *
 * Steps:
 * 1. Bitrates: avctx->bit_rate (if > 0) becomes the average bitrate; if it
 *    is not set but the configuration already carries an average bitrate,
 *    that value is copied to the maximum bitrate. avctx->rc_max_rate (if
 *    > 0) overrides the maximum bitrate.
 * 2. If no rate-control mode was chosen (ctx->rc < 0), the one/two pass
 *    setting is resolved from the preset flags and a mode is derived from
 *    cbr, global_quality, twopass and qmin/qmax. ctx->rc may remain < 0.
 * 3. The mode is applied: lossless presets use set_lossless(); a selected
 *    mode goes through nvenc_override_rate_control(); otherwise plain VBR
 *    with set_vbr() is used.
 * 4. VBV buffer size: avctx->rc_buffer_size if > 0, else twice the average
 *    bitrate when that is > 0.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext
 */
static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;

    if (avctx->bit_rate > 0)
        rc->averageBitRate = avctx->bit_rate;
    else if (rc->averageBitRate > 0)
        rc->maxBitRate = rc->averageBitRate;

    if (avctx->rc_max_rate > 0)
        rc->maxBitRate = avctx->rc_max_rate;

    if (ctx->rc < 0) {
        /* Preset flags take precedence over the twopass setting. */
        if (ctx->flags & NVENC_ONE_PASS)
            ctx->twopass = 0;
        if (ctx->flags & NVENC_TWO_PASSES)
            ctx->twopass = 1;

        /* Still undecided: two-pass only for the low-latency presets. */
        if (ctx->twopass < 0)
            ctx->twopass = !!(ctx->flags & NVENC_LOWLATENCY);

        if (ctx->cbr)
            ctx->rc = ctx->twopass ? NV_ENC_PARAMS_RC_2_PASS_QUALITY
                                   : NV_ENC_PARAMS_RC_CBR;
        else if (avctx->global_quality > 0)
            ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
        else if (ctx->twopass)
            ctx->rc = NV_ENC_PARAMS_RC_2_PASS_VBR;
        else if (avctx->qmin >= 0 && avctx->qmax >= 0)
            ctx->rc = NV_ENC_PARAMS_RC_VBR_MINQP;
    }

    if (ctx->flags & NVENC_LOSSLESS) {
        set_lossless(avctx);
    } else if (ctx->rc < 0) {
        rc->rateControlMode = NV_ENC_PARAMS_RC_VBR;
        set_vbr(avctx);
    } else {
        nvenc_override_rate_control(avctx);
    }

    if (avctx->rc_buffer_size > 0)
        rc->vbvBufferSize = avctx->rc_buffer_size;
    else if (rc->averageBitRate > 0)
        rc->vbvBufferSize = 2 * rc->averageBitRate;
}

678
/**
 * Fill the H.264 specific part of the encoder configuration.
 *
 * Sets the VUI colour description, slice mode, parameter-set repetition,
 * reference frame count, IDR period, SEI output for the CBR modes, two-pass
 * specific options, the profile (or lossless transform bypass), the chroma
 * format and the level. avctx->profile is updated to match the selected
 * profile.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext whose
 *              rate-control mode and gopLength are already configured
 * @return always 0
 */
static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx)
{
    NvencContext *ctx                      = avctx->priv_data;
    NV_ENC_CONFIG *cfg                     = &ctx->encode_config;
    NV_ENC_CONFIG_H264 *h264               = &cfg->encodeCodecConfig.h264Config;
    NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;
    const int global_header = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0;
    /* Full range is signalled by the colour range or a YUVJ pixel format. */
    const int full_range = avctx->color_range == AVCOL_RANGE_JPEG      ||
                           ctx->data_pix_fmt  == AV_PIX_FMT_YUVJ420P   ||
                           ctx->data_pix_fmt  == AV_PIX_FMT_YUVJ422P   ||
                           ctx->data_pix_fmt  == AV_PIX_FMT_YUVJ444P;
    /* 2 is compared against colorspace, primaries and transfer alike. */
    const int has_colour_desc = avctx->colorspace      != 2 ||
                                avctx->color_primaries != 2 ||
                                avctx->color_trc       != 2;

    vui->colourMatrix            = avctx->colorspace;
    vui->colourPrimaries         = avctx->color_primaries;
    vui->transferCharacteristics = avctx->color_trc;
    vui->videoFullRangeFlag      = full_range;

    vui->colourDescriptionPresentFlag = has_colour_desc;

    vui->videoSignalTypePresentFlag =
        (vui->colourDescriptionPresentFlag
        || vui->videoFormat != 5
        || vui->videoFullRangeFlag != 0);

    h264->sliceMode     = 3;
    h264->sliceModeData = 1;

    /* With global headers the parameter sets are not emitted in-band. */
    h264->disableSPSPPS = global_header;
    h264->repeatSPSPPS  = !global_header;
    h264->outputAUD     = 1;

    if (avctx->refs >= 0) {
        /* 0 means "let the hardware decide" */
        h264->maxNumRefFrames = avctx->refs;
    }
    if (avctx->gop_size >= 0) {
        h264->idrPeriod = cfg->gopLength;
    }

    if (IS_CBR(cfg->rcParams.rateControlMode)) {
        h264->outputBufferingPeriodSEI = 1;
        h264->outputPictureTimingSEI   = 1;
    }

    switch (cfg->rcParams.rateControlMode) {
    case NV_ENC_PARAMS_RC_2_PASS_QUALITY:
    case NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP:
    case NV_ENC_PARAMS_RC_2_PASS_VBR:
        h264->adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
        h264->fmoMode               = NV_ENC_H264_FMO_DISABLE;
        break;
    default:
        break;
    }

    if (ctx->flags & NVENC_LOSSLESS) {
        h264->qpPrimeYZeroTransformBypassFlag = 1;
    } else if (ctx->profile == NV_ENC_H264_PROFILE_BASELINE) {
        cfg->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
        avctx->profile   = FF_PROFILE_H264_BASELINE;
    } else if (ctx->profile == NV_ENC_H264_PROFILE_MAIN) {
        cfg->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
        avctx->profile   = FF_PROFILE_H264_MAIN;
    } else if (ctx->profile == NV_ENC_H264_PROFILE_HIGH) {
        cfg->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
        avctx->profile   = FF_PROFILE_H264_HIGH;
    } else if (ctx->profile == NV_ENC_H264_PROFILE_HIGH_444P) {
        cfg->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
        avctx->profile   = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
    }

    // force setting profile as high444p if input is AV_PIX_FMT_YUV444P
    if (ctx->data_pix_fmt == AV_PIX_FMT_YUV444P) {
        cfg->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
        avctx->profile   = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
    }

    if (avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE)
        h264->chromaFormatIDC = 3;
    else
        h264->chromaFormatIDC = 1;

    h264->level = ctx->level;

    return 0;
}

/**
 * Fill the HEVC specific part of the encoder configuration.
 *
 * Sets the VUI colour description, slice mode, parameter-set repetition,
 * DPB reference frame count, IDR period, SEI output for the CBR modes, the
 * (fixed) Main profile, the level and the tier. avctx->profile is set to
 * FF_PROFILE_HEVC_MAIN.
 *
 * @param avctx codec context; avctx->priv_data must be an NvencContext whose
 *              rate-control mode and gopLength are already configured
 * @return always 0
 */
static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
{
    NvencContext *ctx                      = avctx->priv_data;
    NV_ENC_CONFIG *cfg                     = &ctx->encode_config;
    NV_ENC_CONFIG_HEVC *hevc               = &cfg->encodeCodecConfig.hevcConfig;
    NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui = &hevc->hevcVUIParameters;
    const int global_header = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0;
    /* Full range is signalled by the colour range or a YUVJ pixel format. */
    const int full_range = avctx->color_range == AVCOL_RANGE_JPEG      ||
                           ctx->data_pix_fmt  == AV_PIX_FMT_YUVJ420P   ||
                           ctx->data_pix_fmt  == AV_PIX_FMT_YUVJ422P   ||
                           ctx->data_pix_fmt  == AV_PIX_FMT_YUVJ444P;
    /* 2 is compared against colorspace, primaries and transfer alike. */
    const int has_colour_desc = avctx->colorspace      != 2 ||
                                avctx->color_primaries != 2 ||
                                avctx->color_trc       != 2;

    vui->colourMatrix            = avctx->colorspace;
    vui->colourPrimaries         = avctx->color_primaries;
    vui->transferCharacteristics = avctx->color_trc;
    vui->videoFullRangeFlag      = full_range;

    vui->colourDescriptionPresentFlag = has_colour_desc;

    vui->videoSignalTypePresentFlag =
        (vui->colourDescriptionPresentFlag
        || vui->videoFormat != 5
        || vui->videoFullRangeFlag != 0);

    hevc->sliceMode     = 3;
    hevc->sliceModeData = 1;

    /* With global headers the parameter sets are not emitted in-band. */
    hevc->disableSPSPPS = global_header;
    hevc->repeatSPSPPS  = !global_header;
    hevc->outputAUD     = 1;

    if (avctx->refs >= 0) {
        /* 0 means "let the hardware decide" */
        hevc->maxNumRefFramesInDPB = avctx->refs;
    }
    if (avctx->gop_size >= 0) {
        hevc->idrPeriod = cfg->gopLength;
    }

    if (IS_CBR(cfg->rcParams.rateControlMode)) {
        hevc->outputBufferingPeriodSEI = 1;
        hevc->outputPictureTimingSEI   = 1;
    }

    /* No other profile is supported in the current SDK version 5 */
    avctx->profile   = FF_PROFILE_HEVC_MAIN;
    cfg->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;

    hevc->level = ctx->level;
    hevc->tier  = ctx->tier;

    return 0;
}

814
/**
 * Dispatch to the codec specific configuration routine.
 *
 * @param avctx codec context; avctx->codec->id selects the routine
 * @return the result of nvenc_setup_h264_config() or
 *         nvenc_setup_hevc_config(); 0 for any other codec id
 */
static av_cold int nvenc_setup_codec_config(AVCodecContext *avctx)
{
    const enum AVCodecID id = avctx->codec->id;

    if (id == AV_CODEC_ID_H264)
        return nvenc_setup_h264_config(avctx);

    if (id == AV_CODEC_ID_HEVC)
        return nvenc_setup_hevc_config(avctx);

    /* Earlier switch/case will return if unknown codec is passed. */
    return 0;
}

/*
 * Populate the NVENC initialization parameters and encode configuration from
 * the AVCodecContext, then initialize the encoder session.
 *
 * The configuration starts out as a copy of the preset configuration queried
 * from the driver and is then overridden with the display aspect ratio,
 * frame rate, GOP structure, rate control and codec-specific settings.
 * After nvEncInitializeEncoder() succeeds, the resulting bitrate and CPB
 * properties are exported back to avctx.
 *
 * Returns 0 on success; on failure returns the error reported by the failing
 * step (or AVERROR(ENOMEM) if the CPB side data cannot be allocated).
 */
static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NV_ENC_PRESET_CONFIG preset_config = { 0 };
    NVENCSTATUS nv_status = NV_ENC_SUCCESS;
    AVCPBProperties *cpb_props;
    int res = 0;
    int dw, dh;

    ctx->encode_config.version = NV_ENC_CONFIG_VER;
    ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER;

    ctx->init_encode_params.encodeHeight = avctx->height;
    ctx->init_encode_params.encodeWidth = avctx->width;

    ctx->init_encode_params.encodeConfig = &ctx->encode_config;

    nvenc_map_preset(ctx);

    preset_config.version = NV_ENC_PRESET_CONFIG_VER;
    preset_config.presetCfg.version = NV_ENC_CONFIG_VER;

    nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder,
                                                    ctx->init_encode_params.encodeGUID,
                                                    ctx->init_encode_params.presetGUID,
                                                    &preset_config);
    if (nv_status != NV_ENC_SUCCESS)
        return nvenc_print_error(avctx, nv_status, "Cannot get the preset configuration");

    /* Use the preset as the baseline; everything below overrides parts of it. */
    memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config));

    ctx->encode_config.version = NV_ENC_CONFIG_VER;

    /* A valid, non-square sample aspect ratio scales the display aspect
     * ratio; both numerator and denominator must be checked against 1,
     * otherwise ratios such as 1:2 would be treated as square pixels. */
    if (avctx->sample_aspect_ratio.num && avctx->sample_aspect_ratio.den &&
        (avctx->sample_aspect_ratio.num != 1 || avctx->sample_aspect_ratio.den != 1)) {
        av_reduce(&dw, &dh,
                  avctx->width * avctx->sample_aspect_ratio.num,
                  avctx->height * avctx->sample_aspect_ratio.den,
                  1024 * 1024);
        ctx->init_encode_params.darHeight = dh;
        ctx->init_encode_params.darWidth = dw;
    } else {
        ctx->init_encode_params.darHeight = avctx->height;
        ctx->init_encode_params.darWidth = avctx->width;
    }

    // De-compensate for hardware, dubiously, trying to compensate for
    // playback at 704 pixel width.
    if (avctx->width == 720 &&
        (avctx->height == 480 || avctx->height == 576)) {
        av_reduce(&dw, &dh,
                  ctx->init_encode_params.darWidth * 44,
                  ctx->init_encode_params.darHeight * 45,
                  1024 * 1024);
        ctx->init_encode_params.darHeight = dh;
        ctx->init_encode_params.darWidth = dw;
    }

    /* The frame rate is the inverse of the time base, scaled by ticks_per_frame. */
    ctx->init_encode_params.frameRateNum = avctx->time_base.den;
    ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;

    ctx->init_encode_params.enableEncodeAsync = 0;
    ctx->init_encode_params.enablePTD = 1;

    if (avctx->gop_size > 0) {
        if (avctx->max_b_frames >= 0) {
            /* 0 is intra-only, 1 is I/P only, 2 is one B-Frame, 3 two B-frames, and so on. */
            ctx->encode_config.frameIntervalP = avctx->max_b_frames + 1;
        }

        ctx->encode_config.gopLength = avctx->gop_size;
    } else if (avctx->gop_size == 0) {
        /* gop_size == 0 requests intra-only encoding. */
        ctx->encode_config.frameIntervalP = 0;
        ctx->encode_config.gopLength = 1;
    }

    ctx->initial_pts[0] = AV_NOPTS_VALUE;
    ctx->initial_pts[1] = AV_NOPTS_VALUE;

    nvenc_setup_rate_control(avctx);

    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
    } else {
        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
    }

    res = nvenc_setup_codec_config(avctx);
    if (res)
        return res;

    nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
    if (nv_status != NV_ENC_SUCCESS) {
        return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
    }

    if (ctx->encode_config.frameIntervalP > 1)
        avctx->has_b_frames = 2;

    if (ctx->encode_config.rcParams.averageBitRate > 0)
        avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate;

    /* Export the effective rate-control parameters as CPB side data. */
    cpb_props = ff_add_cpb_side_data(avctx);
    if (!cpb_props)
        return AVERROR(ENOMEM);
    cpb_props->max_bitrate = ctx->encode_config.rcParams.maxBitRate;
    cpb_props->avg_bitrate = avctx->bit_rate;
    cpb_props->buffer_size = ctx->encode_config.rcParams.vbvBufferSize;

    return 0;
}
941

942 943 944 945 946
/*
 * Set up the surface at index idx: pick its NVENC buffer format from the
 * data pixel format, create its input side (an AVFrame reference holder for
 * CUDA input, an NVENC input buffer otherwise) and its output bitstream
 * buffer.
 *
 * Returns 0 on success, AVERROR(EINVAL) for an unsupported pixel format,
 * AVERROR(ENOMEM) if the frame holder cannot be allocated, or the value of
 * nvenc_print_error() when an NVENC call fails.
 */
static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    NvencSurface *surf = &ctx->surfaces[idx];
    const int cuda_input = avctx->pix_fmt == AV_PIX_FMT_CUDA;

    NVENCSTATUS status;
    NV_ENC_CREATE_BITSTREAM_BUFFER out_buf = { 0 };

    out_buf.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;

    if (ctx->data_pix_fmt == AV_PIX_FMT_YUV420P) {
        surf->format = NV_ENC_BUFFER_FORMAT_YV12_PL;
    } else if (ctx->data_pix_fmt == AV_PIX_FMT_NV12) {
        surf->format = NV_ENC_BUFFER_FORMAT_NV12_PL;
    } else if (ctx->data_pix_fmt == AV_PIX_FMT_YUV444P) {
        surf->format = NV_ENC_BUFFER_FORMAT_YUV444_PL;
    } else {
        av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
        return AVERROR(EINVAL);
    }

    if (cuda_input) {
        /* CUDA frames are referenced, not copied: only a holder is needed. */
        surf->in_ref = av_frame_alloc();
        if (!surf->in_ref)
            return AVERROR(ENOMEM);
    } else {
        NV_ENC_CREATE_INPUT_BUFFER in_buf = { 0 };

        in_buf.version    = NV_ENC_CREATE_INPUT_BUFFER_VER;
        /* Dimensions are rounded up to a multiple of 32. */
        in_buf.width      = (avctx->width + 31) & ~31;
        in_buf.height     = (avctx->height + 31) & ~31;
        in_buf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
        in_buf.bufferFmt  = surf->format;

        status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &in_buf);
        if (status != NV_ENC_SUCCESS)
            return nvenc_print_error(avctx, status, "CreateInputBuffer failed");

        surf->input_surface = in_buf.inputBuffer;
        surf->width         = in_buf.width;
        surf->height        = in_buf.height;
    }

    surf->lockCount = 0;

    /* 1MB is large enough to hold most output frames.
     * NVENC increases this automatically if it is not enough. */
    out_buf.size       = 1024 * 1024;
    out_buf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;

    status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &out_buf);
    if (status != NV_ENC_SUCCESS) {
        int err = nvenc_print_error(avctx, status, "CreateBitstreamBuffer failed");

        /* Undo the input side that was created above. */
        if (!cuda_input)
            p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, surf->input_surface);
        av_frame_free(&surf->in_ref);
        return err;
    }

    surf->output_surface = out_buf.bitstreamBuffer;
    surf->size           = out_buf.size;

    return 0;
}
1014

1015
/*
 * Decide how many surfaces to use, allocate the surface array and the three
 * FIFOs (timestamps, pending output surfaces, ready output surfaces), then
 * allocate every surface.
 *
 * Returns 0 on success, AVERROR(ENOMEM) when an allocation fails, or the
 * negative error returned by nvenc_alloc_surface().
 */
static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    int idx, ret;
    /* Number of 16x16 macroblocks covering the picture. */
    int num_mbs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4);
    int min_surfaces = (num_mbs >= 8160) ? 32 : 48;

    /* Never go below the minimum chosen for this picture size. */
    if (min_surfaces > ctx->nb_surfaces)
        ctx->nb_surfaces = min_surfaces;

    /* Clamp the async depth to at most nb_surfaces - 1. */
    if (ctx->async_depth > ctx->nb_surfaces - 1)
        ctx->async_depth = ctx->nb_surfaces - 1;

    ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces));
    if (!ctx->surfaces)
        return AVERROR(ENOMEM);

    ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
    if (!ctx->timestamp_list)
        return AVERROR(ENOMEM);

    ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
    if (!ctx->output_surface_queue)
        return AVERROR(ENOMEM);

    ctx->output_surface_ready_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
    if (!ctx->output_surface_ready_queue)
        return AVERROR(ENOMEM);

    for (idx = 0; idx < ctx->nb_surfaces; idx++) {
        ret = nvenc_alloc_surface(avctx, idx);
        if (ret < 0)
            return ret;
    }

    return 0;
}
1046

1047 1048 1049 1050 1051
/*
 * Query the encoder's sequence parameter payload via
 * nvEncGetSequenceParams() and store a padded copy of it in
 * avctx->extradata.
 *
 * extradata_size is only updated once the buffer has been allocated, so a
 * failed allocation never leaves a non-zero size next to a NULL pointer.
 *
 * Returns 0 on success, AVERROR(ENOMEM) if the buffer cannot be allocated,
 * or the value of nvenc_print_error() when the NVENC query fails.
 */
static av_cold int nvenc_setup_extradata(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NVENCSTATUS nv_status;
    uint32_t outSize = 0;
    char tmpHeader[256];
    NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
    payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;

    /* The driver writes the headers into tmpHeader and their length into outSize. */
    payload.spsppsBuffer = tmpHeader;
    payload.inBufferSize = sizeof(tmpHeader);
    payload.outSPSPPSPayloadSize = &outSize;

    nv_status = p_nvenc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
    if (nv_status != NV_ENC_SUCCESS) {
        return nvenc_print_error(avctx, nv_status, "GetSequenceParams failed");
    }

    avctx->extradata = av_mallocz(outSize + AV_INPUT_BUFFER_PADDING_SIZE);
    if (!avctx->extradata) {
        return AVERROR(ENOMEM);
    }

    memcpy(avctx->extradata, tmpHeader, outSize);
    avctx->extradata_size = outSize;

    return 0;
}
1079

1080
/*
 * Tear down the encoder: flush it with an EOS picture, free the FIFOs,
 * release all surfaces and registered CUDA resources, destroy the encoder
 * session and the internally created CUDA context, and unload the
 * dynamically loaded libraries.
 *
 * Always returns 0.
 */
av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
    const int cuda_input = avctx->pix_fmt == AV_PIX_FMT_CUDA;
    int n;

    /* the encoder has to be flushed before it can be closed */
    if (ctx->nvencoder) {
        NV_ENC_PIC_PARAMS eos_params = { 0 };

        eos_params.version        = NV_ENC_PIC_PARAMS_VER;
        eos_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;

        p_nvenc->nvEncEncodePicture(ctx->nvencoder, &eos_params);
    }

    av_fifo_freep(&ctx->timestamp_list);
    av_fifo_freep(&ctx->output_surface_ready_queue);
    av_fifo_freep(&ctx->output_surface_queue);

    if (ctx->surfaces) {
        if (cuda_input) {
            /* Unmap surfaces that still have an input surface set, then
             * unregister every registered resource. */
            for (n = 0; n < ctx->nb_surfaces; ++n) {
                if (!ctx->surfaces[n].input_surface)
                    continue;
                p_nvenc->nvEncUnmapInputResource(ctx->nvencoder,
                                                 ctx->surfaces[n].in_map.mappedResource);
            }
            for (n = 0; n < ctx->nb_registered_frames; n++) {
                if (!ctx->registered_frames[n].regptr)
                    continue;
                p_nvenc->nvEncUnregisterResource(ctx->nvencoder,
                                                 ctx->registered_frames[n].regptr);
            }
            ctx->nb_registered_frames = 0;
        }

        for (n = 0; n < ctx->nb_surfaces; ++n) {
            NvencSurface *surf = &ctx->surfaces[n];

            if (!cuda_input)
                p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, surf->input_surface);
            av_frame_free(&surf->in_ref);
            p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, surf->output_surface);
        }
    }
    av_freep(&ctx->surfaces);
    ctx->nb_surfaces = 0;

    if (ctx->nvencoder)
        p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
    ctx->nvencoder = NULL;

    /* Only the internally created CUDA context is destroyed here. */
    if (ctx->cu_context_internal)
        dl_fn->cu_ctx_destroy(ctx->cu_context_internal);
    ctx->cu_context_internal = NULL;
    ctx->cu_context          = NULL;

    if (dl_fn->nvenc)
        dlclose(dl_fn->nvenc);
    dl_fn->nvenc = NULL;

    dl_fn->nvenc_device_count = 0;

#if !CONFIG_CUDA
    if (dl_fn->cuda)
        dlclose(dl_fn->cuda);
    dl_fn->cuda = NULL;
#endif

    /* Clear the resolved CUDA entry points. */
    dl_fn->cu_init                      = NULL;
    dl_fn->cu_device_get_count          = NULL;
    dl_fn->cu_device_get                = NULL;
    dl_fn->cu_device_get_name           = NULL;
    dl_fn->cu_device_compute_capability = NULL;
    dl_fn->cu_ctx_create                = NULL;
    dl_fn->cu_ctx_pop_current           = NULL;
    dl_fn->cu_ctx_destroy               = NULL;

    av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");

    return 0;
}

/*
 * Encoder init entry point: determine the pixel format of the actual frame
 * data, load the libraries, set up the device, the encoder and the
 * surfaces, and, when global headers are requested, the extradata.
 *
 * Returns 0 on success, AVERROR(EINVAL) if CUDA input is requested without
 * hw_frames_ctx, or the negative error of the first failing setup step.
 */
av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    int ret;

    if (avctx->pix_fmt != AV_PIX_FMT_CUDA) {
        ctx->data_pix_fmt = avctx->pix_fmt;
    } else {
        AVHWFramesContext *frames_ctx;

        if (!avctx->hw_frames_ctx) {
            av_log(avctx, AV_LOG_ERROR,
                   "hw_frames_ctx must be set when using GPU frames as input\n");
            return AVERROR(EINVAL);
        }
        /* For GPU frames the data layout is given by the software format. */
        frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
        ctx->data_pix_fmt = frames_ctx->sw_format;
    }

    ret = nvenc_load_libraries(avctx);
    if (ret < 0)
        return ret;

    ret = nvenc_setup_device(avctx);
    if (ret < 0)
        return ret;

    ret = nvenc_setup_encoder(avctx);
    if (ret < 0)
        return ret;

    ret = nvenc_setup_surfaces(avctx);
    if (ret < 0)
        return ret;

    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
        ret = nvenc_setup_extradata(avctx);
        if (ret < 0)
            return ret;
    }

    return 0;
}

1195
/*
 * Find the first surface whose lockCount is zero, mark it as locked
 * (lockCount = 1) and return it. Returns NULL if every surface is locked.
 */
static NvencSurface *get_free_frame(NvencContext *ctx)
{
    int idx;

    for (idx = 0; idx < ctx->nb_surfaces; ++idx) {
        NvencSurface *candidate = &ctx->surfaces[idx];

        if (candidate->lockCount)
            continue;

        candidate->lockCount = 1;
        return candidate;
    }

    return NULL;
}

1209
static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
1210 1211 1212 1213 1214
            NV_ENC_LOCK_INPUT_BUFFER *lockBufferParams, const AVFrame *frame)
{
    uint8_t *buf = lockBufferParams->bufferDataPtr;
    int off = inSurf->height * lockBufferParams->pitch;

1215
    if (frame->format == AV_PIX_FMT_YUV420P) {
1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230
        av_image_copy_plane(buf, lockBufferParams->pitch,
            frame->data[0], frame->linesize[0],
            avctx->width, avctx->height);

        buf += off;

        av_image_copy_plane(buf, lockBufferParams->pitch >> 1,
            frame->data[2], frame->linesize[2],
            avctx->width >> 1, avctx->height >> 1);

        buf += off >> 2;

        av_image_copy_plane(buf, lockBufferParams->pitch >> 1,
            frame->data[1], frame->linesize[1],
            avctx->width >> 1, avctx->height >> 1);
1231
    } else if (frame->format == AV_PIX_FMT_NV12) {
1232 1233 1234 1235 1236 1237 1238 1239 1240
        av_image_copy_plane(buf, lockBufferParams->pitch,
            frame->data[0], frame->linesize[0],
            avctx->width, avctx->height);

        buf += off;

        av_image_copy_plane(buf, lockBufferParams->pitch,
            frame->data[1], frame->linesize[1],
            avctx->width, avctx->height >> 1);
1241
    } else if (frame->format == AV_PIX_FMT_YUV444P) {
1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264
        av_image_copy_plane(buf, lockBufferParams->pitch,
            frame->data[0], frame->linesize[0],
            avctx->width, avctx->height);

        buf += off;

        av_image_copy_plane(buf, lockBufferParams->pitch,
            frame->data[1], frame->linesize[1],
            avctx->width, avctx->height);

        buf += off;

        av_image_copy_plane(buf, lockBufferParams->pitch,
            frame->data[2], frame->linesize[2],
            avctx->width, avctx->height);
    } else {
        av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
        return AVERROR(EINVAL);
    }

    return 0;
}

1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329
/*
 * Pick a slot in ctx->registered_frames for a new registration.
 *
 * While the table is not full the next unused index is handed out. Once it
 * is full, the first slot that is not currently mapped is recycled; its old
 * registration, if any, is unregistered first.
 *
 * Returns the slot index, or AVERROR(ENOMEM) if every slot is mapped.
 */
static int nvenc_find_free_reg_resource(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    int slot;

    if (ctx->nb_registered_frames != FF_ARRAY_ELEMS(ctx->registered_frames))
        return ctx->nb_registered_frames++;

    for (slot = 0; slot < ctx->nb_registered_frames; slot++) {
        if (ctx->registered_frames[slot].mapped)
            continue;

        if (ctx->registered_frames[slot].regptr) {
            p_nvenc->nvEncUnregisterResource(ctx->nvencoder,
                                             ctx->registered_frames[slot].regptr);
            ctx->registered_frames[slot].regptr = NULL;
        }
        return slot;
    }

    av_log(avctx, AV_LOG_ERROR, "Too many registered CUDA frames\n");
    return AVERROR(ENOMEM);
}

/*
 * Make sure the CUDA buffer behind frame->data[0] is registered with NVENC
 * and return the index of its entry in ctx->registered_frames.
 *
 * An existing entry is reused only if it still holds a live registration
 * (non-NULL regptr); a slot whose registration was dropped but whose ptr is
 * stale must not be returned, as mapping it would fail. Otherwise a free
 * slot is obtained and the buffer is registered as a CUDA device pointer
 * using the hardware frames context dimensions and the frame's pitch.
 *
 * Returns the slot index on success, the negative error from
 * nvenc_find_free_reg_resource(), or AVERROR_UNKNOWN if registration fails.
 */
static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
    /* Zero-initialize so no indeterminate fields are handed to the driver. */
    NV_ENC_REGISTER_RESOURCE reg = { 0 };
    NVENCSTATUS nv_status;
    int i, idx;

    for (i = 0; i < ctx->nb_registered_frames; i++) {
        if (ctx->registered_frames[i].regptr &&
            ctx->registered_frames[i].ptr == (CUdeviceptr)frame->data[0])
            return i;
    }

    idx = nvenc_find_free_reg_resource(avctx);
    if (idx < 0)
        return idx;

    reg.version            = NV_ENC_REGISTER_RESOURCE_VER;
    reg.resourceType       = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
    reg.width              = frames_ctx->width;
    reg.height             = frames_ctx->height;
    reg.bufferFormat       = ctx->surfaces[0].format;
    reg.pitch              = frame->linesize[0];
    reg.resourceToRegister = frame->data[0];

    nv_status = p_nvenc->nvEncRegisterResource(ctx->nvencoder, &reg);
    if (nv_status != NV_ENC_SUCCESS) {
        nvenc_print_error(avctx, nv_status, "Error registering an input resource");
        return AVERROR_UNKNOWN;
    }

    ctx->registered_frames[idx].ptr    = (CUdeviceptr)frame->data[0];
    ctx->registered_frames[idx].regptr = reg.registeredResource;
    return idx;
}

1330
static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
1331
                                      NvencSurface *nvenc_frame)
1332 1333 1334 1335 1336 1337 1338 1339
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    int res;
    NVENCSTATUS nv_status;

1340 1341 1342 1343 1344 1345
    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
        int reg_idx = nvenc_register_frame(avctx, frame);
        if (reg_idx < 0) {
            av_log(avctx, AV_LOG_ERROR, "Could not register an input CUDA frame\n");
            return reg_idx;
        }
1346

1347 1348 1349
        res = av_frame_ref(nvenc_frame->in_ref, frame);
        if (res < 0)
            return res;
1350

1351 1352 1353 1354 1355 1356 1357
        nvenc_frame->in_map.version = NV_ENC_MAP_INPUT_RESOURCE_VER;
        nvenc_frame->in_map.registeredResource = ctx->registered_frames[reg_idx].regptr;
        nv_status = p_nvenc->nvEncMapInputResource(ctx->nvencoder, &nvenc_frame->in_map);
        if (nv_status != NV_ENC_SUCCESS) {
            av_frame_unref(nvenc_frame->in_ref);
            return nvenc_print_error(avctx, nv_status, "Error mapping an input resource");
        }
1358

1359 1360 1361 1362 1363 1364
        ctx->registered_frames[reg_idx].mapped = 1;
        nvenc_frame->reg_idx                   = reg_idx;
        nvenc_frame->input_surface             = nvenc_frame->in_map.mappedResource;
        return 0;
    } else {
        NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
1365

1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382
        lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
        lockBufferParams.inputBuffer = nvenc_frame->input_surface;

        nv_status = p_nvenc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
        if (nv_status != NV_ENC_SUCCESS) {
            return nvenc_print_error(avctx, nv_status, "Failed locking nvenc input buffer");
        }

        res = nvenc_copy_frame(avctx, nvenc_frame, &lockBufferParams, frame);

        nv_status = p_nvenc->nvEncUnlockInputBuffer(ctx->nvencoder, nvenc_frame->input_surface);
        if (nv_status != NV_ENC_SUCCESS) {
            return nvenc_print_error(avctx, nv_status, "Failed unlocking input buffer!");
        }

        return res;
    }
1383 1384 1385
}

static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
1386
                                            NV_ENC_PIC_PARAMS *params)
1387 1388 1389 1390 1391
{
    NvencContext *ctx = avctx->priv_data;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
1392 1393 1394 1395
        params->codecPicParams.h264PicParams.sliceMode =
            ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
        params->codecPicParams.h264PicParams.sliceModeData =
            ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
1396
      break;
1397 1398 1399 1400 1401 1402
    case AV_CODEC_ID_HEVC:
        params->codecPicParams.hevcPicParams.sliceMode =
            ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode;
        params->codecPicParams.hevcPicParams.sliceModeData =
            ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
        break;
1403 1404 1405
    }
}

1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419
/* Append one timestamp to the FIFO. */
static inline void timestamp_queue_enqueue(AVFifoBuffer* queue, int64_t timestamp)
{
    int64_t value = timestamp;

    av_fifo_generic_write(queue, &value, sizeof(value), NULL);
}

/*
 * Remove and return the oldest timestamp from the FIFO, or
 * AV_NOPTS_VALUE if the FIFO is empty.
 */
static inline int64_t timestamp_queue_dequeue(AVFifoBuffer* queue)
{
    int64_t oldest = AV_NOPTS_VALUE;

    if (av_fifo_size(queue) <= 0)
        return oldest;

    av_fifo_generic_read(queue, &oldest, sizeof(oldest), NULL);
    return oldest;
}

1420 1421 1422 1423 1424 1425 1426 1427
/*
 * Set pts and dts on an output packet.
 *
 * pts is taken from the locked bitstream. dts normally comes from the
 * timestamp FIFO; for the very first packet with B-frames enabled it is
 * instead extrapolated backwards from the first two input pts values
 * (dts = pts0 - (pts1 - pts0)).
 *
 * Returns 0 on success, AVERROR(ERANGE) if the extrapolation would overflow.
 */
static int nvenc_set_timestamp(AVCodecContext *avctx,
                               NV_ENC_LOCK_BITSTREAM *params,
                               AVPacket *pkt)
{
    NvencContext *ctx = avctx->priv_data;
    int64_t first, second, step;

    pkt->pts = params->outputTimeStamp;

    /* Common case: no extrapolation needed, take the next queued value. */
    if (avctx->max_b_frames <= 0 || ctx->first_packet_output ||
        ctx->initial_pts[1] == AV_NOPTS_VALUE) {
        pkt->dts = timestamp_queue_dequeue(ctx->timestamp_list);
        return 0;
    }

    /* generate the first dts by linearly extrapolating the
     * first two pts values to the past */
    first  = ctx->initial_pts[0];
    second = ctx->initial_pts[1];

    /* Guard second - first against signed overflow. */
    if ((first < 0 && second > INT64_MAX + first) ||
        (first > 0 && second < INT64_MIN + first))
        return AVERROR(ERANGE);
    step = second - first;

    /* Guard first - step against signed overflow. */
    if ((step < 0 && first > INT64_MAX + step) ||
        (step > 0 && first < INT64_MIN + step))
        return AVERROR(ERANGE);
    pkt->dts = first - step;

    ctx->first_packet_output = 1;
    return 0;
}

1454
/*
 * Retrieve one encoded picture from the output surface and turn it into an
 * AVPacket.
 *
 * Locks the surface's bitstream buffer, copies the data into pkt, unlocks
 * the buffer, releases the CUDA input mapping (for CUDA input), derives the
 * picture type / key-frame flag, attaches encoder statistics and sets the
 * packet timestamps.
 *
 * On every failure before the timestamps are assigned, one entry is dropped
 * from the timestamp FIFO (the "error" label) before cleaning up.
 *
 * Returns 0 on success, a non-zero error code otherwise.
 */
static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencSurface *tmpoutsurf)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    uint32_t slice_mode_data;
    /* Must start out NULL: the error paths free it unconditionally. */
    uint32_t *slice_offsets = NULL;
    NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
    NVENCSTATUS nv_status;
    int res = 0;

    enum AVPictureType pict_type;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        slice_mode_data = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
        break;
    case AV_CODEC_ID_HEVC:
        slice_mode_data = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
        res = AVERROR(EINVAL);
        goto error;
    }

    slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets));
    if (!slice_offsets) {
        /* Report the failure instead of returning with res still 0. */
        res = AVERROR(ENOMEM);
        goto error;
    }

    lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;

    lock_params.doNotWait = 0;
    lock_params.outputBitstream = tmpoutsurf->output_surface;
    lock_params.sliceOffsets = slice_offsets;

    nv_status = p_nvenc->nvEncLockBitstream(ctx->nvencoder, &lock_params);
    if (nv_status != NV_ENC_SUCCESS) {
        res = nvenc_print_error(avctx, nv_status, "Failed locking bitstream buffer");
        goto error;
    }

    res = ff_alloc_packet2(avctx, pkt, lock_params.bitstreamSizeInBytes, 0);
    if (res) {
        /* The bitstream is locked at this point; release it before bailing out. */
        p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
        goto error;
    }

    memcpy(pkt->data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);

    /* An unlock failure is logged but does not abort packet output. */
    nv_status = p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
    if (nv_status != NV_ENC_SUCCESS)
        nvenc_print_error(avctx, nv_status, "Failed unlocking bitstream buffer, expect the gates of mordor to open");

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
        /* The input has been consumed: unmap it and drop the frame reference. */
        p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, tmpoutsurf->in_map.mappedResource);
        av_frame_unref(tmpoutsurf->in_ref);
        ctx->registered_frames[tmpoutsurf->reg_idx].mapped = 0;

        tmpoutsurf->input_surface = NULL;
    }

    switch (lock_params.pictureType) {
    case NV_ENC_PIC_TYPE_IDR:
        pkt->flags |= AV_PKT_FLAG_KEY;
        /* fallthrough: an IDR picture is also an I picture */
    case NV_ENC_PIC_TYPE_I:
        pict_type = AV_PICTURE_TYPE_I;
        break;
    case NV_ENC_PIC_TYPE_P:
        pict_type = AV_PICTURE_TYPE_P;
        break;
    case NV_ENC_PIC_TYPE_B:
        pict_type = AV_PICTURE_TYPE_B;
        break;
    case NV_ENC_PIC_TYPE_BI:
        pict_type = AV_PICTURE_TYPE_BI;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unknown picture type encountered, expect the output to be broken.\n");
        av_log(avctx, AV_LOG_ERROR, "Please report this error and include as much information on how to reproduce it as possible.\n");
        res = AVERROR_EXTERNAL;
        goto error;
    }

#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
    avctx->coded_frame->pict_type = pict_type;
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    ff_side_data_set_encoder_stats(pkt,
        (lock_params.frameAvgQP - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);

    res = nvenc_set_timestamp(avctx, &lock_params, pkt);
    if (res < 0)
        goto error2;

    av_free(slice_offsets);

    return 0;

error:
    /* Drop one queued timestamp on failures that happen before
     * nvenc_set_timestamp() is reached. */
    timestamp_queue_dequeue(ctx->timestamp_list);

error2:
    av_free(slice_offsets);

    return res;
}

1565
/*
 * Tell whether an encoded surface should be output now.
 *
 * Without flush, nothing is output until both initial pts values are known
 * when B-frames are enabled, and then only once the number of ready plus
 * pending surfaces has reached async_depth. With flush, any ready surface
 * is output.
 *
 * Returns 1 if output is ready, 0 otherwise.
 */
static int output_ready(AVCodecContext *avctx, int flush)
{
    NvencContext *ctx = avctx->priv_data;
    int ready_count, pending_count;

    /* when B-frames are enabled, we wait for two initial timestamps to
     * calculate the first dts */
    if (!flush && avctx->max_b_frames > 0) {
        if (ctx->initial_pts[0] == AV_NOPTS_VALUE ||
            ctx->initial_pts[1] == AV_NOPTS_VALUE)
            return 0;
    }

    ready_count   = av_fifo_size(ctx->output_surface_ready_queue) / sizeof(NvencSurface*);
    pending_count = av_fifo_size(ctx->output_surface_queue)       / sizeof(NvencSurface*);

    if (ready_count <= 0)
        return 0;
    if (flush)
        return 1;

    return ready_count + pending_count >= ctx->async_depth;
}

1583
int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1584
                          const AVFrame *frame, int *got_packet)
1585 1586
{
    NVENCSTATUS nv_status;
1587 1588
    NvencSurface *tmpoutsurf, *inSurf;
    int res;
1589 1590 1591 1592 1593 1594 1595 1596 1597

    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NV_ENC_PIC_PARAMS pic_params = { 0 };
    pic_params.version = NV_ENC_PIC_PARAMS_VER;

    if (frame) {
1598
        inSurf = get_free_frame(ctx);
1599 1600 1601 1602
        if (!inSurf) {
            av_log(avctx, AV_LOG_ERROR, "No free surfaces\n");
            return AVERROR_BUG;
        }
1603

1604 1605 1606 1607
        res = nvenc_upload_frame(avctx, frame, inSurf);
        if (res) {
            inSurf->lockCount = 0;
            return res;
1608 1609 1610 1611 1612 1613
        }

        pic_params.inputBuffer = inSurf->input_surface;
        pic_params.bufferFmt = inSurf->format;
        pic_params.inputWidth = avctx->width;
        pic_params.inputHeight = avctx->height;
1614
        pic_params.outputBitstream = inSurf->output_surface;
1615

1616
        if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
1617
            if (frame->top_field_first)
1618
                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
1619
            else
1620 1621 1622 1623 1624 1625 1626
                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
        } else {
            pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
        }

        pic_params.encodePicFlags = 0;
        pic_params.inputTimeStamp = frame->pts;
1627 1628

        nvenc_codec_specific_pic_params(avctx, &pic_params);
1629 1630 1631 1632 1633
    } else {
        pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
    }

    nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);
1634 1635 1636
    if (nv_status != NV_ENC_SUCCESS &&
        nv_status != NV_ENC_ERR_NEED_MORE_INPUT)
        return nvenc_print_error(avctx, nv_status, "EncodePicture failed!");
1637

1638
    if (frame) {
1639
        av_fifo_generic_write(ctx->output_surface_queue, &inSurf, sizeof(inSurf), NULL);
1640
        timestamp_queue_enqueue(ctx->timestamp_list, frame->pts);
1641 1642 1643 1644 1645

        if (ctx->initial_pts[0] == AV_NOPTS_VALUE)
            ctx->initial_pts[0] = frame->pts;
        else if (ctx->initial_pts[1] == AV_NOPTS_VALUE)
            ctx->initial_pts[1] = frame->pts;
1646
    }
1647

1648 1649
    /* all the pending buffers are now ready for output */
    if (nv_status == NV_ENC_SUCCESS) {
1650 1651 1652
        while (av_fifo_size(ctx->output_surface_queue) > 0) {
            av_fifo_generic_read(ctx->output_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
            av_fifo_generic_write(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
1653 1654 1655
        }
    }

1656
    if (output_ready(avctx, !frame)) {
1657
        av_fifo_generic_read(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
1658

1659
        res = process_output_surface(avctx, pkt, tmpoutsurf);
1660 1661 1662 1663

        if (res)
            return res;

1664 1665
        av_assert0(tmpoutsurf->lockCount);
        tmpoutsurf->lockCount--;
1666 1667 1668 1669 1670 1671 1672 1673

        *got_packet = 1;
    } else {
        *got_packet = 0;
    }

    return 0;
}