Commit 43c417ac authored by Ganapathy Kasi, committed by Timo Rothenpieler

avcodec/nvenc: fix hw accelerated transcode with bframes

hw accelerated transcode (h264_cuvid -> h264_nvenc with -hwaccel cuvid) was
broken after the filtergraph initialization was changed to initialize the
decoder first, followed by the encoder (commit af1761f7).
While initializing the encoder with bframes, local buffers are allocated
internally in the encoder, which fails since no cuda context is available.
Pushing the correct cuda context before encoder initialization fixes the issue.
Also add push/pop of the cuda ctx when creating/destroying/mapping/unmapping
resources and when destroying the encoder session.
Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
parent b5a0971f
...@@ -392,9 +392,21 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx) ...@@ -392,9 +392,21 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
return 0; return 0;
fail3: fail3:
cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
return AVERROR_EXTERNAL;
}
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder); p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
ctx->nvencoder = NULL; ctx->nvencoder = NULL;
cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
return AVERROR_EXTERNAL;
}
fail2: fail2:
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal); dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
ctx->cu_context_internal = NULL; ctx->cu_context_internal = NULL;
...@@ -1008,6 +1020,8 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx) ...@@ -1008,6 +1020,8 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
NV_ENC_PRESET_CONFIG preset_config = { 0 }; NV_ENC_PRESET_CONFIG preset_config = { 0 };
NVENCSTATUS nv_status = NV_ENC_SUCCESS; NVENCSTATUS nv_status = NV_ENC_SUCCESS;
AVCPBProperties *cpb_props; AVCPBProperties *cpb_props;
CUresult cu_res;
CUcontext dummy;
int res = 0; int res = 0;
int dw, dh; int dw, dh;
...@@ -1098,7 +1112,20 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx) ...@@ -1098,7 +1112,20 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
if (res) if (res)
return res; return res;
cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
return AVERROR_EXTERNAL;
}
nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params); nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
return AVERROR_EXTERNAL;
}
if (nv_status != NV_ENC_SUCCESS) { if (nv_status != NV_ENC_SUCCESS) {
return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed"); return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
} }
...@@ -1201,6 +1228,9 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx) ...@@ -1201,6 +1228,9 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx) static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
{ {
NvencContext *ctx = avctx->priv_data; NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
CUresult cu_res;
CUcontext dummy;
int i, res; int i, res;
ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces)); ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces));
...@@ -1222,9 +1252,28 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx) ...@@ -1222,9 +1252,28 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
if (!ctx->output_surface_ready_queue) if (!ctx->output_surface_ready_queue)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
return AVERROR_EXTERNAL;
}
for (i = 0; i < ctx->nb_surfaces; i++) { for (i = 0; i < ctx->nb_surfaces; i++) {
if ((res = nvenc_alloc_surface(avctx, i)) < 0) if ((res = nvenc_alloc_surface(avctx, i)) < 0)
{
cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
return AVERROR_EXTERNAL;
}
return res; return res;
}
}
cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
return AVERROR_EXTERNAL;
} }
return 0; return 0;
...@@ -1268,8 +1317,16 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) ...@@ -1268,8 +1317,16 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
NvencContext *ctx = avctx->priv_data; NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
CUresult cu_res;
CUcontext dummy;
int i; int i;
cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
return AVERROR_EXTERNAL;
}
/* the encoder has to be flushed before it can be closed */ /* the encoder has to be flushed before it can be closed */
if (ctx->nvencoder) { if (ctx->nvencoder) {
NV_ENC_PIC_PARAMS params = { .version = NV_ENC_PIC_PARAMS_VER, NV_ENC_PIC_PARAMS params = { .version = NV_ENC_PIC_PARAMS_VER,
...@@ -1311,6 +1368,12 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) ...@@ -1311,6 +1368,12 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder); p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
ctx->nvencoder = NULL; ctx->nvencoder = NULL;
cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
return AVERROR_EXTERNAL;
}
if (ctx->cu_context_internal) if (ctx->cu_context_internal)
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal); dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
ctx->cu_context = ctx->cu_context_internal = NULL; ctx->cu_context = ctx->cu_context_internal = NULL;
...@@ -1842,8 +1905,20 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, ...@@ -1842,8 +1905,20 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
if (output_ready(avctx, !frame)) { if (output_ready(avctx, !frame)) {
av_fifo_generic_read(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL); av_fifo_generic_read(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
return AVERROR_EXTERNAL;
}
res = process_output_surface(avctx, pkt, tmpoutsurf); res = process_output_surface(avctx, pkt, tmpoutsurf);
cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
return AVERROR_EXTERNAL;
}
if (res) if (res)
return res; return res;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment