Commit 7cb053e4, authored by Ben Chang and committed by Luca Barbato

nvenc: Minimize the surface allocation

The previous default sets the allocated surfaces to 32 unless it is
user-overridden or the lookahead parameter is set.

Change the surface calculation for the default, B-frame and lookahead scenarios.
parent 2e8d88ad
...@@ -874,6 +874,44 @@ static int nvenc_setup_codec_config(AVCodecContext *avctx) ...@@ -874,6 +874,44 @@ static int nvenc_setup_codec_config(AVCodecContext *avctx)
return 0; return 0;
} }
/**
 * Work out how many surfaces the encoder should use and bound async_depth
 * accordingly.
 *
 * Reads ctx->config.frameIntervalP, ctx->rc_lookahead and the current
 * ctx->nb_surfaces (a value <= 0 is treated as "not set by the user").
 * Writes ctx->nb_surfaces, limited to the range [1, MAX_REGISTERED_FRAMES],
 * and lowers ctx->async_depth to at most nb_surfaces - 1.
 *
 * A warning is logged when a positive, user-supplied surface count has to be
 * raised to satisfy the lookahead or B-frame requirement.
 *
 * @param avctx codec context whose priv_data is the NVENCContext to update
 * @return always 0
 */
static int nvenc_recalc_surfaces(AVCodecContext *avctx)
{
    NVENCContext *ctx = avctx->priv_data;
    int nb_surfaces;

    // default minimum of 4 surfaces
    // multiply by 2 for number of NVENCs on gpu (hardcode to 2)
    // another multiply by 2 to avoid blocking next PBB group
    //
    // The requirement is computed in long long: rc_lookahead and
    // frameIntervalP come from user options, and adding/multiplying them as
    // int could overflow for very large values.
    long long required = FFMAX(4, (long long)ctx->config.frameIntervalP * 2 * 2);

    // lookahead enabled
    if (ctx->rc_lookahead > 0) {
        // +1 is to account for lkd_bound calculation later
        // +4 is to allow sufficient pipelining with lookahead
        long long with_lookahead = (long long)ctx->rc_lookahead +
                                   ctx->config.frameIntervalP + 1 + 4;

        required = FFMAX(required, with_lookahead);
    }

    // Never aim above the limit enforced at the end of this function; this
    // also makes the warnings below report a count that is actually applied.
    nb_surfaces = (int)FFMIN(required, MAX_REGISTERED_FRAMES);

    if (ctx->rc_lookahead > 0) {
        if (ctx->nb_surfaces > 0 && nb_surfaces > ctx->nb_surfaces) {
            av_log(avctx, AV_LOG_WARNING,
                   "Defined rc_lookahead requires more surfaces, "
                   "increasing used surfaces %d -> %d\n",
                   ctx->nb_surfaces, nb_surfaces);
        }
        ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces);
    } else if (ctx->nb_surfaces <= 0) {
        // nothing requested by the user: take the computed default
        ctx->nb_surfaces = nb_surfaces;
    } else if (ctx->config.frameIntervalP > 1 &&
               ctx->nb_surfaces < nb_surfaces) {
        av_log(avctx, AV_LOG_WARNING,
               "Defined b-frame requires more surfaces, "
               "increasing used surfaces %d -> %d\n",
               ctx->nb_surfaces, nb_surfaces);
        ctx->nb_surfaces = nb_surfaces;
    }
    // otherwise use user specified value

    ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces));
    ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1);

    return 0;
}
static int nvenc_setup_encoder(AVCodecContext *avctx) static int nvenc_setup_encoder(AVCodecContext *avctx)
{ {
NVENCContext *ctx = avctx->priv_data; NVENCContext *ctx = avctx->priv_data;
...@@ -956,6 +994,8 @@ static int nvenc_setup_encoder(AVCodecContext *avctx) ...@@ -956,6 +994,8 @@ static int nvenc_setup_encoder(AVCodecContext *avctx)
ctx->initial_pts[0] = AV_NOPTS_VALUE; ctx->initial_pts[0] = AV_NOPTS_VALUE;
ctx->initial_pts[1] = AV_NOPTS_VALUE; ctx->initial_pts[1] = AV_NOPTS_VALUE;
nvenc_recalc_surfaces(avctx);
nvenc_setup_rate_control(avctx); nvenc_setup_rate_control(avctx);
if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) { if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
...@@ -1057,11 +1097,6 @@ static int nvenc_setup_surfaces(AVCodecContext *avctx) ...@@ -1057,11 +1097,6 @@ static int nvenc_setup_surfaces(AVCodecContext *avctx)
NVENCContext *ctx = avctx->priv_data; NVENCContext *ctx = avctx->priv_data;
int i, ret; int i, ret;
ctx->nb_surfaces = FFMAX(4 + avctx->max_b_frames,
ctx->nb_surfaces);
ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1);
ctx->frames = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->frames)); ctx->frames = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->frames));
if (!ctx->frames) if (!ctx->frames)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
......
...@@ -72,14 +72,14 @@ static const AVOption options[] = { ...@@ -72,14 +72,14 @@ static const AVOption options[] = {
{ "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY }, 0, 0, VE, "rc" }, { "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY }, 0, 0, VE, "rc" },
{ "ll_2pass_size", "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" }, { "ll_2pass_size", "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
{ "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" }, { "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" },
{ "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, INT_MAX, VE }, { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
{ "device", "Select a specific NVENC device", OFFSET(device), AV_OPT_TYPE_INT, { .i64 = -1 }, -2, INT_MAX, VE, "device" }, { "device", "Select a specific NVENC device", OFFSET(device), AV_OPT_TYPE_INT, { .i64 = -1 }, -2, INT_MAX, VE, "device" },
{ "any", "Pick the first device available", 0, AV_OPT_TYPE_CONST, { .i64 = ANY_DEVICE }, 0, 0, VE, "device" }, { "any", "Pick the first device available", 0, AV_OPT_TYPE_CONST, { .i64 = ANY_DEVICE }, 0, 0, VE, "device" },
{ "list", "List the available devices", 0, AV_OPT_TYPE_CONST, { .i64 = LIST_DEVICES }, 0, 0, VE, "device" }, { "list", "List the available devices", 0, AV_OPT_TYPE_CONST, { .i64 = LIST_DEVICES }, 0, 0, VE, "device" },
{ "async_depth", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, { "async_depth", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
{ "delay", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, { "delay", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
#if NVENCAPI_MAJOR_VERSION >= 7 #if NVENCAPI_MAJOR_VERSION >= 7
{ "rc-lookahead", "Number of frames to look ahead for rate-control", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE }, { "rc-lookahead", "Number of frames to look ahead for rate-control", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, INT_MAX, VE },
{ "no-scenecut", "When lookahead is enabled, set this to 1 to disable adaptive I-frame insertion at scene cuts", OFFSET(no_scenecut), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, { "no-scenecut", "When lookahead is enabled, set this to 1 to disable adaptive I-frame insertion at scene cuts", OFFSET(no_scenecut), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
{ "b_adapt", "When lookahead is enabled, set this to 0 to disable adaptive B-frame decision", OFFSET(b_adapt), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE }, { "b_adapt", "When lookahead is enabled, set this to 0 to disable adaptive B-frame decision", OFFSET(b_adapt), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
{ "spatial-aq", "set to 1 to enable Spatial AQ", OFFSET(aq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, { "spatial-aq", "set to 1 to enable Spatial AQ", OFFSET(aq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
......
...@@ -72,14 +72,14 @@ static const AVOption options[] = { ...@@ -72,14 +72,14 @@ static const AVOption options[] = {
{ "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY }, 0, 0, VE, "rc" }, { "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY }, 0, 0, VE, "rc" },
{ "ll_2pass_size", "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" }, { "ll_2pass_size", "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
{ "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" }, { "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" },
{ "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, INT_MAX, VE }, { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
{ "device", "Select a specific NVENC device", OFFSET(device), AV_OPT_TYPE_INT, { .i64 = -1 }, -2, INT_MAX, VE, "device" }, { "device", "Select a specific NVENC device", OFFSET(device), AV_OPT_TYPE_INT, { .i64 = -1 }, -2, INT_MAX, VE, "device" },
{ "any", "Pick the first device available", 0, AV_OPT_TYPE_CONST, { .i64 = ANY_DEVICE }, 0, 0, VE, "device" }, { "any", "Pick the first device available", 0, AV_OPT_TYPE_CONST, { .i64 = ANY_DEVICE }, 0, 0, VE, "device" },
{ "list", "List the available devices", 0, AV_OPT_TYPE_CONST, { .i64 = LIST_DEVICES }, 0, 0, VE, "device" }, { "list", "List the available devices", 0, AV_OPT_TYPE_CONST, { .i64 = LIST_DEVICES }, 0, 0, VE, "device" },
{ "async_depth", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, { "async_depth", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
{ "delay", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, { "delay", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
#if NVENCAPI_MAJOR_VERSION >= 7 #if NVENCAPI_MAJOR_VERSION >= 7
{ "rc-lookahead", "Number of frames to look ahead for rate-control", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE }, { "rc-lookahead", "Number of frames to look ahead for rate-control", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, INT_MAX, VE },
{ "no-scenecut", "When lookahead is enabled, set this to 1 to disable adaptive I-frame insertion at scene cuts", OFFSET(no_scenecut), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, { "no-scenecut", "When lookahead is enabled, set this to 1 to disable adaptive I-frame insertion at scene cuts", OFFSET(no_scenecut), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
{ "spatial_aq", "set to 1 to enable Spatial AQ", OFFSET(aq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, { "spatial_aq", "set to 1 to enable Spatial AQ", OFFSET(aq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
{ "zerolatency", "Set 1 to indicate zero latency operation (no reordering delay)", OFFSET(zerolatency), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, { "zerolatency", "Set 1 to indicate zero latency operation (no reordering delay)", OFFSET(zerolatency), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment