Commit 118beda3 authored by Anton Khirnov

nvenc: merge input and output surface structs

An input frame always corresponds to exactly one output packet, so there
is no point in complicating the situation by managing them separately.
parent 28259c13
...@@ -750,8 +750,8 @@ static int nvenc_alloc_surface(AVCodecContext *avctx, int idx) ...@@ -750,8 +750,8 @@ static int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
if (ret != NV_ENC_SUCCESS) if (ret != NV_ENC_SUCCESS)
return nvenc_print_error(avctx, ret, "CreateInputBuffer failed"); return nvenc_print_error(avctx, ret, "CreateInputBuffer failed");
ctx->in[idx].in = in_buffer.inputBuffer; ctx->frames[idx].in = in_buffer.inputBuffer;
ctx->in[idx].format = in_buffer.bufferFmt; ctx->frames[idx].format = in_buffer.bufferFmt;
/* 1MB is large enough to hold most output frames. /* 1MB is large enough to hold most output frames.
* NVENC increases this automatically if it's not enough. */ * NVENC increases this automatically if it's not enough. */
...@@ -763,8 +763,7 @@ static int nvenc_alloc_surface(AVCodecContext *avctx, int idx) ...@@ -763,8 +763,7 @@ static int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
if (ret != NV_ENC_SUCCESS) if (ret != NV_ENC_SUCCESS)
return nvenc_print_error(avctx, ret, "CreateBitstreamBuffer failed"); return nvenc_print_error(avctx, ret, "CreateBitstreamBuffer failed");
ctx->out[idx].out = out_buffer.bitstreamBuffer; ctx->frames[idx].out = out_buffer.bitstreamBuffer;
ctx->out[idx].busy = 0;
return 0; return 0;
} }
...@@ -777,21 +776,17 @@ static int nvenc_setup_surfaces(AVCodecContext *avctx) ...@@ -777,21 +776,17 @@ static int nvenc_setup_surfaces(AVCodecContext *avctx)
ctx->nb_surfaces = FFMAX(4 + avctx->max_b_frames, ctx->nb_surfaces = FFMAX(4 + avctx->max_b_frames,
ctx->nb_surfaces); ctx->nb_surfaces);
ctx->in = av_mallocz(ctx->nb_surfaces * sizeof(*ctx->in)); ctx->frames = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->frames));
if (!ctx->in) if (!ctx->frames)
return AVERROR(ENOMEM);
ctx->out = av_mallocz(ctx->nb_surfaces * sizeof(*ctx->out));
if (!ctx->out)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
ctx->timestamps = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t)); ctx->timestamps = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
if (!ctx->timestamps) if (!ctx->timestamps)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
ctx->pending = av_fifo_alloc(ctx->nb_surfaces * sizeof(ctx->out)); ctx->pending = av_fifo_alloc(ctx->nb_surfaces * sizeof(*ctx->frames));
if (!ctx->pending) if (!ctx->pending)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
ctx->ready = av_fifo_alloc(ctx->nb_surfaces * sizeof(ctx->out)); ctx->ready = av_fifo_alloc(ctx->nb_surfaces * sizeof(*ctx->frames));
if (!ctx->ready) if (!ctx->ready)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
...@@ -846,15 +841,14 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) ...@@ -846,15 +841,14 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
av_fifo_free(ctx->pending); av_fifo_free(ctx->pending);
av_fifo_free(ctx->ready); av_fifo_free(ctx->ready);
if (ctx->in) { if (ctx->frames) {
for (i = 0; i < ctx->nb_surfaces; ++i) { for (i = 0; i < ctx->nb_surfaces; ++i) {
nv->nvEncDestroyInputBuffer(ctx->nvenc_ctx, ctx->in[i].in); nv->nvEncDestroyInputBuffer(ctx->nvenc_ctx, ctx->frames[i].in);
nv->nvEncDestroyBitstreamBuffer(ctx->nvenc_ctx, ctx->out[i].out); nv->nvEncDestroyBitstreamBuffer(ctx->nvenc_ctx, ctx->frames[i].out);
} }
} }
av_freep(&ctx->in); av_freep(&ctx->frames);
av_freep(&ctx->out);
if (ctx->nvenc_ctx) if (ctx->nvenc_ctx)
nv->nvEncDestroyEncoder(ctx->nvenc_ctx); nv->nvEncDestroyEncoder(ctx->nvenc_ctx);
...@@ -895,27 +889,14 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx) ...@@ -895,27 +889,14 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
return 0; return 0;
} }
static NVENCInputSurface *get_input_surface(NVENCContext *ctx) static NVENCFrame *get_free_frame(NVENCContext *ctx)
{ {
int i; int i;
for (i = 0; i < ctx->nb_surfaces; i++) { for (i = 0; i < ctx->nb_surfaces; i++) {
if (!ctx->in[i].locked) { if (!ctx->frames[i].locked) {
ctx->in[i].locked = 1; ctx->frames[i].locked = 1;
return &ctx->in[i]; return &ctx->frames[i];
}
}
return NULL;
}
static NVENCOutputSurface *get_output_surface(NVENCContext *ctx)
{
int i;
for (i = 0; i < ctx->nb_surfaces; i++) {
if (!ctx->out[i].busy) {
return &ctx->out[i];
} }
} }
...@@ -976,20 +957,16 @@ static int nvenc_copy_frame(NV_ENC_LOCK_INPUT_BUFFER *in, const AVFrame *frame) ...@@ -976,20 +957,16 @@ static int nvenc_copy_frame(NV_ENC_LOCK_INPUT_BUFFER *in, const AVFrame *frame)
return 0; return 0;
} }
static int nvenc_enqueue_frame(AVCodecContext *avctx, const AVFrame *frame, static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
NVENCInputSurface **in_surf) NVENCFrame *nvenc_frame)
{ {
NVENCContext *ctx = avctx->priv_data; NVENCContext *ctx = avctx->priv_data;
NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs; NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
NV_ENC_LOCK_INPUT_BUFFER params = { 0 }; NV_ENC_LOCK_INPUT_BUFFER params = { 0 };
NVENCInputSurface *in = get_input_surface(ctx);
int ret; int ret;
if (!in)
return AVERROR_BUG;
params.version = NV_ENC_LOCK_INPUT_BUFFER_VER; params.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
params.inputBuffer = in->in; params.inputBuffer = nvenc_frame->in;
ret = nv->nvEncLockInputBuffer(ctx->nvenc_ctx, &params); ret = nv->nvEncLockInputBuffer(ctx->nvenc_ctx, &params);
...@@ -1000,16 +977,14 @@ static int nvenc_enqueue_frame(AVCodecContext *avctx, const AVFrame *frame, ...@@ -1000,16 +977,14 @@ static int nvenc_enqueue_frame(AVCodecContext *avctx, const AVFrame *frame,
if (ret < 0) if (ret < 0)
goto fail; goto fail;
ret = nv->nvEncUnlockInputBuffer(ctx->nvenc_ctx, in->in); ret = nv->nvEncUnlockInputBuffer(ctx->nvenc_ctx, nvenc_frame->in);
if (ret != NV_ENC_SUCCESS) if (ret != NV_ENC_SUCCESS)
return nvenc_print_error(avctx, ret, "Cannot unlock the buffer"); return nvenc_print_error(avctx, ret, "Cannot unlock the buffer");
*in_surf = in;
return 0; return 0;
fail: fail:
nv->nvEncUnlockInputBuffer(ctx->nvenc_ctx, in->in); nv->nvEncUnlockInputBuffer(ctx->nvenc_ctx, nvenc_frame->in);
return ret; return ret;
} }
...@@ -1045,19 +1020,6 @@ static inline int nvenc_dequeue_timestamp(AVFifoBuffer *f, int64_t *pts) ...@@ -1045,19 +1020,6 @@ static inline int nvenc_dequeue_timestamp(AVFifoBuffer *f, int64_t *pts)
return av_fifo_generic_read(f, pts, sizeof(*pts), NULL); return av_fifo_generic_read(f, pts, sizeof(*pts), NULL);
} }
static inline int nvenc_enqueue_surface(AVFifoBuffer *f,
NVENCOutputSurface *surf)
{
surf->busy = 1;
return av_fifo_generic_write(f, &surf, sizeof(surf), NULL);
}
static inline int nvenc_dequeue_surface(AVFifoBuffer *f,
NVENCOutputSurface **surf)
{
return av_fifo_generic_read(f, surf, sizeof(*surf), NULL);
}
static int nvenc_set_timestamp(AVCodecContext *avctx, static int nvenc_set_timestamp(AVCodecContext *avctx,
NV_ENC_LOCK_BITSTREAM *params, NV_ENC_LOCK_BITSTREAM *params,
AVPacket *pkt) AVPacket *pkt)
...@@ -1095,15 +1057,15 @@ static int nvenc_get_frame(AVCodecContext *avctx, AVPacket *pkt) ...@@ -1095,15 +1057,15 @@ static int nvenc_get_frame(AVCodecContext *avctx, AVPacket *pkt)
NVENCContext *ctx = avctx->priv_data; NVENCContext *ctx = avctx->priv_data;
NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs; NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
NV_ENC_LOCK_BITSTREAM params = { 0 }; NV_ENC_LOCK_BITSTREAM params = { 0 };
NVENCOutputSurface *out = NULL; NVENCFrame *frame;
int ret; int ret;
ret = nvenc_dequeue_surface(ctx->ready, &out); ret = av_fifo_generic_read(ctx->ready, &frame, sizeof(frame), NULL);
if (ret) if (ret)
return ret; return ret;
params.version = NV_ENC_LOCK_BITSTREAM_VER; params.version = NV_ENC_LOCK_BITSTREAM_VER;
params.outputBitstream = out->out; params.outputBitstream = frame->out;
ret = nv->nvEncLockBitstream(ctx->nvenc_ctx, &params); ret = nv->nvEncLockBitstream(ctx->nvenc_ctx, &params);
if (ret < 0) if (ret < 0)
...@@ -1115,11 +1077,11 @@ static int nvenc_get_frame(AVCodecContext *avctx, AVPacket *pkt) ...@@ -1115,11 +1077,11 @@ static int nvenc_get_frame(AVCodecContext *avctx, AVPacket *pkt)
memcpy(pkt->data, params.bitstreamBufferPtr, pkt->size); memcpy(pkt->data, params.bitstreamBufferPtr, pkt->size);
ret = nv->nvEncUnlockBitstream(ctx->nvenc_ctx, out->out); ret = nv->nvEncUnlockBitstream(ctx->nvenc_ctx, frame->out);
if (ret < 0) if (ret < 0)
return nvenc_print_error(avctx, ret, "Cannot unlock the bitstream"); return nvenc_print_error(avctx, ret, "Cannot unlock the bitstream");
out->busy = out->in->locked = 0; frame->locked = 0;
ret = nvenc_set_timestamp(avctx, &params, pkt); ret = nvenc_set_timestamp(avctx, &params, pkt);
if (ret < 0) if (ret < 0)
...@@ -1168,27 +1130,27 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, ...@@ -1168,27 +1130,27 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
NVENCContext *ctx = avctx->priv_data; NVENCContext *ctx = avctx->priv_data;
NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs; NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
NV_ENC_PIC_PARAMS params = { 0 }; NV_ENC_PIC_PARAMS params = { 0 };
NVENCInputSurface *in = NULL; NVENCFrame *nvenc_frame = NULL;
NVENCOutputSurface *out = NULL;
int enc_ret, ret; int enc_ret, ret;
params.version = NV_ENC_PIC_PARAMS_VER; params.version = NV_ENC_PIC_PARAMS_VER;
if (frame) { if (frame) {
ret = nvenc_enqueue_frame(avctx, frame, &in); nvenc_frame = get_free_frame(ctx);
if (ret < 0) if (!nvenc_frame) {
return ret; av_log(avctx, AV_LOG_ERROR, "No free surfaces\n");
out = get_output_surface(ctx);
if (!out)
return AVERROR_BUG; return AVERROR_BUG;
}
out->in = in; ret = nvenc_upload_frame(avctx, frame, nvenc_frame);
if (ret < 0)
return ret;
params.inputBuffer = in->in; params.inputBuffer = nvenc_frame->in;
params.bufferFmt = in->format; params.bufferFmt = nvenc_frame->format;
params.inputWidth = frame->width; params.inputWidth = frame->width;
params.inputHeight = frame->height; params.inputHeight = frame->height;
params.outputBitstream = out->out; params.outputBitstream = nvenc_frame->out;
params.inputTimeStamp = frame->pts; params.inputTimeStamp = frame->pts;
if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) { if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
...@@ -1219,8 +1181,8 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, ...@@ -1219,8 +1181,8 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
enc_ret != NV_ENC_ERR_NEED_MORE_INPUT) enc_ret != NV_ENC_ERR_NEED_MORE_INPUT)
return nvenc_print_error(avctx, enc_ret, "Error encoding the frame"); return nvenc_print_error(avctx, enc_ret, "Error encoding the frame");
if (out) { if (nvenc_frame) {
ret = nvenc_enqueue_surface(ctx->pending, out); ret = av_fifo_generic_write(ctx->pending, &nvenc_frame, sizeof(nvenc_frame), NULL);
if (ret < 0) if (ret < 0)
return ret; return ret;
} }
...@@ -1228,8 +1190,8 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, ...@@ -1228,8 +1190,8 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
/* all the pending buffers are now ready for output */ /* all the pending buffers are now ready for output */
if (enc_ret == NV_ENC_SUCCESS) { if (enc_ret == NV_ENC_SUCCESS) {
while (av_fifo_size(ctx->pending) > 0) { while (av_fifo_size(ctx->pending) > 0) {
av_fifo_generic_read(ctx->pending, &out, sizeof(out), NULL); av_fifo_generic_read(ctx->pending, &nvenc_frame, sizeof(nvenc_frame), NULL);
av_fifo_generic_write(ctx->ready, &out, sizeof(out), NULL); av_fifo_generic_write(ctx->ready, &nvenc_frame, sizeof(nvenc_frame), NULL);
} }
} }
......
...@@ -27,17 +27,12 @@ ...@@ -27,17 +27,12 @@
#include "avcodec.h" #include "avcodec.h"
typedef struct NVENCInputSurface { typedef struct NVENCFrame {
NV_ENC_INPUT_PTR in; NV_ENC_INPUT_PTR in;
NV_ENC_OUTPUT_PTR out;
NV_ENC_BUFFER_FORMAT format; NV_ENC_BUFFER_FORMAT format;
int locked; int locked;
} NVENCInputSurface; } NVENCFrame;
typedef struct NVENCOutputSurface {
NV_ENC_OUTPUT_PTR out;
NVENCInputSurface *in;
int busy;
} NVENCOutputSurface;
typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags); typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags);
typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count); typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count);
...@@ -107,8 +102,7 @@ typedef struct NVENCContext { ...@@ -107,8 +102,7 @@ typedef struct NVENCContext {
CUcontext cu_context; CUcontext cu_context;
int nb_surfaces; int nb_surfaces;
NVENCInputSurface *in; NVENCFrame *frames;
NVENCOutputSurface *out;
AVFifoBuffer *timestamps; AVFifoBuffer *timestamps;
AVFifoBuffer *pending, *ready; AVFifoBuffer *pending, *ready;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment