Commit 19d3d0c0 authored by Philip Langdale

avutil/hwcontext_cuda: Define and use common CHECK_CU()

We have a pattern of wrapping CUDA calls to print errors and
normalise return values that is used in a couple of places. To
avoid duplication and increase consistency, let's put the wrapper
implementation in a shared place and use it everywhere.

Affects:

* avcodec/cuviddec
* avcodec/nvdec
* avcodec/nvenc
* avfilter/vf_scale_cuda
* avfilter/vf_scale_npp
* avfilter/vf_thumbnail_cuda
* avfilter/vf_transpose_npp
* avfilter/vf_yadif_cuda
parent f0f2832a
...@@ -124,7 +124,7 @@ OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ ...@@ -124,7 +124,7 @@ OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \
motion_est.o ratecontrol.o \ motion_est.o ratecontrol.o \
mpegvideoencdsp.o mpegvideoencdsp.o
OBJS-$(CONFIG_MSS34DSP) += mss34dsp.o OBJS-$(CONFIG_MSS34DSP) += mss34dsp.o
OBJS-$(CONFIG_NVENC) += nvenc.o OBJS-$(CONFIG_NVENC) += nvenc.o cuda_check.o
OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o OBJS-$(CONFIG_QPELDSP) += qpeldsp.o
OBJS-$(CONFIG_QSV) += qsv.o OBJS-$(CONFIG_QSV) += qsv.o
...@@ -346,7 +346,7 @@ OBJS-$(CONFIG_H264_DECODER) += h264dec.o h264_cabac.o h264_cavlc.o \ ...@@ -346,7 +346,7 @@ OBJS-$(CONFIG_H264_DECODER) += h264dec.o h264_cabac.o h264_cavlc.o \
h264_refs.o h264_sei.o \ h264_refs.o h264_sei.o \
h264_slice.o h264data.o h264_slice.o h264data.o
OBJS-$(CONFIG_H264_AMF_ENCODER) += amfenc_h264.o OBJS-$(CONFIG_H264_AMF_ENCODER) += amfenc_h264.o
OBJS-$(CONFIG_H264_CUVID_DECODER) += cuviddec.o OBJS-$(CONFIG_H264_CUVID_DECODER) += cuviddec.o cuda_check.o
OBJS-$(CONFIG_H264_MEDIACODEC_DECODER) += mediacodecdec.o OBJS-$(CONFIG_H264_MEDIACODEC_DECODER) += mediacodecdec.o
OBJS-$(CONFIG_H264_MMAL_DECODER) += mmaldec.o OBJS-$(CONFIG_H264_MMAL_DECODER) += mmaldec.o
OBJS-$(CONFIG_H264_NVENC_ENCODER) += nvenc_h264.o OBJS-$(CONFIG_H264_NVENC_ENCODER) += nvenc_h264.o
...@@ -852,7 +852,7 @@ OBJS-$(CONFIG_ADPCM_YAMAHA_ENCODER) += adpcmenc.o adpcm_data.o ...@@ -852,7 +852,7 @@ OBJS-$(CONFIG_ADPCM_YAMAHA_ENCODER) += adpcmenc.o adpcm_data.o
# hardware accelerators # hardware accelerators
OBJS-$(CONFIG_D3D11VA) += dxva2.o OBJS-$(CONFIG_D3D11VA) += dxva2.o
OBJS-$(CONFIG_DXVA2) += dxva2.o OBJS-$(CONFIG_DXVA2) += dxva2.o
OBJS-$(CONFIG_NVDEC) += nvdec.o OBJS-$(CONFIG_NVDEC) += nvdec.o cuda_check.o
OBJS-$(CONFIG_VAAPI) += vaapi_decode.o OBJS-$(CONFIG_VAAPI) += vaapi_decode.o
OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o
OBJS-$(CONFIG_VDPAU) += vdpau.o OBJS-$(CONFIG_VDPAU) += vdpau.o
......
#include "libavutil/cuda_check.c"
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "libavutil/mathematics.h" #include "libavutil/mathematics.h"
#include "libavutil/hwcontext.h" #include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda_internal.h" #include "libavutil/hwcontext_cuda_internal.h"
#include "libavutil/cuda_check.h"
#include "libavutil/fifo.h" #include "libavutil/fifo.h"
#include "libavutil/log.h" #include "libavutil/log.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
...@@ -95,29 +96,7 @@ typedef struct CuvidParsedFrame ...@@ -95,29 +96,7 @@ typedef struct CuvidParsedFrame
int is_deinterlacing; int is_deinterlacing;
} CuvidParsedFrame; } CuvidParsedFrame;
static int check_cu(AVCodecContext *avctx, CUresult err, const char *func) #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
{
CuvidContext *ctx = avctx->priv_data;
const char *err_name;
const char *err_string;
av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
if (err == CUDA_SUCCESS)
return 0;
ctx->cudl->cuGetErrorName(err, &err_name);
ctx->cudl->cuGetErrorString(err, &err_string);
av_log(avctx, AV_LOG_ERROR, "%s failed", func);
if (err_name && err_string)
av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
av_log(avctx, AV_LOG_ERROR, "\n");
return AVERROR_EXTERNAL;
}
#define CHECK_CU(x) check_cu(avctx, (x), #x)
static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format) static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
{ {
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "libavutil/error.h" #include "libavutil/error.h"
#include "libavutil/hwcontext.h" #include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda_internal.h" #include "libavutil/hwcontext_cuda_internal.h"
#include "libavutil/cuda_check.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
#include "libavutil/pixfmt.h" #include "libavutil/pixfmt.h"
...@@ -50,6 +51,8 @@ typedef struct NVDECFramePool { ...@@ -50,6 +51,8 @@ typedef struct NVDECFramePool {
unsigned int nb_allocated; unsigned int nb_allocated;
} NVDECFramePool; } NVDECFramePool;
#define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x)
static int map_avcodec_id(enum AVCodecID id) static int map_avcodec_id(enum AVCodecID id)
{ {
switch (id) { switch (id) {
...@@ -86,7 +89,7 @@ static int map_chroma_format(enum AVPixelFormat pix_fmt) ...@@ -86,7 +89,7 @@ static int map_chroma_format(enum AVPixelFormat pix_fmt)
static int nvdec_test_capabilities(NVDECDecoder *decoder, static int nvdec_test_capabilities(NVDECDecoder *decoder,
CUVIDDECODECREATEINFO *params, void *logctx) CUVIDDECODECREATEINFO *params, void *logctx)
{ {
CUresult err; int ret;
CUVIDDECODECAPS caps = { 0 }; CUVIDDECODECAPS caps = { 0 };
caps.eCodecType = params->CodecType; caps.eCodecType = params->CodecType;
...@@ -105,11 +108,9 @@ static int nvdec_test_capabilities(NVDECDecoder *decoder, ...@@ -105,11 +108,9 @@ static int nvdec_test_capabilities(NVDECDecoder *decoder,
return 0; return 0;
} }
err = decoder->cvdl->cuvidGetDecoderCaps(&caps); ret = CHECK_CU(decoder->cvdl->cuvidGetDecoderCaps(&caps));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(logctx, AV_LOG_ERROR, "Failed querying decoder capabilities\n"); return ret;
return AVERROR_UNKNOWN;
}
av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n"); av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n");
av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n", av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n",
...@@ -150,10 +151,11 @@ static void nvdec_decoder_free(void *opaque, uint8_t *data) ...@@ -150,10 +151,11 @@ static void nvdec_decoder_free(void *opaque, uint8_t *data)
NVDECDecoder *decoder = (NVDECDecoder*)data; NVDECDecoder *decoder = (NVDECDecoder*)data;
if (decoder->decoder) { if (decoder->decoder) {
void *logctx = decoder->hw_device_ref->data;
CUcontext dummy; CUcontext dummy;
decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
decoder->cvdl->cuvidDestroyDecoder(decoder->decoder); CHECK_CU(decoder->cvdl->cuvidDestroyDecoder(decoder->decoder));
decoder->cudl->cuCtxPopCurrent(&dummy); CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
} }
av_buffer_unref(&decoder->hw_device_ref); av_buffer_unref(&decoder->hw_device_ref);
...@@ -173,7 +175,6 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref, ...@@ -173,7 +175,6 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
NVDECDecoder *decoder; NVDECDecoder *decoder;
CUcontext dummy; CUcontext dummy;
CUresult err;
int ret; int ret;
decoder = av_mallocz(sizeof(*decoder)); decoder = av_mallocz(sizeof(*decoder));
...@@ -202,25 +203,21 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref, ...@@ -202,25 +203,21 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
goto fail; goto fail;
} }
err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
if (err != CUDA_SUCCESS) { if (ret < 0)
ret = AVERROR_UNKNOWN;
goto fail; goto fail;
}
ret = nvdec_test_capabilities(decoder, params, logctx); ret = nvdec_test_capabilities(decoder, params, logctx);
if (ret < 0) { if (ret < 0) {
decoder->cudl->cuCtxPopCurrent(&dummy); CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
goto fail; goto fail;
} }
err = decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params); ret = CHECK_CU(decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params));
decoder->cudl->cuCtxPopCurrent(&dummy); CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
if (err != CUDA_SUCCESS) { if (ret < 0) {
av_log(logctx, AV_LOG_ERROR, "Error creating a NVDEC decoder: %d\n", err);
ret = AVERROR_UNKNOWN;
goto fail; goto fail;
} }
...@@ -364,21 +361,18 @@ static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data) ...@@ -364,21 +361,18 @@ static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
{ {
NVDECFrame *unmap_data = (NVDECFrame*)data; NVDECFrame *unmap_data = (NVDECFrame*)data;
NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data; NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data;
void *logctx = decoder->hw_device_ref->data;
CUdeviceptr devptr = (CUdeviceptr)opaque; CUdeviceptr devptr = (CUdeviceptr)opaque;
CUresult err; int ret;
CUcontext dummy; CUcontext dummy;
err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
goto finish; goto finish;
}
err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr); CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
if (err != CUDA_SUCCESS)
av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n");
decoder->cudl->cuCtxPopCurrent(&dummy); CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
finish: finish:
av_buffer_unref(&unmap_data->idx_ref); av_buffer_unref(&unmap_data->idx_ref);
...@@ -395,7 +389,6 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) ...@@ -395,7 +389,6 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
CUVIDPROCPARAMS vpp = { 0 }; CUVIDPROCPARAMS vpp = { 0 };
NVDECFrame *unmap_data = NULL; NVDECFrame *unmap_data = NULL;
CUresult err;
CUcontext dummy; CUcontext dummy;
CUdeviceptr devptr; CUdeviceptr devptr;
...@@ -406,18 +399,15 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) ...@@ -406,18 +399,15 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
vpp.progressive_frame = 1; vpp.progressive_frame = 1;
vpp.output_stream = decoder->stream; vpp.output_stream = decoder->stream;
err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
if (err != CUDA_SUCCESS) if (ret < 0)
return AVERROR_UNKNOWN; return ret;
err = decoder->cvdl->cuvidMapVideoFrame(decoder->decoder, cf->idx, &devptr, ret = CHECK_CU(decoder->cvdl->cuvidMapVideoFrame(decoder->decoder,
&pitch, &vpp); cf->idx, &devptr,
if (err != CUDA_SUCCESS) { &pitch, &vpp));
av_log(logctx, AV_LOG_ERROR, "Error mapping a picture with CUVID: %d\n", if (ret < 0)
err);
ret = AVERROR_UNKNOWN;
goto finish; goto finish;
}
unmap_data = av_mallocz(sizeof(*unmap_data)); unmap_data = av_mallocz(sizeof(*unmap_data));
if (!unmap_data) { if (!unmap_data) {
...@@ -447,14 +437,14 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) ...@@ -447,14 +437,14 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
copy_fail: copy_fail:
if (!frame->buf[1]) { if (!frame->buf[1]) {
decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr); CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
av_freep(&unmap_data); av_freep(&unmap_data);
} else { } else {
av_buffer_unref(&frame->buf[1]); av_buffer_unref(&frame->buf[1]);
} }
finish: finish:
decoder->cudl->cuCtxPopCurrent(&dummy); CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
return ret; return ret;
} }
...@@ -504,9 +494,9 @@ int ff_nvdec_end_frame(AVCodecContext *avctx) ...@@ -504,9 +494,9 @@ int ff_nvdec_end_frame(AVCodecContext *avctx)
{ {
NVDECContext *ctx = avctx->internal->hwaccel_priv_data; NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
NVDECDecoder *decoder = (NVDECDecoder*)ctx->decoder_ref->data; NVDECDecoder *decoder = (NVDECDecoder*)ctx->decoder_ref->data;
void *logctx = avctx;
CUVIDPICPARAMS *pp = &ctx->pic_params; CUVIDPICPARAMS *pp = &ctx->pic_params;
CUresult err;
CUcontext dummy; CUcontext dummy;
int ret = 0; int ret = 0;
...@@ -516,20 +506,16 @@ int ff_nvdec_end_frame(AVCodecContext *avctx) ...@@ -516,20 +506,16 @@ int ff_nvdec_end_frame(AVCodecContext *avctx)
pp->nNumSlices = ctx->nb_slices; pp->nNumSlices = ctx->nb_slices;
pp->pSliceDataOffsets = ctx->slice_offsets; pp->pSliceDataOffsets = ctx->slice_offsets;
err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
if (err != CUDA_SUCCESS) if (ret < 0)
return AVERROR_UNKNOWN; return ret;
err = decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params); ret = CHECK_CU(decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(avctx, AV_LOG_ERROR, "Error decoding a picture with NVDEC: %d\n",
err);
ret = AVERROR_UNKNOWN;
goto finish; goto finish;
}
finish: finish:
decoder->cudl->cuCtxPopCurrent(&dummy); CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
return ret; return ret;
} }
......
...@@ -25,12 +25,15 @@ ...@@ -25,12 +25,15 @@
#include "libavutil/hwcontext_cuda.h" #include "libavutil/hwcontext_cuda.h"
#include "libavutil/hwcontext.h" #include "libavutil/hwcontext.h"
#include "libavutil/cuda_check.h"
#include "libavutil/imgutils.h" #include "libavutil/imgutils.h"
#include "libavutil/avassert.h" #include "libavutil/avassert.h"
#include "libavutil/mem.h" #include "libavutil/mem.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
#include "internal.h" #include "internal.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x)
#define NVENC_CAP 0x30 #define NVENC_CAP 0x30
#define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \ #define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \
rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \ rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \
...@@ -183,37 +186,23 @@ static int nvenc_push_context(AVCodecContext *avctx) ...@@ -183,37 +186,23 @@ static int nvenc_push_context(AVCodecContext *avctx)
{ {
NvencContext *ctx = avctx->priv_data; NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
CUresult cu_res;
if (ctx->d3d11_device) if (ctx->d3d11_device)
return 0; return 0;
cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context); return CHECK_CU(dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context));
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
return AVERROR_EXTERNAL;
}
return 0;
} }
static int nvenc_pop_context(AVCodecContext *avctx) static int nvenc_pop_context(AVCodecContext *avctx)
{ {
NvencContext *ctx = avctx->priv_data; NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
CUresult cu_res;
CUcontext dummy; CUcontext dummy;
if (ctx->d3d11_device) if (ctx->d3d11_device)
return 0; return 0;
cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy); return CHECK_CU(dl_fn->cuda_dl->cuCtxPopCurrent(&dummy));
if (cu_res != CUDA_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
return AVERROR_EXTERNAL;
}
return 0;
} }
static av_cold int nvenc_open_session(AVCodecContext *avctx) static av_cold int nvenc_open_session(AVCodecContext *avctx)
...@@ -406,32 +395,23 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx) ...@@ -406,32 +395,23 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
char name[128] = { 0}; char name[128] = { 0};
int major, minor, ret; int major, minor, ret;
CUresult cu_res;
CUdevice cu_device; CUdevice cu_device;
int loglevel = AV_LOG_VERBOSE; int loglevel = AV_LOG_VERBOSE;
if (ctx->device == LIST_DEVICES) if (ctx->device == LIST_DEVICES)
loglevel = AV_LOG_INFO; loglevel = AV_LOG_INFO;
cu_res = dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx); ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx));
if (cu_res != CUDA_SUCCESS) { if (ret < 0)
av_log(avctx, AV_LOG_ERROR, return ret;
"Cannot access the CUDA device %d\n",
idx);
return -1;
}
cu_res = dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device); ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device));
if (cu_res != CUDA_SUCCESS) { if (ret < 0)
av_log(avctx, AV_LOG_ERROR, "cuDeviceGetName failed on device %d\n", idx); return ret;
return -1;
}
cu_res = dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device); ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device));
if (cu_res != CUDA_SUCCESS) { if (ret < 0)
av_log(avctx, AV_LOG_ERROR, "cuDeviceComputeCapability failed on device %d\n", idx); return ret;
return -1;
}
av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor); av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
if (((major << 4) | minor) < NVENC_CAP) { if (((major << 4) | minor) < NVENC_CAP) {
...@@ -442,11 +422,9 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx) ...@@ -442,11 +422,9 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
if (ctx->device != idx && ctx->device != ANY_DEVICE) if (ctx->device != idx && ctx->device != ANY_DEVICE)
return -1; return -1;
cu_res = dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device); ret = CHECK_CU(dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device));
if (cu_res != CUDA_SUCCESS) { if (ret < 0)
av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
goto fail; goto fail;
}
ctx->cu_context = ctx->cu_context_internal; ctx->cu_context = ctx->cu_context_internal;
...@@ -477,7 +455,7 @@ fail3: ...@@ -477,7 +455,7 @@ fail3:
return ret; return ret;
fail2: fail2:
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal); CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
ctx->cu_context_internal = NULL; ctx->cu_context_internal = NULL;
fail: fail:
...@@ -555,17 +533,11 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx) ...@@ -555,17 +533,11 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx)
} else { } else {
int i, nb_devices = 0; int i, nb_devices = 0;
if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) { if (CHECK_CU(dl_fn->cuda_dl->cuInit(0)) < 0)
av_log(avctx, AV_LOG_ERROR,
"Cannot init CUDA\n");
return AVERROR_UNKNOWN; return AVERROR_UNKNOWN;
}
if ((dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) != CUDA_SUCCESS) { if (CHECK_CU(dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) < 0)
av_log(avctx, AV_LOG_ERROR,
"Cannot enumerate the CUDA devices\n");
return AVERROR_UNKNOWN; return AVERROR_UNKNOWN;
}
if (!nb_devices) { if (!nb_devices) {
av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n"); av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
...@@ -1460,7 +1432,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) ...@@ -1460,7 +1432,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
ctx->nvencoder = NULL; ctx->nvencoder = NULL;
if (ctx->cu_context_internal) if (ctx->cu_context_internal)
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal); CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
ctx->cu_context = ctx->cu_context_internal = NULL; ctx->cu_context = ctx->cu_context_internal = NULL;
#if CONFIG_D3D11VA #if CONFIG_D3D11VA
......
...@@ -334,8 +334,9 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o ...@@ -334,8 +334,9 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o
OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o
OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o
OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale.o OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale.o
OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o \
OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o cuda_check.o
OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o cuda_check.o
OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o
OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale.o vaapi_vpp.o OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale.o vaapi_vpp.o
OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale.o OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale.o
...@@ -376,7 +377,8 @@ OBJS-$(CONFIG_TBLEND_FILTER) += vf_blend.o framesync.o ...@@ -376,7 +377,8 @@ OBJS-$(CONFIG_TBLEND_FILTER) += vf_blend.o framesync.o
OBJS-$(CONFIG_TELECINE_FILTER) += vf_telecine.o OBJS-$(CONFIG_TELECINE_FILTER) += vf_telecine.o
OBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o framesync.o OBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o framesync.o
OBJS-$(CONFIG_THUMBNAIL_FILTER) += vf_thumbnail.o OBJS-$(CONFIG_THUMBNAIL_FILTER) += vf_thumbnail.o
OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o \
cuda_check.o
OBJS-$(CONFIG_TILE_FILTER) += vf_tile.o OBJS-$(CONFIG_TILE_FILTER) += vf_tile.o
OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o
OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o
...@@ -386,7 +388,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o ...@@ -386,7 +388,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o
opencl/tonemap.o opencl/colorspace_common.o opencl/tonemap.o opencl/colorspace_common.o
OBJS-$(CONFIG_TPAD_FILTER) += vf_tpad.o OBJS-$(CONFIG_TPAD_FILTER) += vf_tpad.o
OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o
OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o cuda_check.o
OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o
OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o
OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o
...@@ -410,7 +412,8 @@ OBJS-$(CONFIG_WEAVE_FILTER) += vf_weave.o ...@@ -410,7 +412,8 @@ OBJS-$(CONFIG_WEAVE_FILTER) += vf_weave.o
OBJS-$(CONFIG_XBR_FILTER) += vf_xbr.o OBJS-$(CONFIG_XBR_FILTER) += vf_xbr.o
OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o
OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o yadif_common.o OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o yadif_common.o
OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o yadif_common.o OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \
yadif_common.o cuda_check.o
OBJS-$(CONFIG_ZMQ_FILTER) += f_zmq.o OBJS-$(CONFIG_ZMQ_FILTER) += f_zmq.o
OBJS-$(CONFIG_ZOOMPAN_FILTER) += vf_zoompan.o OBJS-$(CONFIG_ZOOMPAN_FILTER) += vf_zoompan.o
OBJS-$(CONFIG_ZSCALE_FILTER) += vf_zscale.o OBJS-$(CONFIG_ZSCALE_FILTER) += vf_zscale.o
......
#include "libavutil/cuda_check.c"
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "libavutil/common.h" #include "libavutil/common.h"
#include "libavutil/hwcontext.h" #include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda.h" #include "libavutil/hwcontext_cuda.h"
#include "libavutil/cuda_check.h"
#include "libavutil/internal.h" #include "libavutil/internal.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
...@@ -52,6 +53,8 @@ static const enum AVPixelFormat supported_formats[] = { ...@@ -52,6 +53,8 @@ static const enum AVPixelFormat supported_formats[] = {
#define BLOCKX 32 #define BLOCKX 32
#define BLOCKY 16 #define BLOCKY 16
#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x)
typedef struct CUDAScaleContext { typedef struct CUDAScaleContext {
const AVClass *class; const AVClass *class;
enum AVPixelFormat in_fmt; enum AVPixelFormat in_fmt;
...@@ -255,55 +258,48 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink) ...@@ -255,55 +258,48 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
CUresult err;
int w, h; int w, h;
int ret; int ret;
extern char vf_scale_cuda_ptx[]; extern char vf_scale_cuda_ptx[];
err = cuCtxPushCurrent(cuda_ctx); ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n");
ret = AVERROR_UNKNOWN;
goto fail; goto fail;
}
err = cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx); ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Error loading module data\n");
ret = AVERROR_UNKNOWN;
goto fail; goto fail;
}
cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar"); CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar"));
cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2"); CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2"));
cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4"); CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4"));
cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort"); CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort"));
cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2"); CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2"));
cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4"); CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4"));
cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"));
cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"));
cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex"); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex"));
cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"));
cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"));
cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex"); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex"));
cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER); CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER));
cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER); CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER));
cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER); CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER));
cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER); CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER));
cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER); CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER));
cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER); CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER));
cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR));
cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR));
cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR));
cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR));
cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR));
cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR));
cuCtxPopCurrent(&dummy); CHECK_CU(cuCtxPopCurrent(&dummy));
if ((ret = ff_scale_eval_dimensions(s, if ((ret = ff_scale_eval_dimensions(s,
s->w_expr, s->h_expr, s->w_expr, s->h_expr,
...@@ -339,7 +335,7 @@ fail: ...@@ -339,7 +335,7 @@ fail:
return ret; return ret;
} }
static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex, int channels, static int call_resize_kernel(CUDAScaleContext *ctx, CUfunction func, CUtexref tex, int channels,
uint8_t *src_dptr, int src_width, int src_height, int src_pitch, uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
int pixel_size) int pixel_size)
...@@ -358,8 +354,9 @@ static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex ...@@ -358,8 +354,9 @@ static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex
desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; desc.Format = CU_AD_FORMAT_UNSIGNED_INT16;
} }
cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size); CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size));
cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL); CHECK_CU(cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL));
return 0; return 0;
} }
...@@ -470,7 +467,6 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in) ...@@ -470,7 +467,6 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
AVFrame *out = NULL; AVFrame *out = NULL;
CUresult err;
CUcontext dummy; CUcontext dummy;
int ret = 0; int ret = 0;
...@@ -480,15 +476,13 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in) ...@@ -480,15 +476,13 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
goto fail; goto fail;
} }
err = cuCtxPushCurrent(device_hwctx->cuda_ctx); ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx));
if (err != CUDA_SUCCESS) { if (ret < 0)
ret = AVERROR_UNKNOWN;
goto fail; goto fail;
}
ret = cudascale_scale(ctx, out, in); ret = cudascale_scale(ctx, out, in);
cuCtxPopCurrent(&dummy); CHECK_CU(cuCtxPopCurrent(&dummy));
if (ret < 0) if (ret < 0)
goto fail; goto fail;
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "libavutil/common.h" #include "libavutil/common.h"
#include "libavutil/hwcontext.h" #include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda_internal.h" #include "libavutil/hwcontext_cuda_internal.h"
#include "libavutil/cuda_check.h"
#include "libavutil/internal.h" #include "libavutil/internal.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
...@@ -39,6 +40,8 @@ ...@@ -39,6 +40,8 @@
#include "scale.h" #include "scale.h"
#include "video.h" #include "video.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x)
static const enum AVPixelFormat supported_formats[] = { static const enum AVPixelFormat supported_formats[] = {
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P,
AV_PIX_FMT_NV12, AV_PIX_FMT_NV12,
...@@ -498,7 +501,6 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) ...@@ -498,7 +501,6 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in)
AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
AVFrame *out = NULL; AVFrame *out = NULL;
CUresult err;
CUcontext dummy; CUcontext dummy;
int ret = 0; int ret = 0;
...@@ -511,15 +513,13 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) ...@@ -511,15 +513,13 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in)
goto fail; goto fail;
} }
err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx); ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx));
if (err != CUDA_SUCCESS) { if (ret < 0)
ret = AVERROR_UNKNOWN;
goto fail; goto fail;
}
ret = nppscale_scale(ctx, out, in); ret = nppscale_scale(ctx, out, in);
device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy); CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy));
if (ret < 0) if (ret < 0)
goto fail; goto fail;
......
...@@ -24,12 +24,15 @@ ...@@ -24,12 +24,15 @@
#include "libavutil/hwcontext.h" #include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda.h" #include "libavutil/hwcontext_cuda.h"
#include "libavutil/cuda_check.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
#include "avfilter.h" #include "avfilter.h"
#include "internal.h" #include "internal.h"
#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x)
#define HIST_SIZE (3*256) #define HIST_SIZE (3*256)
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
#define BLOCKX 32 #define BLOCKX 32
...@@ -154,7 +157,7 @@ static AVFrame *get_best_frame(AVFilterContext *ctx) ...@@ -154,7 +157,7 @@ static AVFrame *get_best_frame(AVFilterContext *ctx)
return picref; return picref;
} }
static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref tex, int channels, static int thumbnail_kernel(ThumbnailCudaContext *ctx, CUfunction func, CUtexref tex, int channels,
int *histogram, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, int pixel_size) int *histogram, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, int pixel_size)
{ {
CUdeviceptr src_devptr = (CUdeviceptr)src_dptr; CUdeviceptr src_devptr = (CUdeviceptr)src_dptr;
...@@ -171,8 +174,10 @@ static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref t ...@@ -171,8 +174,10 @@ static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref t
desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; desc.Format = CU_AD_FORMAT_UNSIGNED_INT16;
} }
cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch); CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch));
cuLaunchKernel(func, DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args, NULL); CHECK_CU(cuLaunchKernel(func,
DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1,
BLOCKX, BLOCKY, 1, 0, 0, args, NULL));
return 0; return 0;
} }
...@@ -235,7 +240,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) ...@@ -235,7 +240,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
int *hist = s->frames[s->n].histogram; int *hist = s->frames[s->n].histogram;
AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data; AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data;
AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx; AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx;
CUresult err;
CUcontext dummy; CUcontext dummy;
CUDA_MEMCPY2D cpy = { 0 }; CUDA_MEMCPY2D cpy = { 0 };
int ret = 0; int ret = 0;
...@@ -243,11 +247,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) ...@@ -243,11 +247,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
// keep a reference of each frame // keep a reference of each frame
s->frames[s->n].buf = frame; s->frames[s->n].buf = frame;
err = cuCtxPushCurrent(device_hwctx->cuda_ctx); ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx));
if (err != CUDA_SUCCESS) if (ret < 0)
return AVERROR_UNKNOWN; return ret;
cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int)); CHECK_CU(cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int)));
thumbnail(ctx, (int*)s->data, frame); thumbnail(ctx, (int*)s->data, frame);
...@@ -260,11 +264,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) ...@@ -260,11 +264,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
cpy.WidthInBytes = HIST_SIZE * sizeof(int); cpy.WidthInBytes = HIST_SIZE * sizeof(int);
cpy.Height = 1; cpy.Height = 1;
err = cuMemcpy2D(&cpy); ret = CHECK_CU(cuMemcpy2D(&cpy));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); return ret;
return AVERROR_UNKNOWN;
}
if (hw_frames_ctx->sw_format == AV_PIX_FMT_NV12 || hw_frames_ctx->sw_format == AV_PIX_FMT_YUV420P || if (hw_frames_ctx->sw_format == AV_PIX_FMT_NV12 || hw_frames_ctx->sw_format == AV_PIX_FMT_YUV420P ||
hw_frames_ctx->sw_format == AV_PIX_FMT_P010LE || hw_frames_ctx->sw_format == AV_PIX_FMT_P016LE) hw_frames_ctx->sw_format == AV_PIX_FMT_P010LE || hw_frames_ctx->sw_format == AV_PIX_FMT_P016LE)
...@@ -274,7 +276,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) ...@@ -274,7 +276,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
hist[i] = 4 * hist[i]; hist[i] = 4 * hist[i];
} }
cuCtxPopCurrent(&dummy); CHECK_CU(cuCtxPopCurrent(&dummy));
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -292,12 +294,12 @@ static av_cold void uninit(AVFilterContext *ctx) ...@@ -292,12 +294,12 @@ static av_cold void uninit(AVFilterContext *ctx)
ThumbnailCudaContext *s = ctx->priv; ThumbnailCudaContext *s = ctx->priv;
if (s->data) { if (s->data) {
cuMemFree(s->data); CHECK_CU(cuMemFree(s->data));
s->data = 0; s->data = 0;
} }
if (s->cu_module) { if (s->cu_module) {
cuModuleUnload(s->cu_module); CHECK_CU(cuModuleUnload(s->cu_module));
s->cu_module = NULL; s->cu_module = NULL;
} }
...@@ -340,49 +342,43 @@ static int config_props(AVFilterLink *inlink) ...@@ -340,49 +342,43 @@ static int config_props(AVFilterLink *inlink)
AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx; AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx;
CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
CUresult err; int ret;
extern char vf_thumbnail_cuda_ptx[]; extern char vf_thumbnail_cuda_ptx[];
err = cuCtxPushCurrent(cuda_ctx); ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n"); return ret;
return AVERROR_UNKNOWN;
}
err = cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx); ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Error loading module data\n"); return ret;
return AVERROR_UNKNOWN;
}
cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar"); CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar"));
cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2"); CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2"));
cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort"); CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort"));
cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2"); CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2"));
cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"));
cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"));
cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"));
cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"); CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"));
cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER); CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER));
cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER); CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER));
cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER); CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER));
cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER); CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER));
cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR));
cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR));
cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR));
cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR); CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR));
err = cuMemAlloc(&s->data, HIST_SIZE * sizeof(int)); ret = CHECK_CU(cuMemAlloc(&s->data, HIST_SIZE * sizeof(int)));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Error allocating cuda memory\n"); return ret;
return AVERROR_UNKNOWN;
}
cuCtxPopCurrent(&dummy); CHECK_CU(cuCtxPopCurrent(&dummy));
s->hw_frames_ctx = ctx->inputs[0]->hw_frames_ctx; s->hw_frames_ctx = ctx->inputs[0]->hw_frames_ctx;
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "libavutil/common.h" #include "libavutil/common.h"
#include "libavutil/hwcontext.h" #include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda_internal.h" #include "libavutil/hwcontext_cuda_internal.h"
#include "libavutil/cuda_check.h"
#include "libavutil/internal.h" #include "libavutil/internal.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
...@@ -32,6 +33,8 @@ ...@@ -32,6 +33,8 @@
#include "internal.h" #include "internal.h"
#include "video.h" #include "video.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x)
static const enum AVPixelFormat supported_formats[] = { static const enum AVPixelFormat supported_formats[] = {
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P,
AV_PIX_FMT_YUV444P AV_PIX_FMT_YUV444P
...@@ -397,7 +400,6 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in) ...@@ -397,7 +400,6 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in)
AVHWFramesContext *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data; AVHWFramesContext *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data;
AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
AVFrame *out = NULL; AVFrame *out = NULL;
CUresult err;
CUcontext dummy; CUcontext dummy;
int ret = 0; int ret = 0;
...@@ -410,15 +412,13 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in) ...@@ -410,15 +412,13 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in)
goto fail; goto fail;
} }
err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx); ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx));
if (err != CUDA_SUCCESS) { if (ret < 0)
ret = AVERROR_UNKNOWN;
goto fail; goto fail;
}
ret = npptranspose_filter(ctx, out, in); ret = npptranspose_filter(ctx, out, in);
device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy); CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy));
if (ret < 0) if (ret < 0)
goto fail; goto fail;
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <cuda.h> #include <cuda.h>
#include "libavutil/avassert.h" #include "libavutil/avassert.h"
#include "libavutil/hwcontext_cuda.h" #include "libavutil/hwcontext_cuda.h"
#include "libavutil/cuda_check.h"
#include "internal.h" #include "internal.h"
#include "yadif.h" #include "yadif.h"
...@@ -48,28 +49,7 @@ typedef struct DeintCUDAContext { ...@@ -48,28 +49,7 @@ typedef struct DeintCUDAContext {
#define BLOCKX 32 #define BLOCKX 32
#define BLOCKY 16 #define BLOCKY 16
static int check_cu(AVFilterContext *avctx, CUresult err, const char *func) #define CHECK_CU(x) FF_CUDA_CHECK(ctx, x)
{
const char *err_name;
const char *err_string;
av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
if (err == CUDA_SUCCESS)
return 0;
cuGetErrorName(err, &err_name);
cuGetErrorString(err, &err_string);
av_log(avctx, AV_LOG_ERROR, "%s failed", func);
if (err_name && err_string)
av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
av_log(avctx, AV_LOG_ERROR, "\n");
return AVERROR_EXTERNAL;
}
#define CHECK_CU(x) check_cu(ctx, (x), #x)
static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, static CUresult call_kernel(AVFilterContext *ctx, CUfunction func,
CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next, CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next,
...@@ -85,7 +65,7 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, ...@@ -85,7 +65,7 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func,
{ {
DeintCUDAContext *s = ctx->priv; DeintCUDAContext *s = ctx->priv;
CUtexObject tex_prev = 0, tex_cur = 0, tex_next = 0; CUtexObject tex_prev = 0, tex_cur = 0, tex_next = 0;
CUresult err; int ret;
int skip_spatial_check = s->yadif.mode&2; int skip_spatial_check = s->yadif.mode&2;
void *args[] = { &dst, &tex_prev, &tex_cur, &tex_next, void *args[] = { &dst, &tex_prev, &tex_cur, &tex_next,
...@@ -108,24 +88,21 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, ...@@ -108,24 +88,21 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func,
}; };
res_desc.res.pitch2D.devPtr = (CUdeviceptr)prev; res_desc.res.pitch2D.devPtr = (CUdeviceptr)prev;
err = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL)); ret = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL));
if (err != CUDA_SUCCESS) { if (ret < 0)
goto exit; goto exit;
}
res_desc.res.pitch2D.devPtr = (CUdeviceptr)cur; res_desc.res.pitch2D.devPtr = (CUdeviceptr)cur;
err = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL)); ret = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL));
if (err != CUDA_SUCCESS) { if (ret < 0)
goto exit; goto exit;
}
res_desc.res.pitch2D.devPtr = (CUdeviceptr)next; res_desc.res.pitch2D.devPtr = (CUdeviceptr)next;
err = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL)); ret = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL));
if (err != CUDA_SUCCESS) { if (ret < 0)
goto exit; goto exit;
}
err = CHECK_CU(cuLaunchKernel(func, ret = CHECK_CU(cuLaunchKernel(func,
DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
BLOCKX, BLOCKY, 1, BLOCKX, BLOCKY, 1,
0, s->stream, args, NULL)); 0, s->stream, args, NULL));
...@@ -138,7 +115,7 @@ exit: ...@@ -138,7 +115,7 @@ exit:
if (tex_next) if (tex_next)
CHECK_CU(cuTexObjectDestroy(tex_next)); CHECK_CU(cuTexObjectDestroy(tex_next));
return err; return ret;
} }
static void filter(AVFilterContext *ctx, AVFrame *dst, static void filter(AVFilterContext *ctx, AVFrame *dst,
...@@ -147,13 +124,11 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, ...@@ -147,13 +124,11 @@ static void filter(AVFilterContext *ctx, AVFrame *dst,
DeintCUDAContext *s = ctx->priv; DeintCUDAContext *s = ctx->priv;
YADIFContext *y = &s->yadif; YADIFContext *y = &s->yadif;
CUcontext dummy; CUcontext dummy;
CUresult err; int i, ret;
int i;
err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx));
if (err != CUDA_SUCCESS) { if (ret < 0)
goto exit; return;
}
for (i = 0; i < y->csp->nb_components; i++) { for (i = 0; i < y->csp->nb_components; i++) {
CUfunction func; CUfunction func;
...@@ -204,10 +179,7 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, ...@@ -204,10 +179,7 @@ static void filter(AVFilterContext *ctx, AVFrame *dst,
parity, tff); parity, tff);
} }
err = CHECK_CU(cuStreamSynchronize(s->stream)); CHECK_CU(cuStreamSynchronize(s->stream));
if (err != CUDA_SUCCESS) {
goto exit;
}
exit: exit:
CHECK_CU(cuCtxPopCurrent(&dummy)); CHECK_CU(cuCtxPopCurrent(&dummy));
...@@ -283,7 +255,6 @@ static int config_output(AVFilterLink *link) ...@@ -283,7 +255,6 @@ static int config_output(AVFilterLink *link)
YADIFContext *y = &s->yadif; YADIFContext *y = &s->yadif;
int ret = 0; int ret = 0;
CUcontext dummy; CUcontext dummy;
CUresult err;
av_assert0(s->input_frames); av_assert0(s->input_frames);
s->device_ref = av_buffer_ref(s->input_frames->device_ref); s->device_ref = av_buffer_ref(s->input_frames->device_ref);
...@@ -342,41 +313,29 @@ static int config_output(AVFilterLink *link) ...@@ -342,41 +313,29 @@ static int config_output(AVFilterLink *link)
y->csp = av_pix_fmt_desc_get(output_frames->sw_format); y->csp = av_pix_fmt_desc_get(output_frames->sw_format);
y->filter = filter; y->filter = filter;
err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx));
if (err != CUDA_SUCCESS) { if (ret < 0)
ret = AVERROR_EXTERNAL;
goto exit; goto exit;
}
err = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx)); ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx));
if (err != CUDA_SUCCESS) { if (ret < 0)
ret = AVERROR_INVALIDDATA;
goto exit; goto exit;
}
err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar")); ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar"));
if (err != CUDA_SUCCESS) { if (ret < 0)
ret = AVERROR_INVALIDDATA;
goto exit; goto exit;
}
err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2")); ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2"));
if (err != CUDA_SUCCESS) { if (ret < 0)
ret = AVERROR_INVALIDDATA;
goto exit; goto exit;
}
err= CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort")); ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort"));
if (err != CUDA_SUCCESS) { if (ret < 0)
ret = AVERROR_INVALIDDATA;
goto exit; goto exit;
}
err = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2")); ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2"));
if (err != CUDA_SUCCESS) { if (ret < 0)
ret = AVERROR_INVALIDDATA;
goto exit; goto exit;
}
exit: exit:
CHECK_CU(cuCtxPopCurrent(&dummy)); CHECK_CU(cuCtxPopCurrent(&dummy));
......
...@@ -157,7 +157,7 @@ OBJS = adler32.o \ ...@@ -157,7 +157,7 @@ OBJS = adler32.o \
xtea.o \ xtea.o \
tea.o \ tea.o \
OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o cuda_check.o
OBJS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.o OBJS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.o
OBJS-$(CONFIG_DXVA2) += hwcontext_dxva2.o OBJS-$(CONFIG_DXVA2) += hwcontext_dxva2.o
OBJS-$(CONFIG_LIBDRM) += hwcontext_drm.o OBJS-$(CONFIG_LIBDRM) += hwcontext_drm.o
...@@ -175,7 +175,8 @@ OBJS += $(COMPAT_OBJS:%=../compat/%) ...@@ -175,7 +175,8 @@ OBJS += $(COMPAT_OBJS:%=../compat/%)
SLIBOBJS-$(HAVE_GNU_WINDRES) += avutilres.o SLIBOBJS-$(HAVE_GNU_WINDRES) += avutilres.o
SKIPHEADERS-$(HAVE_CUDA_H) += hwcontext_cuda.h SKIPHEADERS-$(HAVE_CUDA_H) += hwcontext_cuda.h
SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda_internal.h SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda_internal.h \
cuda_check.h
SKIPHEADERS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.h SKIPHEADERS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.h
SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h
SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h
......
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stddef.h>

#include "compat/cuda/dynlink_loader.h"
#include "libavutil/cuda_check.h"
/**
 * Log a CUDA driver call and translate its result into an FFmpeg return code.
 *
 * A trace-level "Calling ..." message is always emitted. When err is not
 * CUDA_SUCCESS, an error-level message is emitted as well; it includes the
 * driver's error name and description when both could be obtained.
 *
 * @param avctx               logging context handed to av_log()
 * @param cuGetErrorName_fn   pointer to a function of type tcuGetErrorName,
 *                            passed as void *; if NULL the name lookup is
 *                            skipped
 * @param cuGetErrorString_fn pointer to a function of type tcuGetErrorString,
 *                            passed as void *; if NULL the description lookup
 *                            is skipped
 * @param err                 result of the CUDA call being checked
 * @param func                text identifying the checked call in log output
 * @return 0 if err is CUDA_SUCCESS, AVERROR_EXTERNAL otherwise
 */
int ff_cuda_check(void *avctx,
                  void *cuGetErrorName_fn,
                  void *cuGetErrorString_fn,
                  CUresult err, const char *func)
{
    /* Start from NULL: if a lookup is skipped, or fails without writing its
     * output, the detail text below is simply omitted instead of reading an
     * indeterminate pointer. */
    const char *err_name   = NULL;
    const char *err_string = NULL;

    av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);

    if (err == CUDA_SUCCESS)
        return 0;

    /* Never call through a missing symbol while reporting another error. */
    if (cuGetErrorName_fn)
        ((tcuGetErrorName *)cuGetErrorName_fn)(err, &err_name);
    if (cuGetErrorString_fn)
        ((tcuGetErrorString *)cuGetErrorString_fn)(err, &err_string);

    av_log(avctx, AV_LOG_ERROR, "%s failed", func);
    if (err_name && err_string)
        av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
    av_log(avctx, AV_LOG_ERROR, "\n");

    return AVERROR_EXTERNAL;
}
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef FF_CUDA_CHECK_H
#define FF_CUDA_CHECK_H
/**
 * Wrap a CUDA function call and print error information if it fails.
 *
 * The call named by func is always logged at trace level. If err is not
 * CUDA_SUCCESS, the two lookup functions are invoked to obtain the error's
 * name and description, and a "<func> failed" message is logged at error
 * level (with " -> name: description" appended when both are available).
 *
 * @param avctx               logging context passed to av_log()
 * @param cuGetErrorName_fn   pointer to the cuGetErrorName implementation
 *                            to call, passed as void *
 * @param cuGetErrorString_fn pointer to the cuGetErrorString implementation
 *                            to call, passed as void *
 * @param err                 result code of the CUDA call being checked
 * @param func                text identifying the call in log messages
 * @return 0 if err is CUDA_SUCCESS, AVERROR_EXTERNAL otherwise
 */
int ff_cuda_check(void *avctx,
void *cuGetErrorName_fn, void *cuGetErrorString_fn,
CUresult err, const char *func);
/**
 * Convenience wrapper for ff_cuda_check when directly linking libcuda.
 *
 * Evaluates the CUDA call x once, passes its result together with the
 * stringified call text to ff_cuda_check(), and yields that function's
 * return value. cuGetErrorName and cuGetErrorString must be visible at the
 * point of use.
 */
#define FF_CUDA_CHECK(avclass, x) \
    ff_cuda_check(avclass, cuGetErrorName, cuGetErrorString, (x), #x)

/**
 * Convenience wrapper for ff_cuda_check when dynamically loading cuda symbols.
 *
 * cudl is a pointer to the table of loaded CUDA functions; its
 * cuGetErrorName and cuGetErrorString members are used for the lookups.
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. a conditional or a dereference) expands correctly before "->".
 */
#define FF_CUDA_CHECK_DL(avclass, cudl, x) \
    ff_cuda_check(avclass, (cudl)->cuGetErrorName, (cudl)->cuGetErrorString, (x), #x)
#endif /* FF_CUDA_CHECK_H */
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "hwcontext.h" #include "hwcontext.h"
#include "hwcontext_internal.h" #include "hwcontext_internal.h"
#include "hwcontext_cuda_internal.h" #include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#include "mem.h" #include "mem.h"
#include "pixdesc.h" #include "pixdesc.h"
#include "pixfmt.h" #include "pixfmt.h"
...@@ -43,6 +44,8 @@ static const enum AVPixelFormat supported_formats[] = { ...@@ -43,6 +44,8 @@ static const enum AVPixelFormat supported_formats[] = {
AV_PIX_FMT_0BGR32, AV_PIX_FMT_0BGR32,
}; };
#define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x)
static int cuda_frames_get_constraints(AVHWDeviceContext *ctx, static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
const void *hwconfig, const void *hwconfig,
AVHWFramesConstraints *constraints) AVHWFramesConstraints *constraints)
...@@ -70,48 +73,48 @@ static int cuda_frames_get_constraints(AVHWDeviceContext *ctx, ...@@ -70,48 +73,48 @@ static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
static void cuda_buffer_free(void *opaque, uint8_t *data) static void cuda_buffer_free(void *opaque, uint8_t *data)
{ {
AVHWFramesContext *ctx = opaque; AVHWFramesContext *ctx = opaque;
AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; AVHWDeviceContext *device_ctx = ctx->device_ctx;
CudaFunctions *cu = hwctx->internal->cuda_dl; AVCUDADeviceContext *hwctx = device_ctx->hwctx;
CudaFunctions *cu = hwctx->internal->cuda_dl;
CUcontext dummy; CUcontext dummy;
cu->cuCtxPushCurrent(hwctx->cuda_ctx); CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
cu->cuMemFree((CUdeviceptr)data); CHECK_CU(cu->cuMemFree((CUdeviceptr)data));
cu->cuCtxPopCurrent(&dummy); CHECK_CU(cu->cuCtxPopCurrent(&dummy));
} }
static AVBufferRef *cuda_pool_alloc(void *opaque, int size) static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
{ {
AVHWFramesContext *ctx = opaque; AVHWFramesContext *ctx = opaque;
AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; AVHWDeviceContext *device_ctx = ctx->device_ctx;
CudaFunctions *cu = hwctx->internal->cuda_dl; AVCUDADeviceContext *hwctx = device_ctx->hwctx;
CudaFunctions *cu = hwctx->internal->cuda_dl;
AVBufferRef *ret = NULL; AVBufferRef *ret = NULL;
CUcontext dummy = NULL; CUcontext dummy = NULL;
CUdeviceptr data; CUdeviceptr data;
CUresult err; int err;
err = cu->cuCtxPushCurrent(hwctx->cuda_ctx); err = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
if (err != CUDA_SUCCESS) { if (err < 0)
av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
return NULL; return NULL;
}
err = cu->cuMemAlloc(&data, size); err = CHECK_CU(cu->cuMemAlloc(&data, size));
if (err != CUDA_SUCCESS) if (err < 0)
goto fail; goto fail;
ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0); ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
if (!ret) { if (!ret) {
cu->cuMemFree(data); CHECK_CU(cu->cuMemFree(data));
goto fail; goto fail;
} }
fail: fail:
cu->cuCtxPopCurrent(&dummy); CHECK_CU(cu->cuCtxPopCurrent(&dummy));
return ret; return ret;
} }
...@@ -194,17 +197,17 @@ static int cuda_transfer_get_formats(AVHWFramesContext *ctx, ...@@ -194,17 +197,17 @@ static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
const AVFrame *src) const AVFrame *src)
{ {
CUDAFramesContext *priv = ctx->internal->priv; CUDAFramesContext *priv = ctx->internal->priv;
AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; AVHWDeviceContext *device_ctx = ctx->device_ctx;
CudaFunctions *cu = device_hwctx->internal->cuda_dl; AVCUDADeviceContext *hwctx = device_ctx->hwctx;
CudaFunctions *cu = hwctx->internal->cuda_dl;
CUcontext dummy; CUcontext dummy;
CUresult err; int i, ret;
int i;
err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
if (err != CUDA_SUCCESS) if (ret < 0)
return AVERROR_UNKNOWN; return ret;
for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
CUDA_MEMCPY2D cpy = { CUDA_MEMCPY2D cpy = {
...@@ -218,20 +221,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, ...@@ -218,20 +221,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
.Height = src->height >> (i ? priv->shift_height : 0), .Height = src->height >> (i ? priv->shift_height : 0),
}; };
err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream); ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); goto exit;
return AVERROR_UNKNOWN;
}
} }
err = cu->cuStreamSynchronize(device_hwctx->stream); ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n"); goto exit;
return AVERROR_UNKNOWN;
}
cu->cuCtxPopCurrent(&dummy); exit:
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
return 0; return 0;
} }
...@@ -239,17 +239,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, ...@@ -239,17 +239,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
const AVFrame *src) const AVFrame *src)
{ {
CUDAFramesContext *priv = ctx->internal->priv; CUDAFramesContext *priv = ctx->internal->priv;
AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; AVHWDeviceContext *device_ctx = ctx->device_ctx;
CudaFunctions *cu = device_hwctx->internal->cuda_dl; AVCUDADeviceContext *hwctx = device_ctx->hwctx;
CudaFunctions *cu = hwctx->internal->cuda_dl;
CUcontext dummy; CUcontext dummy;
CUresult err; int i, ret;
int i;
err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
if (err != CUDA_SUCCESS) if (ret < 0)
return AVERROR_UNKNOWN; return ret;
for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
CUDA_MEMCPY2D cpy = { CUDA_MEMCPY2D cpy = {
...@@ -263,31 +263,29 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, ...@@ -263,31 +263,29 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
.Height = src->height >> (i ? priv->shift_height : 0), .Height = src->height >> (i ? priv->shift_height : 0),
}; };
err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream); ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n"); goto exit;
return AVERROR_UNKNOWN;
}
} }
err = cu->cuStreamSynchronize(device_hwctx->stream); ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n"); goto exit;
return AVERROR_UNKNOWN;
}
cu->cuCtxPopCurrent(&dummy); exit:
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
return 0; return 0;
} }
static void cuda_device_uninit(AVHWDeviceContext *ctx) static void cuda_device_uninit(AVHWDeviceContext *device_ctx)
{ {
AVCUDADeviceContext *hwctx = ctx->hwctx; AVCUDADeviceContext *hwctx = device_ctx->hwctx;
if (hwctx->internal) { if (hwctx->internal) {
CudaFunctions *cu = hwctx->internal->cuda_dl;
if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx); CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
hwctx->cuda_ctx = NULL; hwctx->cuda_ctx = NULL;
} }
cuda_free_functions(&hwctx->internal->cuda_dl); cuda_free_functions(&hwctx->internal->cuda_dl);
...@@ -322,53 +320,47 @@ error: ...@@ -322,53 +320,47 @@ error:
return ret; return ret;
} }
static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, static int cuda_device_create(AVHWDeviceContext *device_ctx,
const char *device,
AVDictionary *opts, int flags) AVDictionary *opts, int flags)
{ {
AVCUDADeviceContext *hwctx = ctx->hwctx; AVCUDADeviceContext *hwctx = device_ctx->hwctx;
CudaFunctions *cu; CudaFunctions *cu;
CUdevice cu_device; CUdevice cu_device;
CUcontext dummy; CUcontext dummy;
CUresult err; int ret, device_idx = 0;
int device_idx = 0;
if (device) if (device)
device_idx = strtol(device, NULL, 0); device_idx = strtol(device, NULL, 0);
if (cuda_device_init(ctx) < 0) if (cuda_device_init(device_ctx) < 0)
goto error; goto error;
cu = hwctx->internal->cuda_dl; cu = hwctx->internal->cuda_dl;
err = cu->cuInit(0); ret = CHECK_CU(cu->cuInit(0));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
goto error; goto error;
}
err = cu->cuDeviceGet(&cu_device, device_idx); ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
goto error; goto error;
}
err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device); ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
if (err != CUDA_SUCCESS) { if (ret < 0)
av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
goto error; goto error;
}
// Setting stream to NULL will make functions automatically use the default CUstream // Setting stream to NULL will make functions automatically use the default CUstream
hwctx->stream = NULL; hwctx->stream = NULL;
cu->cuCtxPopCurrent(&dummy); CHECK_CU(cu->cuCtxPopCurrent(&dummy));
hwctx->internal->is_allocated = 1; hwctx->internal->is_allocated = 1;
return 0; return 0;
error: error:
cuda_device_uninit(ctx); cuda_device_uninit(device_ctx);
return AVERROR_UNKNOWN; return AVERROR_UNKNOWN;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment