Commit b4c9c099 authored by Philip Langdale, committed by Timo Rothenpieler

avfilter/vf_thumbnail_cuda: Switch to using ffnvcodec

This change switches the vf_thumbnail_cuda filter from using the
full cuda sdk to using the ffnvcodec headers and loader.

Most of the change is a direct mapping, but I also switched from
using texture references to using texture objects. This is supposed
to be the preferred way of using textures, and the texture object API
is the one I added to ffnvcodec.
Signed-off-by: Philip Langdale <philipl@overt.org>
Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
parent 2544c7ea
...@@ -2976,7 +2976,7 @@ v4l2_m2m_deps="linux_videodev2_h sem_timedwait" ...@@ -2976,7 +2976,7 @@ v4l2_m2m_deps="linux_videodev2_h sem_timedwait"
hwupload_cuda_filter_deps="ffnvcodec" hwupload_cuda_filter_deps="ffnvcodec"
scale_npp_filter_deps="ffnvcodec libnpp" scale_npp_filter_deps="ffnvcodec libnpp"
scale_cuda_filter_deps="ffnvcodec cuda_nvcc" scale_cuda_filter_deps="ffnvcodec cuda_nvcc"
thumbnail_cuda_filter_deps="cuda_sdk" thumbnail_cuda_filter_deps="ffnvcodec cuda_nvcc"
transpose_npp_filter_deps="ffnvcodec libnpp" transpose_npp_filter_deps="ffnvcodec libnpp"
amf_deps_any="libdl LoadLibrary" amf_deps_any="libdl LoadLibrary"
......
This diff is collapsed.
...@@ -22,55 +22,54 @@ ...@@ -22,55 +22,54 @@
extern "C" { extern "C" {
texture<unsigned char, 2> uchar_tex; __global__ void Thumbnail_uchar(cudaTextureObject_t uchar_tex,
texture<uchar2, 2> uchar2_tex; int *histogram, int src_width, int src_height)
texture<unsigned short, 2> ushort_tex;
texture<ushort2, 2> ushort2_tex;
__global__ void Thumbnail_uchar(int *histogram, int src_width, int src_height)
{ {
int x = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y; int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < src_height && x < src_width) if (y < src_height && x < src_width)
{ {
unsigned char pixel = tex2D(uchar_tex, x, y); unsigned char pixel = tex2D<unsigned char>(uchar_tex, x, y);
atomicAdd(&histogram[pixel], 1); atomicAdd(&histogram[pixel], 1);
} }
} }
__global__ void Thumbnail_uchar2(int *histogram, int src_width, int src_height) __global__ void Thumbnail_uchar2(cudaTextureObject_t uchar2_tex,
int *histogram, int src_width, int src_height)
{ {
int x = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y; int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < src_height && x < src_width) if (y < src_height && x < src_width)
{ {
uchar2 pixel = tex2D(uchar2_tex, x, y); uchar2 pixel = tex2D<uchar2>(uchar2_tex, x, y);
atomicAdd(&histogram[pixel.x], 1); atomicAdd(&histogram[pixel.x], 1);
atomicAdd(&histogram[256 + pixel.y], 1); atomicAdd(&histogram[256 + pixel.y], 1);
} }
} }
__global__ void Thumbnail_ushort(int *histogram, int src_width, int src_height) __global__ void Thumbnail_ushort(cudaTextureObject_t ushort_tex,
int *histogram, int src_width, int src_height)
{ {
int x = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y; int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < src_height && x < src_width) if (y < src_height && x < src_width)
{ {
unsigned short pixel = (tex2D(ushort_tex, x, y) + 128) >> 8; unsigned short pixel = (tex2D<unsigned short>(ushort_tex, x, y) + 128) >> 8;
atomicAdd(&histogram[pixel], 1); atomicAdd(&histogram[pixel], 1);
} }
} }
__global__ void Thumbnail_ushort2(int *histogram, int src_width, int src_height) __global__ void Thumbnail_ushort2(cudaTextureObject_t ushort2_tex,
int *histogram, int src_width, int src_height)
{ {
int x = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y; int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < src_height && x < src_width) if (y < src_height && x < src_width)
{ {
ushort2 pixel = tex2D(ushort2_tex, x, y); ushort2 pixel = tex2D<ushort2>(ushort2_tex, x, y);
atomicAdd(&histogram[(pixel.x + 128) >> 8], 1); atomicAdd(&histogram[(pixel.x + 128) >> 8], 1);
atomicAdd(&histogram[256 + (pixel.y + 128) >> 8], 1); atomicAdd(&histogram[256 + (pixel.y + 128) >> 8], 1);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment