Commit 1d74150a authored by Ruiling Song

lavfi/opencl: add nlmeans_opencl filter

Reviewed-by: Mark Thompson <sw@jkqxz.net>
Signed-off-by: Ruiling Song <ruiling.song@intel.com>
parent 023ea5e3
......@@ -3470,6 +3470,7 @@ mpdecimate_filter_select="pixelutils"
minterpolate_filter_select="scene_sad"
mptestsrc_filter_deps="gpl"
negate_filter_deps="lut_filter"
nlmeans_opencl_filter_deps="opencl"
nnedi_filter_deps="gpl"
ocr_filter_deps="libtesseract"
ocv_filter_deps="libopencv"
......
......@@ -19350,6 +19350,10 @@ Make every semi-green pixel in the input transparent with some slight blending:
@end example
@end itemize
@section nlmeans_opencl
Non-local Means denoise filter through OpenCL. This filter accepts the same options as @ref{nlmeans}.
@section overlay_opencl
Overlay one video on top of another.
......
......@@ -297,6 +297,7 @@ OBJS-$(CONFIG_MIX_FILTER) += vf_mix.o
OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o
OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o
OBJS-$(CONFIG_NLMEANS_FILTER) += vf_nlmeans.o
OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o
OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o
OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o
OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o
......
......@@ -282,6 +282,7 @@ extern AVFilter ff_vf_mix;
extern AVFilter ff_vf_mpdecimate;
extern AVFilter ff_vf_negate;
extern AVFilter ff_vf_nlmeans;
extern AVFilter ff_vf_nlmeans_opencl;
extern AVFilter ff_vf_nnedi;
extern AVFilter ff_vf_noformat;
extern AVFilter ff_vf_noise;
......
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
// Sampler shared by every kernel in this file: integer (unnormalized) pixel
// coordinates, reads outside the image are clamped to the nearest edge pixel,
// and no interpolation is performed between pixels.
const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_NEAREST);
/*
 * Row-wise running sum of squared pixel differences.
 *
 * Each work-item handles one image row (global id 0 is the row index y).
 * For every pixel in the row, the source value is compared against the four
 * pixels displaced by (dx.s, dy.s), s = x/y/z/w; the squared differences are
 * scaled by 255 * 255, converted to unsigned integers and accumulated from
 * left to right. The running sums for the four displacements are stored
 * together as one uint4 per pixel in integral_img (row stride = width).
 *
 * Reads that fall outside the image are clamped to the edge by the sampler.
 * The height argument is not used by this kernel.
 */
kernel void horiz_sum(__global uint4 *integral_img,
                      __read_only image2d_t src,
                      int width,
                      int height,
                      int4 dx,
                      int4 dy)
{
    int y = get_global_id(0);
    // First element of this work-item's output row.
    __global uint4 *row = integral_img + y * width;

    uint4 sum = (uint4)(0);
    float4 s2;
    for (int i = 0; i < width; i++) {
        float s1 = read_imagef(src, sampler, (int2)(i, y)).x;
        s2.x = read_imagef(src, sampler, (int2)(i + dx.x, y + dy.x)).x;
        s2.y = read_imagef(src, sampler, (int2)(i + dx.y, y + dy.y)).x;
        s2.z = read_imagef(src, sampler, (int2)(i + dx.z, y + dy.z)).x;
        s2.w = read_imagef(src, sampler, (int2)(i + dx.w, y + dy.w)).x;
        // Scalar s1 is broadcast across the four displaced samples.
        sum += convert_uint4((s1 - s2) * (s1 - s2) * 255 * 255);
        row[i] = sum;
    }
}
/*
 * Column-wise prefix sum over integral_img, performed in place.
 *
 * Each work-item handles one column (global id 0 is the column index) and
 * walks it from the first row to the last, adding the accumulated total of
 * the rows above to every cell. Whenever adding the running total to a cell
 * would exceed UINT_MAX in any of the four lanes, the counter pointed to by
 * overflow is atomically incremented; the (wrapped) addition is still done.
 */
kernel void vert_sum(__global uint4 *integral_img,
                     __global int *overflow,
                     int width,
                     int height)
{
    const int col = get_global_id(0);
    uint4 running = (uint4)(0);

    for (int row = 0; row < height; row++) {
        __global uint4 *cell = integral_img + row * width + col;
        uint4 value = *cell;

        // value + running > UINT_MAX, tested without performing the wrap.
        if (any(((uint4)UINT_MAX - value) < running))
            atomic_inc(overflow);

        value += running;
        *cell = value;
        running = value;
    }
}
/*
 * Accumulate weighted pixel values and weights for four displacements.
 *
 * One work-item per pixel (x = global id 0, y = global id 1). For each of the
 * four displacements (dx.s, dy.s) the patch difference is taken from four
 * corners of integral_img around the pixel, at distance p:
 *
 *     diff = I(x+p, y+p) + I(x-p, y-p) - I(x+p, y-p) - I(x-p, y+p)
 *
 * and turned into a weight exp(-diff / h^2). The sum of the four weights is
 * added to weight[y * width + x]; the weights multiplied by the displaced
 * source pixels (scaled to 0..255 and truncated to integer) are added to
 * sum[y * width + x].
 *
 * If any corner would fall outside the width x height area, no integral
 * values are read and the difference is treated as zero.
 */
kernel void weight_accum(global float *sum, global float *weight,
                         global uint4 *integral_img, __read_only image2d_t src,
                         int width, int height, int p, float h,
                         int4 dx, int4 dy)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int out_idx = y * width + x;

    // Coordinates of the four displaced source pixels.
    const int4 sx = x + dx;
    const int4 sy = y + dy;

    uint4 src_pix = 0;
    src_pix.x = (int)(255 * read_imagef(src, sampler, (int2)(sx.x, sy.x)).x);
    src_pix.y = (int)(255 * read_imagef(src, sampler, (int2)(sx.y, sy.y)).x);
    src_pix.z = (int)(255 * read_imagef(src, sampler, (int2)(sx.z, sy.z)).x);
    src_pix.w = (int)(255 * read_imagef(src, sampler, (int2)(sx.w, sy.w)).x);

    uint4 top_left = 0, bottom_left = 0, top_right = 0, bottom_right = 0;

    // Only sample the integral image when the whole patch box is inside it.
    const int inside = (x - p) >= 0 && (y - p) >= 0 &&
                       (y + p) < height && (x + p) < width;
    if (inside) {
        const int top    = (y - p) * width;
        const int bottom = (y + p) * width;

        top_left     = integral_img[top    + x - p];
        bottom_left  = integral_img[bottom + x - p];
        top_right    = integral_img[top    + x + p];
        bottom_right = integral_img[bottom + x + p];
    }

    float4 patch_diff = convert_float4(bottom_right + top_left - top_right - bottom_left);
    float4 w = native_exp(-patch_diff / (h * h));
    float w_sum = w.x + w.y + w.z + w.w;

    weight[out_idx] += w_sum;
    sum[out_idx] += dot(w, convert_float4(src_pix));
}
/*
 * Produce the output pixel from the accumulated sums and weights.
 *
 * One work-item per pixel (x = global id 0, y = global id 1). The result is
 *
 *     (sum + src * 255) / (1 + weight) / 255
 *
 * i.e. the source pixel itself takes part in the average with weight 1.
 * sum and weight are indexed row-major with a stride equal to the width of
 * dst. The value is written to the first channel of dst; the remaining
 * channels are written as (0, 0, 1).
 *
 * Work-items outside the dimensions of dst return before touching any
 * buffer, so a global size larger than the image cannot cause out-of-bounds
 * reads of sum or weight.
 */
kernel void average(__write_only image2d_t dst,
                    __read_only image2d_t src,
                    global float *sum, global float *weight) {
    int x = get_global_id(0);
    int y = get_global_id(1);
    int2 dim = get_image_dim(dst);

    // Guard first: the buffer reads below are only valid inside the image.
    if (x >= dim.x || y >= dim.y)
        return;

    float w = weight[y * dim.x + x];
    float s = sum[y * dim.x + x];
    float src_pix = read_imagef(src, sampler, (int2)(x, y)).x;
    float r = (s + src_pix * 255) / (1.0f + w) / 255.0f;

    write_imagef(dst, (int2)(x, y), (float4)(r, 0.0f, 0.0f, 1.0f));
}
......@@ -24,6 +24,7 @@ extern const char *ff_opencl_source_colorkey;
extern const char *ff_opencl_source_colorspace_common;
extern const char *ff_opencl_source_convolution;
extern const char *ff_opencl_source_neighbor;
extern const char *ff_opencl_source_nlmeans;
extern const char *ff_opencl_source_overlay;
extern const char *ff_opencl_source_tonemap;
extern const char *ff_opencl_source_transpose;
......
......@@ -30,7 +30,7 @@
#include "libavutil/version.h"
#define LIBAVFILTER_VERSION_MAJOR 7
#define LIBAVFILTER_VERSION_MINOR 53
#define LIBAVFILTER_VERSION_MINOR 54
#define LIBAVFILTER_VERSION_MICRO 101
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment