Commit c59e49f9 authored by Paul B Mahol

avfilter/vf_convolve: implement slice threading

Signed-off-by: Paul B Mahol <onemda@gmail.com>
parent f9f1b2a9
...@@ -29,12 +29,14 @@ ...@@ -29,12 +29,14 @@
#include "internal.h" #include "internal.h"
#include "video.h" #include "video.h"
#define MAX_THREADS 16
typedef struct ConvolveContext { typedef struct ConvolveContext {
const AVClass *class; const AVClass *class;
FFFrameSync fs; FFFrameSync fs;
FFTContext *fft[4]; FFTContext *fft[4][MAX_THREADS];
FFTContext *ifft[4]; FFTContext *ifft[4][MAX_THREADS];
int fft_bits[4]; int fft_bits[4];
int fft_len[4]; int fft_len[4];
...@@ -152,15 +154,28 @@ static int config_input_impulse(AVFilterLink *inlink) ...@@ -152,15 +154,28 @@ static int config_input_impulse(AVFilterLink *inlink)
return 0; return 0;
} }
static void fft_horizontal(ConvolveContext *s, FFTComplex *fft_hdata, typedef struct ThreadData {
int n, int plane) FFTComplex *hdata, *vdata;
int plane, n;
} ThreadData;
static int fft_horizontal(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{ {
ConvolveContext *s = ctx->priv;
ThreadData *td = arg;
FFTComplex *hdata = td->hdata;
const int plane = td->plane;
const int n = td->n;
int start = (n * jobnr ) / nb_jobs;
int end = (n * (jobnr+1)) / nb_jobs;
int y; int y;
for (y = 0; y < n; y++) { for (y = start; y < end; y++) {
av_fft_permute(s->fft[plane], fft_hdata + y * n); av_fft_permute(s->fft[plane][jobnr], hdata + y * n);
av_fft_calc(s->fft[plane], fft_hdata + y * n); av_fft_calc(s->fft[plane][jobnr], hdata + y * n);
} }
return 0;
} }
static void get_input(ConvolveContext *s, FFTComplex *fft_hdata, static void get_input(ConvolveContext *s, FFTComplex *fft_hdata,
...@@ -238,46 +253,73 @@ static void get_input(ConvolveContext *s, FFTComplex *fft_hdata, ...@@ -238,46 +253,73 @@ static void get_input(ConvolveContext *s, FFTComplex *fft_hdata,
} }
} }
static void fft_vertical(ConvolveContext *s, FFTComplex *fft_hdata, FFTComplex *fft_vdata, static int fft_vertical(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
int n, int plane)
{ {
ConvolveContext *s = ctx->priv;
ThreadData *td = arg;
FFTComplex *hdata = td->hdata;
FFTComplex *vdata = td->vdata;
const int plane = td->plane;
const int n = td->n;
int start = (n * jobnr ) / nb_jobs;
int end = (n * (jobnr+1)) / nb_jobs;
int y, x; int y, x;
for (y = 0; y < n; y++) { for (y = start; y < end; y++) {
for (x = 0; x < n; x++) { for (x = 0; x < n; x++) {
fft_vdata[y * n + x].re = fft_hdata[x * n + y].re; vdata[y * n + x].re = hdata[x * n + y].re;
fft_vdata[y * n + x].im = fft_hdata[x * n + y].im; vdata[y * n + x].im = hdata[x * n + y].im;
} }
av_fft_permute(s->fft[plane], fft_vdata + y * n); av_fft_permute(s->fft[plane][jobnr], vdata + y * n);
av_fft_calc(s->fft[plane], fft_vdata + y * n); av_fft_calc(s->fft[plane][jobnr], vdata + y * n);
} }
return 0;
} }
static void ifft_vertical(ConvolveContext *s, int n, int plane) static int ifft_vertical(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{ {
ConvolveContext *s = ctx->priv;
ThreadData *td = arg;
FFTComplex *hdata = td->hdata;
FFTComplex *vdata = td->vdata;
const int plane = td->plane;
const int n = td->n;
int start = (n * jobnr ) / nb_jobs;
int end = (n * (jobnr+1)) / nb_jobs;
int y, x; int y, x;
for (y = 0; y < n; y++) { for (y = start; y < end; y++) {
av_fft_permute(s->ifft[plane], s->fft_vdata[plane] + y * n); av_fft_permute(s->ifft[plane][jobnr], vdata + y * n);
av_fft_calc(s->ifft[plane], s->fft_vdata[plane] + y * n); av_fft_calc(s->ifft[plane][jobnr], vdata + y * n);
for (x = 0; x < n; x++) { for (x = 0; x < n; x++) {
s->fft_hdata[plane][x * n + y].re = s->fft_vdata[plane][y * n + x].re; hdata[x * n + y].re = vdata[y * n + x].re;
s->fft_hdata[plane][x * n + y].im = s->fft_vdata[plane][y * n + x].im; hdata[x * n + y].im = vdata[y * n + x].im;
} }
} }
return 0;
} }
static void ifft_horizontal(ConvolveContext *s, int n, int plane) static int ifft_horizontal(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{ {
FFTComplex *input = s->fft_hdata[plane]; ConvolveContext *s = ctx->priv;
ThreadData *td = arg;
FFTComplex *hdata = td->hdata;
const int plane = td->plane;
const int n = td->n;
int start = (n * jobnr ) / nb_jobs;
int end = (n * (jobnr+1)) / nb_jobs;
int y; int y;
for (y = 0; y < n; y++) { for (y = start; y < end; y++) {
av_fft_permute(s->ifft[plane], input + y * n); av_fft_permute(s->ifft[plane][jobnr], hdata + y * n);
av_fft_calc(s->ifft[plane], input + y * n); av_fft_calc(s->ifft[plane][jobnr], hdata + y * n);
} }
return 0;
} }
static void get_output(ConvolveContext *s, AVFrame *out, static void get_output(ConvolveContext *s, AVFrame *out,
...@@ -356,15 +398,20 @@ static int do_convolve(FFFrameSync *fs) ...@@ -356,15 +398,20 @@ static int do_convolve(FFFrameSync *fs)
const int w = s->planewidth[plane]; const int w = s->planewidth[plane];
const int h = s->planeheight[plane]; const int h = s->planeheight[plane];
float total = 0; float total = 0;
ThreadData td;
if (!(s->planes & (1 << plane))) { if (!(s->planes & (1 << plane))) {
continue; continue;
} }
td.plane = plane, td.n = n;
get_input(s, s->fft_hdata[plane], mainpic, w, h, n, plane, 1.f); get_input(s, s->fft_hdata[plane], mainpic, w, h, n, plane, 1.f);
fft_horizontal(s, s->fft_hdata[plane], n, plane);
fft_vertical(s, s->fft_hdata[plane], s->fft_vdata[plane], td.hdata = s->fft_hdata[plane];
n, plane); td.vdata = s->fft_vdata[plane];
ctx->internal->execute(ctx, fft_horizontal, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
ctx->internal->execute(ctx, fft_vertical, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
if ((!s->impulse && !s->got_impulse[plane]) || s->impulse) { if ((!s->impulse && !s->got_impulse[plane]) || s->impulse) {
if (s->depth == 8) { if (s->depth == 8) {
...@@ -385,9 +432,12 @@ static int do_convolve(FFFrameSync *fs) ...@@ -385,9 +432,12 @@ static int do_convolve(FFFrameSync *fs)
total = FFMAX(1, total); total = FFMAX(1, total);
get_input(s, s->fft_hdata_impulse[plane], impulsepic, w, h, n, plane, 1 / total); get_input(s, s->fft_hdata_impulse[plane], impulsepic, w, h, n, plane, 1 / total);
fft_horizontal(s, s->fft_hdata_impulse[plane], n, plane);
fft_vertical(s, s->fft_hdata_impulse[plane], s->fft_vdata_impulse[plane], td.hdata = s->fft_hdata_impulse[plane];
n, plane); td.vdata = s->fft_vdata_impulse[plane];
ctx->internal->execute(ctx, fft_horizontal, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
ctx->internal->execute(ctx, fft_vertical, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
s->got_impulse[plane] = 1; s->got_impulse[plane] = 1;
} }
...@@ -408,8 +458,11 @@ static int do_convolve(FFFrameSync *fs) ...@@ -408,8 +458,11 @@ static int do_convolve(FFFrameSync *fs)
} }
} }
ifft_vertical(s, n, plane); td.hdata = s->fft_hdata[plane];
ifft_horizontal(s, n, plane); td.vdata = s->fft_vdata[plane];
ctx->internal->execute(ctx, ifft_vertical, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
ctx->internal->execute(ctx, ifft_horizontal, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
get_output(s, mainpic, w, h, n, plane); get_output(s, mainpic, w, h, n, plane);
} }
...@@ -421,7 +474,7 @@ static int config_output(AVFilterLink *outlink) ...@@ -421,7 +474,7 @@ static int config_output(AVFilterLink *outlink)
AVFilterContext *ctx = outlink->src; AVFilterContext *ctx = outlink->src;
ConvolveContext *s = ctx->priv; ConvolveContext *s = ctx->priv;
AVFilterLink *mainlink = ctx->inputs[0]; AVFilterLink *mainlink = ctx->inputs[0];
int ret, i; int ret, i, j;
s->fs.on_event = do_convolve; s->fs.on_event = do_convolve;
ret = ff_framesync_init_dualinput(&s->fs, ctx); ret = ff_framesync_init_dualinput(&s->fs, ctx);
...@@ -437,11 +490,13 @@ static int config_output(AVFilterLink *outlink) ...@@ -437,11 +490,13 @@ static int config_output(AVFilterLink *outlink)
return ret; return ret;
for (i = 0; i < s->nb_planes; i++) { for (i = 0; i < s->nb_planes; i++) {
s->fft[i] = av_fft_init(s->fft_bits[i], 0); for (j = 0; j < MAX_THREADS; j++) {
s->ifft[i] = av_fft_init(s->fft_bits[i], 1); s->fft[i][j] = av_fft_init(s->fft_bits[i], 0);
if (!s->fft[i] || !s->ifft[i]) s->ifft[i][j] = av_fft_init(s->fft_bits[i], 1);
if (!s->fft[i][j] || !s->ifft[i][j])
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
}
return 0; return 0;
} }
...@@ -455,15 +510,18 @@ static int activate(AVFilterContext *ctx) ...@@ -455,15 +510,18 @@ static int activate(AVFilterContext *ctx)
static av_cold void uninit(AVFilterContext *ctx) static av_cold void uninit(AVFilterContext *ctx)
{ {
ConvolveContext *s = ctx->priv; ConvolveContext *s = ctx->priv;
int i; int i, j;
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
av_freep(&s->fft_hdata[i]); av_freep(&s->fft_hdata[i]);
av_freep(&s->fft_vdata[i]); av_freep(&s->fft_vdata[i]);
av_freep(&s->fft_hdata_impulse[i]); av_freep(&s->fft_hdata_impulse[i]);
av_freep(&s->fft_vdata_impulse[i]); av_freep(&s->fft_vdata_impulse[i]);
av_fft_end(s->fft[i]);
av_fft_end(s->ifft[i]); for (j = 0; j < MAX_THREADS; j++) {
av_fft_end(s->fft[i][j]);
av_fft_end(s->ifft[i][j]);
}
} }
ff_framesync_uninit(&s->fs); ff_framesync_uninit(&s->fs);
...@@ -502,5 +560,5 @@ AVFilter ff_vf_convolve = { ...@@ -502,5 +560,5 @@ AVFilter ff_vf_convolve = {
.priv_class = &convolve_class, .priv_class = &convolve_class,
.inputs = convolve_inputs, .inputs = convolve_inputs,
.outputs = convolve_outputs, .outputs = convolve_outputs,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
}; };
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment