Commit bdf1bbdb authored by Sergey Lavrushkin, committed by Pedro Arthur

Adds dnn inference module for simple convolutional networks. Reimplements srcnn filter based on it.

Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
parent cba16793
@@ -9,6 +9,8 @@ version <next>:
- aderivative and aintegral audio filters
- pal75bars and pal100bars video filter sources
- support mbedTLS based TLS
- DNN inference interface
- Reimplemented SRCNN filter using DNN inference interface
version 4.0:
@@ -12,6 +12,8 @@ OBJS = allfilters.o \
avfiltergraph.o \
buffersink.o \
buffersrc.o \
dnn_interface.o \
dnn_backend_native.o \
drawutils.o \
fifo.o \
formats.o \
/*
* Copyright (c) 2018 Sergey Lavrushkin
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* DNN native backend implementation.
*/
#include "dnn_backend_native.h"
#include "dnn_srcnn.h"
#include "libavformat/avio.h"
typedef enum {INPUT, CONV} LayerType;
typedef struct Layer{
LayerType type;
float* output;
void* params;
} Layer;
typedef struct ConvolutionalParams{
int32_t input_num, output_num, kernel_size;
float* kernel;
float* biases;
} ConvolutionalParams;
typedef struct InputParams{
int height, width, channels;
} InputParams;
// Represents a simple feed-forward convolutional network.
typedef struct ConvolutionalNetwork{
Layer* layers;
int32_t layers_num;
} ConvolutionalNetwork;
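// Ownership convention used throughout this file: layers[0] is always the
// INPUT layer and its output points at the caller's input buffer; the last
// layer's output points at the caller's output buffer; only the intermediate
// outputs are allocated and freed by the network itself.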
static DNNReturnType set_input_output_native(void* model, const DNNData* input, const DNNData* output)
{
ConvolutionalNetwork* network = (ConvolutionalNetwork*)model;
InputParams* input_params;
ConvolutionalParams* conv_params;
int cur_width, cur_height, cur_channels;
int32_t layer;
if (network->layers_num <= 0 || network->layers[0].type != INPUT){
return DNN_ERROR;
}
else{
network->layers[0].output = input->data;
input_params = (InputParams*)network->layers[0].params;
input_params->width = cur_width = input->width;
input_params->height = cur_height = input->height;
input_params->channels = cur_channels = input->channels;
}
for (layer = 1; layer < network->layers_num; ++layer){
switch (network->layers[layer].type){
case CONV:
conv_params = (ConvolutionalParams*)network->layers[layer].params;
if (conv_params->input_num != cur_channels){
return DNN_ERROR;
}
cur_channels = conv_params->output_num;
if (layer < network->layers_num - 1){
// av_freep is a no-op on NULL pointers, so any previously allocated
// output buffer can be freed unconditionally before reallocation.
av_freep(&network->layers[layer].output);
network->layers[layer].output = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
if (!network->layers[layer].output){
return DNN_ERROR;
}
}
else{
network->layers[layer].output = output->data;
if (output->width != cur_width || output->height != cur_height || output->channels != cur_channels){
return DNN_ERROR;
}
}
break;
default:
return DNN_ERROR;
}
}
return DNN_SUCCESS;
}
// Loads a model and its parameters stored in a binary file with the following structure:
// layers_num,conv_input_num,conv_output_num,conv_kernel_size,conv_kernel,conv_biases,conv_input_num...
// All integer fields are little-endian int32; kernel and bias coefficients are little-endian
// IEEE-754 float32, so the expected file size is 4 bytes plus, per conv layer,
// 12 + 4 * (input_num * output_num * kernel_size^2 + output_num) bytes.
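// For illustration only (not part of the commit): a single-conv-layer 1x1 identity
// model in this format could be produced as sketched below; write_model and its
// file handling are hypothetical, and fwrite of raw values assumes a little-endian host.
//
//     #include <stdio.h>
//     #include <stdint.h>
//     static void write_model(FILE* f)
//     {
//         int32_t layers_num = 1, input_num = 1, output_num = 1, kernel_size = 1;
//         float kernel = 1.0f, bias = 0.0f;
//         fwrite(&layers_num, sizeof(layers_num), 1, f);   // number of conv layers
//         fwrite(&input_num, sizeof(input_num), 1, f);
//         fwrite(&output_num, sizeof(output_num), 1, f);
//         fwrite(&kernel_size, sizeof(kernel_size), 1, f);
//         fwrite(&kernel, sizeof(kernel), 1, f);           // input_num * output_num * kernel_size^2 floats
//         fwrite(&bias, sizeof(bias), 1, f);               // output_num floats
//     }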
DNNModel* ff_dnn_load_model_native(const char* model_filename)
{
DNNModel* model = NULL;
ConvolutionalNetwork* network = NULL;
AVIOContext* model_file_context;
int file_size, dnn_size, kernel_size, i;
int32_t layer;
ConvolutionalParams* conv_params;
model = av_malloc(sizeof(DNNModel));
if (!model){
return NULL;
}
if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
av_freep(&model);
return NULL;
}
file_size = avio_size(model_file_context);
network = av_malloc(sizeof(ConvolutionalNetwork));
if (!network){
avio_closep(&model_file_context);
av_freep(&model);
return NULL;
}
model->model = (void*)network;
network->layers_num = 1 + (int32_t)avio_rl32(model_file_context);
dnn_size = 4;
network->layers = av_malloc(network->layers_num * sizeof(Layer));
if (!network->layers){
av_freep(&network);
avio_closep(&model_file_context);
av_freep(&model);
return NULL;
}
for (layer = 0; layer < network->layers_num; ++layer){
network->layers[layer].output = NULL;
network->layers[layer].params = NULL;
}
network->layers[0].type = INPUT;
network->layers[0].params = av_malloc(sizeof(InputParams));
if (!network->layers[0].params){
avio_closep(&model_file_context);
ff_dnn_free_model_native(&model);
return NULL;
}
for (layer = 1; layer < network->layers_num; ++layer){
conv_params = av_malloc(sizeof(ConvolutionalParams));
if (!conv_params){
avio_closep(&model_file_context);
ff_dnn_free_model_native(&model);
return NULL;
}
conv_params->input_num = (int32_t)avio_rl32(model_file_context);
conv_params->output_num = (int32_t)avio_rl32(model_file_context);
conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
kernel_size = conv_params->input_num * conv_params->output_num *
conv_params->kernel_size * conv_params->kernel_size;
dnn_size += 12 + ((kernel_size + conv_params->output_num) << 2);
if (dnn_size > file_size || conv_params->input_num <= 0 ||
conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
avio_closep(&model_file_context);
ff_dnn_free_model_native(&model);
return NULL;
}
conv_params->kernel = av_malloc(kernel_size * sizeof(float));
conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
if (!conv_params->kernel || !conv_params->biases){
avio_closep(&model_file_context);
ff_dnn_free_model_native(&model);
return NULL;
}
for (i = 0; i < kernel_size; ++i){
conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
}
for (i = 0; i < conv_params->output_num; ++i){
conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
}
network->layers[layer].type = CONV;
network->layers[layer].params = conv_params;
}
avio_closep(&model_file_context);
if (dnn_size != file_size){
ff_dnn_free_model_native(&model);
return NULL;
}
model->set_input_output = &set_input_output_native;
return model;
}
static int set_up_conv_layer(Layer* layer, const float* kernel, const float* biases, int32_t input_num, int32_t output_num, int32_t size)
{
ConvolutionalParams* conv_params;
int kernel_size;
conv_params = av_malloc(sizeof(ConvolutionalParams));
if (!conv_params){
return DNN_ERROR;
}
conv_params->input_num = input_num;
conv_params->output_num = output_num;
conv_params->kernel_size = size;
kernel_size = input_num * output_num * size * size;
conv_params->kernel = av_malloc(kernel_size * sizeof(float));
conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
if (!conv_params->kernel || !conv_params->biases){
av_freep(&conv_params->kernel);
av_freep(&conv_params->biases);
av_freep(&conv_params);
return DNN_ERROR;
}
memcpy(conv_params->kernel, kernel, kernel_size * sizeof(float));
memcpy(conv_params->biases, biases, output_num * sizeof(float));
layer->type = CONV;
layer->params = conv_params;
return DNN_SUCCESS;
}
DNNModel* ff_dnn_load_default_model_native(DNNDefaultModel model_type)
{
DNNModel* model = NULL;
ConvolutionalNetwork* network = NULL;
int32_t layer;
model = av_malloc(sizeof(DNNModel));
if (!model){
return NULL;
}
network = av_malloc(sizeof(ConvolutionalNetwork));
if (!network){
av_freep(&model);
return NULL;
}
model->model = (void*)network;
switch (model_type){
case DNN_SRCNN:
network->layers_num = 4;
network->layers = av_malloc(network->layers_num * sizeof(Layer));
if (!network->layers){
av_freep(&network);
av_freep(&model);
return NULL;
}
for (layer = 0; layer < network->layers_num; ++layer){
network->layers[layer].output = NULL;
network->layers[layer].params = NULL;
}
network->layers[0].type = INPUT;
network->layers[0].params = av_malloc(sizeof(InputParams));
if (!network->layers[0].params){
ff_dnn_free_model_native(&model);
return NULL;
}
if (set_up_conv_layer(network->layers + 1, conv1_kernel, conv1_biases, 1, 64, 9) != DNN_SUCCESS ||
set_up_conv_layer(network->layers + 2, conv2_kernel, conv2_biases, 64, 32, 1) != DNN_SUCCESS ||
set_up_conv_layer(network->layers + 3, conv3_kernel, conv3_biases, 32, 1, 5) != DNN_SUCCESS){
ff_dnn_free_model_native(&model);
return NULL;
}
model->set_input_output = &set_input_output_native;
return model;
default:
av_freep(&network);
av_freep(&model);
return NULL;
}
}
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
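// CLAMP_TO_EDGE implements replicate ("clamp to edge") padding: out-of-range
// sample coordinates are clamped to the nearest valid row or column, so every
// convolution keeps the output the same width and height as its input.
// Cost note: each CONV layer performs output_num * input_num * kernel_size^2
// multiply-adds per pixel; for SRCNN's first layer (1 -> 64, 9x9) that is
// 64 * 81 = 5184 MACs per pixel.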
static void convolve(const float* input, float* output, const ConvolutionalParams* conv_params, int32_t width, int32_t height)
{
int y, x, n_filter, ch, kernel_y, kernel_x;
int radius = conv_params->kernel_size >> 1;
int src_linesize = width * conv_params->input_num;
int filter_linesize = conv_params->kernel_size * conv_params->input_num;
int filter_size = conv_params->kernel_size * filter_linesize;
for (y = 0; y < height; ++y){
for (x = 0; x < width; ++x){
for (n_filter = 0; n_filter < conv_params->output_num; ++n_filter){
output[n_filter] = conv_params->biases[n_filter];
for (ch = 0; ch < conv_params->input_num; ++ch){
for (kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y){
for (kernel_x = 0; kernel_x < conv_params->kernel_size; ++kernel_x){
output[n_filter] += input[CLAMP_TO_EDGE(y + kernel_y - radius, height) * src_linesize +
CLAMP_TO_EDGE(x + kernel_x - radius, width) * conv_params->input_num + ch] *
conv_params->kernel[n_filter * filter_size + kernel_y * filter_linesize +
kernel_x * conv_params->input_num + ch];
}
}
}
output[n_filter] = FFMAX(output[n_filter], 0.0);
}
output += conv_params->output_num;
}
}
}
DNNReturnType ff_dnn_execute_model_native(const DNNModel* model)
{
ConvolutionalNetwork* network = (ConvolutionalNetwork*)model->model;
InputParams* input_params;
int cur_width, cur_height;
int32_t layer;
if (network->layers_num <= 0 || network->layers[0].type != INPUT || !network->layers[0].output){
return DNN_ERROR;
}
else{
input_params = (InputParams*)network->layers[0].params;
cur_width = input_params->width;
cur_height = input_params->height;
}
for (layer = 1; layer < network->layers_num; ++layer){
if (!network->layers[layer].output){
return DNN_ERROR;
}
switch (network->layers[layer].type){
case CONV:
convolve(network->layers[layer - 1].output, network->layers[layer].output, (ConvolutionalParams*)network->layers[layer].params, cur_width, cur_height);
break;
case INPUT:
return DNN_ERROR;
}
}
return DNN_SUCCESS;
}
void ff_dnn_free_model_native(DNNModel** model)
{
ConvolutionalNetwork* network;
ConvolutionalParams* conv_params;
int32_t layer;
if (*model)
{
network = (ConvolutionalNetwork*)(*model)->model;
for (layer = 0; layer < network->layers_num; ++layer){
switch (network->layers[layer].type){
case CONV:
if (layer < network->layers_num - 1){
av_freep(&network->layers[layer].output);
}
conv_params = (ConvolutionalParams*)network->layers[layer].params;
av_freep(&conv_params->kernel);
av_freep(&conv_params->biases);
av_freep(&conv_params);
break;
case INPUT:
av_freep(&network->layers[layer].params);
}
}
av_freep(&network->layers);
av_freep(&network);
av_freep(model);
}
}
/*
* Copyright (c) 2018 Sergey Lavrushkin
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* DNN inference functions interface for native backend.
*/
#ifndef AVFILTER_DNN_BACKEND_NATIVE_H
#define AVFILTER_DNN_BACKEND_NATIVE_H
#include "dnn_interface.h"
DNNModel* ff_dnn_load_model_native(const char* model_filename);
DNNModel* ff_dnn_load_default_model_native(DNNDefaultModel model_type);
DNNReturnType ff_dnn_execute_model_native(const DNNModel* model);
void ff_dnn_free_model_native(DNNModel** model);
#endif
/*
* Copyright (c) 2018 Sergey Lavrushkin
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Implements DNN module initialization with specified backend.
*/
#include "dnn_interface.h"
#include "dnn_backend_native.h"
#include "libavutil/mem.h"
DNNModule* ff_get_dnn_module(DNNBackendType backend_type)
{
DNNModule* dnn_module;
dnn_module = av_malloc(sizeof(DNNModule));
if(!dnn_module){
return NULL;
}
switch(backend_type){
case DNN_NATIVE:
dnn_module->load_model = &ff_dnn_load_model_native;
dnn_module->load_default_model = &ff_dnn_load_default_model_native;
dnn_module->execute_model = &ff_dnn_execute_model_native;
dnn_module->free_model = &ff_dnn_free_model_native;
}
return dnn_module;
}
/*
* Copyright (c) 2018 Sergey Lavrushkin
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* DNN inference engine interface.
*/
#ifndef AVFILTER_DNN_INTERFACE_H
#define AVFILTER_DNN_INTERFACE_H
typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType;
typedef enum {DNN_NATIVE} DNNBackendType;
typedef enum {DNN_SRCNN} DNNDefaultModel;
typedef struct DNNData{
float* data;
int width, height, channels;
} DNNData;
typedef struct DNNModel{
// Stores the model representation, which differs between backends.
void* model;
// Sets model input and output, while allocating additional memory for intermediate calculations.
// Should be called at least once before model execution.
DNNReturnType (*set_input_output)(void* model, const DNNData* input, const DNNData* output);
} DNNModel;
// Stores pointers to functions for loading, executing, freeing DNN models for one of the backends.
typedef struct DNNModule{
// Loads model and parameters from given file. Returns NULL if it is not possible.
DNNModel* (*load_model)(const char* model_filename);
// Loads one of the built-in default models. Returns NULL if it is not possible.
DNNModel* (*load_default_model)(DNNDefaultModel model_type);
// Executes model with specified input and output. Returns DNN_ERROR on failure.
DNNReturnType (*execute_model)(const DNNModel* model);
// Frees memory allocated for model.
void (*free_model)(DNNModel** model);
} DNNModule;
// Initializes DNNModule depending on chosen backend.
DNNModule* ff_get_dnn_module(DNNBackendType backend_type);
#endif
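The interface above is meant to be driven the way vf_srcnn.c below drives it: obtain a DNNModule for a backend, load a model through it, bind the input and output buffers once, then execute per frame. A minimal sketch of that call sequence (run_once and its buffer handling are illustrative, not part of the commit; in-place operation with input == output works here because the network allocates its own intermediate buffers):

#include "dnn_interface.h"
#include "libavutil/mem.h"

static int run_once(float* buf, int width, int height)
{
    DNNModule* module = ff_get_dnn_module(DNN_NATIVE);
    DNNModel* model = NULL;
    DNNData io;

    if (!module)
        return -1;
    io.data = buf;
    io.width = width;
    io.height = height;
    io.channels = 1;
    model = (module->load_default_model)(DNN_SRCNN);
    if (!model ||
        (model->set_input_output)(model->model, &io, &io) != DNN_SUCCESS ||
        (module->execute_model)(model) != DNN_SUCCESS){
        if (model)
            (module->free_model)(&model);
        av_freep(&module);
        return -1;
    }
    (module->free_model)(&model);
    av_freep(&module);
    return 0;
}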
(Source diff omitted by the viewer: file too large to display — presumably dnn_srcnn.h, the header with the built-in SRCNN weights used above.)
@@ -28,164 +28,47 @@
#include "formats.h"
#include "internal.h"
#include "libavutil/opt.h"
#include "vf_srcnn.h"
#include "libavformat/avio.h"
typedef struct Convolution
{
double* kernel;
double* biases;
int32_t size, input_channels, output_channels;
} Convolution;
#include "dnn_interface.h"
typedef struct SRCNNContext {
const AVClass *class;
/// SRCNN convolutions
struct Convolution conv1, conv2, conv3;
/// Path to binary file with kernels specifications
char* config_file_path;
/// Buffers for network input/output and feature maps
double* input_output_buf;
double* conv1_buf;
double* conv2_buf;
char* model_filename;
float* input_output_buf;
DNNModule* dnn_module;
DNNModel* model;
DNNData input_output;
} SRCNNContext;
#define OFFSET(x) offsetof(SRCNNContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
static const AVOption srcnn_options[] = {
{ "config_file", "path to configuration file with network parameters", OFFSET(config_file_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
{ "model_filename", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
{ NULL }
};
AVFILTER_DEFINE_CLASS(srcnn);
#define CHECK_FILE_SIZE(file_size, srcnn_size, avio_context) if (srcnn_size > file_size){ \
av_log(context, AV_LOG_ERROR, "error reading configuration file\n");\
avio_closep(&avio_context); \
return AVERROR(EIO); \
}
#define CHECK_ALLOCATION(call, end_call) if (call){ \
av_log(context, AV_LOG_ERROR, "could not allocate memory for convolutions\n"); \
end_call; \
return AVERROR(ENOMEM); \
}
static int allocate_read_conv_data(Convolution* conv, AVIOContext* config_file_context)
{
int32_t kernel_size = conv->output_channels * conv->size * conv->size * conv->input_channels;
int32_t i;
conv->kernel = av_malloc(kernel_size * sizeof(double));
if (!conv->kernel){
return AVERROR(ENOMEM);
}
for (i = 0; i < kernel_size; ++i){
conv->kernel[i] = av_int2double(avio_rl64(config_file_context));
}
conv->biases = av_malloc(conv->output_channels * sizeof(double));
if (!conv->biases){
return AVERROR(ENOMEM);
}
for (i = 0; i < conv->output_channels; ++i){
conv->biases[i] = av_int2double(avio_rl64(config_file_context));
}
return 0;
}
static int allocate_copy_conv_data(Convolution* conv, const double* kernel, const double* biases)
static av_cold int init(AVFilterContext* context)
{
int32_t kernel_size = conv->output_channels * conv->size * conv->size * conv->input_channels;
SRCNNContext* srcnn_context = context->priv;
conv->kernel = av_malloc(kernel_size * sizeof(double));
if (!conv->kernel){
srcnn_context->dnn_module = ff_get_dnn_module(DNN_NATIVE);
if (!srcnn_context->dnn_module){
av_log(context, AV_LOG_ERROR, "could not create dnn module\n");
return AVERROR(ENOMEM);
}
memcpy(conv->kernel, kernel, kernel_size * sizeof(double));
conv->biases = av_malloc(conv->output_channels * sizeof(double));
if (!conv->kernel){
return AVERROR(ENOMEM);
}
memcpy(conv->biases, biases, conv->output_channels * sizeof(double));
return 0;
}
static av_cold int init(AVFilterContext* context)
{
SRCNNContext *srcnn_context = context->priv;
AVIOContext* config_file_context;
int64_t file_size, srcnn_size;
/// Check the specified configuration file name and read network weights from it
if (!srcnn_context->config_file_path){
av_log(context, AV_LOG_INFO, "configuration file for network was not specified, using default weights for x2 upsampling\n");
/// Create convolution kernels and copy default weights
srcnn_context->conv1.input_channels = 1;
srcnn_context->conv1.output_channels = 64;
srcnn_context->conv1.size = 9;
CHECK_ALLOCATION(allocate_copy_conv_data(&srcnn_context->conv1, conv1_kernel, conv1_biases), )
srcnn_context->conv2.input_channels = 64;
srcnn_context->conv2.output_channels = 32;
srcnn_context->conv2.size = 1;
CHECK_ALLOCATION(allocate_copy_conv_data(&srcnn_context->conv2, conv2_kernel, conv2_biases), )
srcnn_context->conv3.input_channels = 32;
srcnn_context->conv3.output_channels = 1;
srcnn_context->conv3.size = 5;
CHECK_ALLOCATION(allocate_copy_conv_data(&srcnn_context->conv3, conv3_kernel, conv3_biases), )
}
else if (avio_check(srcnn_context->config_file_path, AVIO_FLAG_READ) > 0){
if (avio_open(&config_file_context, srcnn_context->config_file_path, AVIO_FLAG_READ) < 0){
av_log(context, AV_LOG_ERROR, "failed to open configuration file\n");
return AVERROR(EIO);
}
file_size = avio_size(config_file_context);
/// Create convolution kernels and read weights from file
srcnn_context->conv1.input_channels = 1;
srcnn_context->conv1.size = (int32_t)avio_rl32(config_file_context);
srcnn_context->conv1.output_channels = (int32_t)avio_rl32(config_file_context);
srcnn_size = 8 + (srcnn_context->conv1.output_channels * srcnn_context->conv1.size *
srcnn_context->conv1.size * srcnn_context->conv1.input_channels +
srcnn_context->conv1.output_channels << 3);
CHECK_FILE_SIZE(file_size, srcnn_size, config_file_context)
CHECK_ALLOCATION(allocate_read_conv_data(&srcnn_context->conv1, config_file_context), avio_closep(&config_file_context))
srcnn_context->conv2.input_channels = (int32_t)avio_rl32(config_file_context);
srcnn_context->conv2.size = (int32_t)avio_rl32(config_file_context);
srcnn_context->conv2.output_channels = (int32_t)avio_rl32(config_file_context);
srcnn_size += 12 + (srcnn_context->conv2.output_channels * srcnn_context->conv2.size *
srcnn_context->conv2.size * srcnn_context->conv2.input_channels +
srcnn_context->conv2.output_channels << 3);
CHECK_FILE_SIZE(file_size, srcnn_size, config_file_context)
CHECK_ALLOCATION(allocate_read_conv_data(&srcnn_context->conv2, config_file_context), avio_closep(&config_file_context))
srcnn_context->conv3.input_channels = (int32_t)avio_rl32(config_file_context);
srcnn_context->conv3.size = (int32_t)avio_rl32(config_file_context);
srcnn_context->conv3.output_channels = 1;
srcnn_size += 8 + (srcnn_context->conv3.output_channels * srcnn_context->conv3.size *
srcnn_context->conv3.size * srcnn_context->conv3.input_channels
+ srcnn_context->conv3.output_channels << 3);
if (file_size != srcnn_size){
av_log(context, AV_LOG_ERROR, "error reading configuration file\n");
avio_closep(&config_file_context);
return AVERROR(EIO);
}
CHECK_ALLOCATION(allocate_read_conv_data(&srcnn_context->conv3, config_file_context), avio_closep(&config_file_context))
avio_closep(&config_file_context);
if (!srcnn_context->model_filename){
av_log(context, AV_LOG_INFO, "model file for network was not specified, using default network for x2 upsampling\n");
srcnn_context->model = (srcnn_context->dnn_module->load_default_model)(DNN_SRCNN);
}
else{
av_log(context, AV_LOG_ERROR, "specified configuration file does not exist or not readable\n");
srcnn_context->model = (srcnn_context->dnn_module->load_model)(srcnn_context->model_filename);
}
if (!srcnn_context->model){
av_log(context, AV_LOG_ERROR, "could not load dnn model\n");
return AVERROR(EIO);
}
@@ -197,7 +80,7 @@ static int query_formats(AVFilterContext* context)
const enum AVPixelFormat pixel_formats[] = {AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_GRAY8,
AV_PIX_FMT_NONE};
AVFilterFormats *formats_list;
AVFilterFormats* formats_list;
formats_list = ff_make_format_list(pixel_formats);
if (!formats_list){
@@ -209,28 +92,29 @@ static int query_formats(AVFilterContext* context)
static int config_props(AVFilterLink* inlink)
{
AVFilterContext *context = inlink->dst;
SRCNNContext *srcnn_context = context->priv;
int min_dim;
/// Check if input data width or height is too low
min_dim = FFMIN(inlink->w, inlink->h);
if (min_dim <= srcnn_context->conv1.size >> 1 || min_dim <= srcnn_context->conv2.size >> 1 || min_dim <= srcnn_context->conv3.size >> 1){
av_log(context, AV_LOG_ERROR, "input width or height is too low\n");
return AVERROR(EIO);
}
/// Allocate network buffers
srcnn_context->input_output_buf = av_malloc(inlink->h * inlink->w * sizeof(double));
srcnn_context->conv1_buf = av_malloc(inlink->h * inlink->w * srcnn_context->conv1.output_channels * sizeof(double));
srcnn_context->conv2_buf = av_malloc(inlink->h * inlink->w * srcnn_context->conv2.output_channels * sizeof(double));
AVFilterContext* context = inlink->dst;
SRCNNContext* srcnn_context = context->priv;
DNNReturnType result;
if (!srcnn_context->input_output_buf || !srcnn_context->conv1_buf || !srcnn_context->conv2_buf){
av_log(context, AV_LOG_ERROR, "could not allocate memory for srcnn buffers\n");
srcnn_context->input_output_buf = av_malloc(inlink->h * inlink->w * sizeof(float));
if (!srcnn_context->input_output_buf){
av_log(context, AV_LOG_ERROR, "could not allocate memory for input/output buffer\n");
return AVERROR(ENOMEM);
}
return 0;
srcnn_context->input_output.data = srcnn_context->input_output_buf;
srcnn_context->input_output.width = inlink->w;
srcnn_context->input_output.height = inlink->h;
srcnn_context->input_output.channels = 1;
result = (srcnn_context->model->set_input_output)(srcnn_context->model->model, &srcnn_context->input_output, &srcnn_context->input_output);
if (result != DNN_SUCCESS){
av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n");
return AVERROR(EIO);
}
else{
return 0;
}
}
typedef struct ThreadData{
@@ -238,28 +122,19 @@ typedef struct ThreadData{
int out_linesize, height, width;
} ThreadData;
typedef struct ConvThreadData
{
const Convolution* conv;
const double* input;
double* output;
int height, width;
} ConvThreadData;
/// Convert uint8 data to double and scale it to use in network
static int uint8_to_double(AVFilterContext* context, void* arg, int jobnr, int nb_jobs)
static int uint8_to_float(AVFilterContext* context, void* arg, int jobnr, int nb_jobs)
{
SRCNNContext* srcnn_context = context->priv;
const ThreadData* td = arg;
const int slice_start = (td->height * jobnr ) / nb_jobs;
const int slice_end = (td->height * (jobnr + 1)) / nb_jobs;
const uint8_t* src = td->out + slice_start * td->out_linesize;
double* dst = srcnn_context->input_output_buf + slice_start * td->width;
float* dst = srcnn_context->input_output_buf + slice_start * td->width;
int y, x;
for (y = slice_start; y < slice_end; ++y){
for (x = 0; x < td->width; ++x){
dst[x] = (double)src[x] / 255.0;
dst[x] = (float)src[x] / 255.0f;
}
src += td->out_linesize;
dst += td->width;
@@ -268,20 +143,19 @@ static int uint8_to_double(AVFilterContext* context, void* arg, int jobnr, int n
return 0;
}
/// Convert double data from network to uint8 and scale it to output as filter result
static int double_to_uint8(AVFilterContext* context, void* arg, int jobnr, int nb_jobs)
static int float_to_uint8(AVFilterContext* context, void* arg, int jobnr, int nb_jobs)
{
SRCNNContext* srcnn_context = context->priv;
const ThreadData* td = arg;
const int slice_start = (td->height * jobnr ) / nb_jobs;
const int slice_end = (td->height * (jobnr + 1)) / nb_jobs;
const double* src = srcnn_context->input_output_buf + slice_start * td->width;
const float* src = srcnn_context->input_output_buf + slice_start * td->width;
uint8_t* dst = td->out + slice_start * td->out_linesize;
int y, x;
for (y = slice_start; y < slice_end; ++y){
for (x = 0; x < td->width; ++x){
dst[x] = (uint8_t)(255.0 * FFMIN(src[x], 1.0));
dst[x] = (uint8_t)(255.0f * FFMIN(src[x], 1.0f));
}
src += td->width;
dst += td->out_linesize;
@@ -290,45 +164,6 @@ static int double_to_uint8(AVFilterContext* context, void* arg, int jobnr, int n
return 0;
}
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
static int convolve(AVFilterContext* context, void* arg, int jobnr, int nb_jobs)
{
const ConvThreadData* td = arg;
const int slice_start = (td->height * jobnr ) / nb_jobs;
const int slice_end = (td->height * (jobnr + 1)) / nb_jobs;
const double* src = td->input;
double* dst = td->output + slice_start * td->width * td->conv->output_channels;
int y, x;
int32_t n_filter, ch, kernel_y, kernel_x;
int32_t radius = td->conv->size >> 1;
int src_linesize = td->width * td->conv->input_channels;
int filter_linesize = td->conv->size * td->conv->input_channels;
int filter_size = td->conv->size * filter_linesize;
for (y = slice_start; y < slice_end; ++y){
for (x = 0; x < td->width; ++x){
for (n_filter = 0; n_filter < td->conv->output_channels; ++n_filter){
dst[n_filter] = td->conv->biases[n_filter];
for (ch = 0; ch < td->conv->input_channels; ++ch){
for (kernel_y = 0; kernel_y < td->conv->size; ++kernel_y){
for (kernel_x = 0; kernel_x < td->conv->size; ++kernel_x){
dst[n_filter] += src[CLAMP_TO_EDGE(y + kernel_y - radius, td->height) * src_linesize +
CLAMP_TO_EDGE(x + kernel_x - radius, td->width) * td->conv->input_channels + ch] *
td->conv->kernel[n_filter * filter_size + kernel_y * filter_linesize +
kernel_x * td->conv->input_channels + ch];
}
}
}
dst[n_filter] = FFMAX(dst[n_filter], 0.0);
}
dst += td->conv->output_channels;
}
}
return 0;
}
static int filter_frame(AVFilterLink* inlink, AVFrame* in)
{
AVFilterContext* context = inlink->dst;
@@ -336,8 +171,8 @@ static int filter_frame(AVFilterLink* inlink, AVFrame* in)
AVFilterLink* outlink = context->outputs[0];
AVFrame* out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
ThreadData td;
ConvThreadData ctd;
int nb_threads;
DNNReturnType dnn_result;
if (!out){
av_log(context, AV_LOG_ERROR, "could not allocate memory for output frame\n");
@@ -349,24 +184,19 @@ static int filter_frame(AVFilterLink* inlink, AVFrame* in)
av_frame_free(&in);
td.out = out->data[0];
td.out_linesize = out->linesize[0];
td.height = ctd.height = out->height;
td.width = ctd.width = out->width;
td.height = out->height;
td.width = out->width;
nb_threads = ff_filter_get_nb_threads(context);
context->internal->execute(context, uint8_to_double, &td, NULL, FFMIN(td.height, nb_threads));
ctd.conv = &srcnn_context->conv1;
ctd.input = srcnn_context->input_output_buf;
ctd.output = srcnn_context->conv1_buf;
context->internal->execute(context, convolve, &ctd, NULL, FFMIN(ctd.height, nb_threads));
ctd.conv = &srcnn_context->conv2;
ctd.input = srcnn_context->conv1_buf;
ctd.output = srcnn_context->conv2_buf;
context->internal->execute(context, convolve, &ctd, NULL, FFMIN(ctd.height, nb_threads));
ctd.conv = &srcnn_context->conv3;
ctd.input = srcnn_context->conv2_buf;
ctd.output = srcnn_context->input_output_buf;
context->internal->execute(context, convolve, &ctd, NULL, FFMIN(ctd.height, nb_threads));
context->internal->execute(context, double_to_uint8, &td, NULL, FFMIN(td.height, nb_threads));
context->internal->execute(context, uint8_to_float, &td, NULL, FFMIN(td.height, nb_threads));
dnn_result = (srcnn_context->dnn_module->execute_model)(srcnn_context->model);
if (dnn_result != DNN_SUCCESS){
av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n");
return AVERROR(EIO);
}
context->internal->execute(context, float_to_uint8, &td, NULL, FFMIN(td.height, nb_threads));
return ff_filter_frame(outlink, out);
}
@@ -375,18 +205,11 @@ static av_cold void uninit(AVFilterContext* context)
{
SRCNNContext* srcnn_context = context->priv;
/// Free convolution data
av_freep(&srcnn_context->conv1.kernel);
av_freep(&srcnn_context->conv1.biases);
av_freep(&srcnn_context->conv2.kernel);
av_freep(&srcnn_context->conv2.biases);
av_freep(&srcnn_context->conv3.kernel);
av_freep(&srcnn_context->conv3.kernel);
/// Free network buffers
if (srcnn_context->dnn_module){
(srcnn_context->dnn_module->free_model)(&srcnn_context->model);
av_freep(&srcnn_context->dnn_module);
}
av_freep(&srcnn_context->input_output_buf);
av_freep(&srcnn_context->conv1_buf);
av_freep(&srcnn_context->conv2_buf);
}
static const AVFilterPad srcnn_inputs[] = {
@@ -419,3 +242,4 @@ AVFilter ff_vf_srcnn = {
.priv_class = &srcnn_class,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
};
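With this change the filter is configured via a model file rather than a configuration file; a typical invocation (file names illustrative) would be:

ffmpeg -i input.png -vf srcnn=model_filename=srcnn.model output.png

When model_filename is omitted, the filter falls back to the built-in default network for x2 upsampling.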
(Source diff omitted by the viewer: file too large to display — presumably the removed vf_srcnn.h with the old double-precision weight tables.)