Commit bd10c1e9 authored by Sergey Lavrushkin, committed by Pedro Arthur

libavfilter: Removes stored DNN models. Adds support for native backend model...

libavfilter: Removes stored DNN models. Adds support for native backend model file format in tf backend.
Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
parent bc1097a2
......@@ -15593,30 +15593,24 @@ option may cause flicker since the B-Frames have often larger QP. Default is
@section sr
Scale the input by applying one of the super-resolution methods based on
convolutional neural networks.
convolutional neural networks. Supported models:
Training scripts as well as scripts for model generation are provided in
the repository at @url{https://github.com/HighVoltageRocknRoll/sr.git}.
The filter accepts the following options:
@table @option
@item model
Specify which super-resolution model to use. This option accepts the following values:
@table @samp
@item srcnn
Super-Resolution Convolutional Neural Network model.
@itemize
@item
Super-Resolution Convolutional Neural Network model (SRCNN).
See @url{https://arxiv.org/abs/1501.00092}.
@item espcn
Efficient Sub-Pixel Convolutional Neural Network model.
@item
Efficient Sub-Pixel Convolutional Neural Network model (ESPCN).
See @url{https://arxiv.org/abs/1609.05158}.
@end itemize
@end table
Training scripts as well as scripts for model generation are provided in
the repository at @url{https://github.com/HighVoltageRocknRoll/sr.git}.
Default value is @samp{srcnn}.
The filter accepts the following options:
@table @option
@item dnn_backend
Specify which DNN backend to use for model loading and execution. This option accepts
the following values:
......@@ -15630,23 +15624,20 @@ TensorFlow backend. To enable this backend you
need to install the TensorFlow for C library (see
@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with
@code{--enable-libtensorflow}
@end table
Default value is @samp{native}.
@item scale_factor
Set scale factor for SRCNN model, for which custom model file was provided.
Allowed values are @code{2}, @code{3} and @code{4}. Default value is @code{2}.
Scale factor is necessary for SRCNN model, because it accepts input upscaled
using bicubic upscaling with proper scale factor.
@item model_filename
@item model
Set path to the model file specifying the network architecture and its parameters.
Note that different backends use different file formats. The TensorFlow backend
can load files in both formats, while the native backend can only load files in
its own format.
@item scale_factor
Set scale factor for SRCNN model. Allowed values are @code{2}, @code{3} and @code{4}.
Default value is @code{2}. Scale factor is necessary for SRCNN model, because it accepts
input upscaled using bicubic upscaling with proper scale factor.
@end table
@anchor{subtitles}
......
......@@ -24,40 +24,6 @@
*/
#include "dnn_backend_native.h"
#include "dnn_srcnn.h"
#include "dnn_espcn.h"
#include "libavformat/avio.h"
// Kinds of layers a native-backend network can be built from.
typedef enum {
    INPUT,
    CONV,
    DEPTH_TO_SPACE
} LayerType;

// Activation functions that can be attached to a convolutional layer.
typedef enum {
    RELU,
    TANH,
    SIGMOID
} ActivationFunc;

// A single layer: its kind, its output buffer and its kind-specific parameters.
typedef struct Layer {
    LayerType type;
    float *output;
    // Points to the parameter struct matching `type`
    // (InputParams, ConvolutionalParams or DepthToSpaceParams).
    void *params;
} Layer;

// Parameters of a CONV layer.
typedef struct ConvolutionalParams {
    int32_t input_num;
    int32_t output_num;
    int32_t kernel_size;
    ActivationFunc activation;
    // input_num * output_num * kernel_size * kernel_size weights
    float *kernel;
    // output_num bias values
    float *biases;
} ConvolutionalParams;

// Parameters of the INPUT layer.
typedef struct InputParams {
    int height;
    int width;
    int channels;
} InputParams;

// Parameters of a DEPTH_TO_SPACE layer.
typedef struct DepthToSpaceParams {
    int block_size;
} DepthToSpaceParams;

// Simple feed-forward convolutional network: an array of layers plus its length.
typedef struct ConvolutionalNetwork {
    Layer *layers;
    int32_t layers_num;
} ConvolutionalNetwork;
static DNNReturnType set_input_output_native(void *model, DNNData *input, DNNData *output)
{
......@@ -134,7 +100,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
AVIOContext *model_file_context;
int file_size, dnn_size, kernel_size, i;
int32_t layer;
LayerType layer_type;
DNNLayerType layer_type;
ConvolutionalParams *conv_params;
DepthToSpaceParams *depth_to_space_params;
......@@ -251,118 +217,6 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
return model;
}
// Turns `layer` into a CONV layer whose weights are private copies of the
// given kernel and bias tables.
// Returns DNN_SUCCESS on success. If any allocation fails, everything allocated
// here is released, `layer` is left untouched and DNN_ERROR is returned.
static int set_up_conv_layer(Layer *layer, const float *kernel, const float *biases, ActivationFunc activation,
                             int32_t input_num, int32_t output_num, int32_t size)
{
    // size x size weights for every (input, output) pair
    int weights_num = input_num * output_num * size * size;
    ConvolutionalParams *params = av_malloc(sizeof(ConvolutionalParams));

    if (!params){
        return DNN_ERROR;
    }

    params->activation  = activation;
    params->input_num   = input_num;
    params->output_num  = output_num;
    params->kernel_size = size;
    params->kernel      = av_malloc(weights_num * sizeof(float));
    params->biases      = av_malloc(params->output_num * sizeof(float));

    if (params->kernel && params->biases){
        memcpy(params->kernel, kernel, weights_num * sizeof(float));
        memcpy(params->biases, biases, output_num * sizeof(float));
        layer->type = CONV;
        layer->params = params;
        return DNN_SUCCESS;
    }

    // one of the two buffers may have been allocated; release whatever exists
    av_freep(&params->kernel);
    av_freep(&params->biases);
    av_freep(&params);
    return DNN_ERROR;
}
/**
 * Builds one of the built-in super-resolution networks for the native backend.
 *
 * DNN_SRCNN produces an input layer followed by three convolutions.
 * DNN_ESPCN produces an input layer, three convolutions and a depth-to-space
 * layer with block size 2. Weights are copied from the srcnn_* / espcn_*
 * constant tables.
 *
 * @param model_type which default network to build
 * @return newly allocated model (released with ff_dnn_free_model_native), or
 *         NULL if model_type is unknown or an allocation fails
 */
DNNModel *ff_dnn_load_default_model_native(DNNDefaultModel model_type)
{
    DNNModel *model = NULL;
    ConvolutionalNetwork *network = NULL;
    DepthToSpaceParams *depth_to_space_params;
    int32_t layer;

    model = av_malloc(sizeof(DNNModel));
    if (!model){
        return NULL;
    }

    network = av_malloc(sizeof(ConvolutionalNetwork));
    if (!network){
        av_freep(&model);
        return NULL;
    }
    model->model = (void *)network;

    switch (model_type){
    case DNN_SRCNN:
        network->layers_num = 4;
        break;
    case DNN_ESPCN:
        network->layers_num = 5;
        break;
    default:
        av_freep(&network);
        av_freep(&model);
        return NULL;
    }

    network->layers = av_malloc(network->layers_num * sizeof(Layer));
    if (!network->layers){
        av_freep(&network);
        av_freep(&model);
        return NULL;
    }

    for (layer = 0; layer < network->layers_num; ++layer){
        // Give every layer a defined type up front: the error paths below pass
        // the partially built model to ff_dnn_free_model_native, which
        // presumably dispatches on `type` (not visible here) and must never
        // read an indeterminate value. INPUT owns no nested buffers.
        network->layers[layer].type = INPUT;
        network->layers[layer].output = NULL;
        network->layers[layer].params = NULL;
    }

    network->layers[0].type = INPUT;
    network->layers[0].params = av_malloc(sizeof(InputParams));
    if (!network->layers[0].params){
        ff_dnn_free_model_native(&model);
        return NULL;
    }

    switch (model_type){
    case DNN_SRCNN:
        if (set_up_conv_layer(network->layers + 1, srcnn_conv1_kernel, srcnn_conv1_bias, RELU, 1, 64, 9) != DNN_SUCCESS ||
            set_up_conv_layer(network->layers + 2, srcnn_conv2_kernel, srcnn_conv2_bias, RELU, 64, 32, 1) != DNN_SUCCESS ||
            set_up_conv_layer(network->layers + 3, srcnn_conv3_kernel, srcnn_conv3_bias, RELU, 32, 1, 5) != DNN_SUCCESS){
            ff_dnn_free_model_native(&model);
            return NULL;
        }
        break;
    case DNN_ESPCN:
        if (set_up_conv_layer(network->layers + 1, espcn_conv1_kernel, espcn_conv1_bias, TANH, 1, 64, 5) != DNN_SUCCESS ||
            set_up_conv_layer(network->layers + 2, espcn_conv2_kernel, espcn_conv2_bias, TANH, 64, 32, 3) != DNN_SUCCESS ||
            set_up_conv_layer(network->layers + 3, espcn_conv3_kernel, espcn_conv3_bias, SIGMOID, 32, 4, 3) != DNN_SUCCESS){
            ff_dnn_free_model_native(&model);
            return NULL;
        }
        depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
        if (!depth_to_space_params){
            ff_dnn_free_model_native(&model);
            return NULL;
        }
        depth_to_space_params->block_size = 2;
        // Set type and params together so the layer is never half-described.
        network->layers[4].type = DEPTH_TO_SPACE;
        network->layers[4].params = depth_to_space_params;
        break;
    default:
        // Unreachable: unknown model types were rejected by the first switch.
        break;
    }

    model->set_input_output = &set_input_output_native;

    return model;
}
// Clamps index x into [0, w - 1]: negative values map to 0, values >= w map to w - 1.
// Both arguments may be evaluated more than once, so pass side-effect-free expressions.
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? ((w) - 1) : (x)))
static void convolve(const float *input, float *output, const ConvolutionalParams *conv_params, int width, int height)
......
......@@ -28,10 +28,40 @@
#define AVFILTER_DNN_BACKEND_NATIVE_H
#include "dnn_interface.h"
#include "libavformat/avio.h"
DNNModel *ff_dnn_load_model_native(const char *model_filename);
// Kinds of layers a native-backend network can be built from.
typedef enum {
    INPUT,
    CONV,
    DEPTH_TO_SPACE
} DNNLayerType;

// Activation functions that can be attached to a convolutional layer.
typedef enum {
    RELU,
    TANH,
    SIGMOID
} DNNActivationFunc;

// A single layer: its kind, its output buffer and its kind-specific parameters.
typedef struct Layer {
    DNNLayerType type;
    float *output;
    // Parameter struct for this layer; its concrete type depends on `type`.
    void *params;
} Layer;

// Parameters of a CONV layer: dimensions, activation and the weight buffers.
typedef struct ConvolutionalParams {
    int32_t input_num;
    int32_t output_num;
    int32_t kernel_size;
    DNNActivationFunc activation;
    float *kernel;
    float *biases;
} ConvolutionalParams;
DNNModel *ff_dnn_load_default_model_native(DNNDefaultModel model_type);
// Parameters of the INPUT layer: dimensions of the data fed to the network.
typedef struct InputParams {
    int height;
    int width;
    int channels;
} InputParams;

// Parameters of a DEPTH_TO_SPACE layer.
typedef struct DepthToSpaceParams {
    int block_size;
} DepthToSpaceParams;
// Simple feed-forward convolutional network: an array of layers plus its length.
typedef struct ConvolutionalNetwork {
    Layer   *layers;
    int32_t  layers_num;
} ConvolutionalNetwork;
DNNModel *ff_dnn_load_model_native(const char *model_filename);
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model);
......
This diff is collapsed.
......@@ -31,8 +31,6 @@
DNNModel *ff_dnn_load_model_tf(const char *model_filename);
DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type);
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model);
void ff_dnn_free_model_tf(DNNModel **model);
......
This diff is collapsed.
......@@ -40,14 +40,12 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type)
switch(backend_type){
case DNN_NATIVE:
dnn_module->load_model = &ff_dnn_load_model_native;
dnn_module->load_default_model = &ff_dnn_load_default_model_native;
dnn_module->execute_model = &ff_dnn_execute_model_native;
dnn_module->free_model = &ff_dnn_free_model_native;
break;
case DNN_TF:
#if (CONFIG_LIBTENSORFLOW == 1)
dnn_module->load_model = &ff_dnn_load_model_tf;
dnn_module->load_default_model = &ff_dnn_load_default_model_tf;
dnn_module->execute_model = &ff_dnn_execute_model_tf;
dnn_module->free_model = &ff_dnn_free_model_tf;
#else
......
......@@ -30,8 +30,6 @@ typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType;
typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType;
typedef enum {DNN_SRCNN, DNN_ESPCN} DNNDefaultModel;
typedef struct DNNData{
float *data;
int width, height, channels;
......@@ -49,8 +47,6 @@ typedef struct DNNModel{
typedef struct DNNModule{
// Loads model and parameters from given file. Returns NULL if it is not possible.
DNNModel *(*load_model)(const char *model_filename);
// Loads one of the default models
DNNModel *(*load_default_model)(DNNDefaultModel model_type);
// Executes model with specified input and output. Returns DNN_ERROR otherwise.
DNNReturnType (*execute_model)(const DNNModel *model);
// Frees memory allocated for model.
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -33,12 +33,9 @@
#include "libswscale/swscale.h"
#include "dnn_interface.h"
// Built-in super-resolution architectures the sr filter can select between.
typedef enum {SRCNN, ESPCN} SRModel;
typedef struct SRContext {
const AVClass *class;
SRModel model_type;
char *model_filename;
DNNBackendType backend_type;
DNNModule *dnn_module;
......@@ -52,16 +49,13 @@ typedef struct SRContext {
#define OFFSET(x) offsetof(SRContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
// Options accepted by the sr filter. OFFSET() ties an option to the SRContext
// field that holds its value; AV_OPT_TYPE_CONST entries name the values that
// the option sharing their unit string ("model_type", "backend") accepts.
// NOTE(review): "model" and "scale_factor" each appear twice in this listing —
// presumably the old and new revisions shown together; confirm only one of each remains.
static const AVOption sr_options[] = {
{ "model", "specifies what DNN model to use", OFFSET(model_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "model_type" },
{ "srcnn", "Super-Resolution Convolutional Neural Network model (scale factor should be specified for custom SRCNN model)", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "model_type" },
{ "espcn", "Efficient Sub-Pixel Convolutional Neural Network model", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "model_type" },
{ "dnn_backend", "DNN backend used for model execution", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" },
{ "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" },
// the tensorflow choice is only offered when the build enables libtensorflow
#if (CONFIG_LIBTENSORFLOW == 1)
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" },
#endif
{"scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS},
{ "model_filename", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
{ "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS },
{ "model", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
// terminating sentinel entry
{ NULL }
};
......@@ -77,15 +71,8 @@ static av_cold int init(AVFilterContext *context)
return AVERROR(ENOMEM);
}
if (!sr_context->model_filename){
av_log(context, AV_LOG_VERBOSE, "model file for network was not specified, using default network for x2 upsampling\n");
sr_context->scale_factor = 2;
switch (sr_context->model_type){
case SRCNN:
sr_context->model = (sr_context->dnn_module->load_default_model)(DNN_SRCNN);
break;
case ESPCN:
sr_context->model = (sr_context->dnn_module->load_default_model)(DNN_ESPCN);
}
av_log(context, AV_LOG_ERROR, "model file for network was not specified\n");
return AVERROR(EIO);
}
else{
sr_context->model = (sr_context->dnn_module->load_model)(sr_context->model_filename);
......@@ -126,15 +113,8 @@ static int config_props(AVFilterLink *inlink)
DNNReturnType result;
int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w;
switch (sr_context->model_type){
case SRCNN:
sr_context->input.width = inlink->w * sr_context->scale_factor;
sr_context->input.height = inlink->h * sr_context->scale_factor;
break;
case ESPCN:
sr_context->input.width = inlink->w;
sr_context->input.height = inlink->h;
}
sr_context->input.width = inlink->w * sr_context->scale_factor;
sr_context->input.height = inlink->h * sr_context->scale_factor;
sr_context->input.channels = 1;
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output);
......@@ -143,6 +123,16 @@ static int config_props(AVFilterLink *inlink)
return AVERROR(EIO);
}
else{
if (sr_context->input.height != sr_context->output.height || sr_context->input.width != sr_context->output.width){
sr_context->input.width = inlink->w;
sr_context->input.height = inlink->h;
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output);
if (result != DNN_SUCCESS){
av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n");
return AVERROR(EIO);
}
sr_context->scale_factor = 0;
}
outlink->h = sr_context->output.height;
outlink->w = sr_context->output.width;
sr_context->sws_contexts[1] = sws_getContext(sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAY8,
......@@ -157,8 +147,7 @@ static int config_props(AVFilterLink *inlink)
av_log(context, AV_LOG_ERROR, "could not create SwsContext for conversions\n");
return AVERROR(ENOMEM);
}
switch (sr_context->model_type){
case SRCNN:
if (sr_context->scale_factor){
sr_context->sws_contexts[0] = sws_getContext(inlink->w, inlink->h, inlink->format,
outlink->w, outlink->h, outlink->format,
SWS_BICUBIC, NULL, NULL, NULL);
......@@ -167,8 +156,8 @@ static int config_props(AVFilterLink *inlink)
return AVERROR(ENOMEM);
}
sr_context->sws_slice_h = inlink->h;
break;
case ESPCN:
}
else{
if (inlink->format != AV_PIX_FMT_GRAY8){
sws_src_h = sr_context->input.height;
sws_src_w = sr_context->input.width;
......@@ -233,15 +222,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
av_frame_copy_props(out, in);
out->height = sr_context->output.height;
out->width = sr_context->output.width;
switch (sr_context->model_type){
case SRCNN:
if (sr_context->scale_factor){
sws_scale(sr_context->sws_contexts[0], (const uint8_t **)in->data, in->linesize,
0, sr_context->sws_slice_h, out->data, out->linesize);
sws_scale(sr_context->sws_contexts[1], (const uint8_t **)out->data, out->linesize,
0, out->height, (uint8_t * const*)(&sr_context->input.data), &sr_context->sws_input_linesize);
break;
case ESPCN:
}
else{
if (sr_context->sws_contexts[0]){
sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 1), in->linesize + 1,
0, sr_context->sws_slice_h, out->data + 1, out->linesize + 1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment