Commit 4364e1f1 authored by Georg Martius's avatar Georg Martius Committed by Clément Bœsch

lavfi: add video stabilization plugins using vid.stab library

vidstabdetect and vidstabtransform common functions for interfacing
vid.stab are in libavfilter/vidstabutils.c
Signed-off-by: Georg Martius <martius@mis.mpg.de>
parent 6f1716c4
......@@ -31,6 +31,8 @@ version <next>:
- asetrate filter
- interleave filter
- timeline editing with filters
- vidstabdetect and vidstabtransform filters for video stabilization using
the vid.stab library
version 1.2:
......
......@@ -72,6 +72,7 @@ compatible libraries
The following libraries are under GPL:
- libcdio
- libutvideo
- libvidstab
- libx264
- libxavs
- libxvid
......
......@@ -226,6 +226,7 @@ External library support:
--enable-libtwolame enable MP2 encoding via libtwolame [no]
--enable-libutvideo enable Ut Video encoding and decoding via libutvideo [no]
--enable-libv4l2 enable libv4l2/v4l-utils [no]
--enable-libvidstab enable video stabilization using vid.stab [no]
--enable-libvo-aacenc enable AAC encoding via libvo-aacenc [no]
--enable-libvo-amrwbenc enable AMR-WB encoding via libvo-amrwbenc [no]
--enable-libvorbis enable Vorbis en/decoding via libvorbis,
......@@ -1181,6 +1182,7 @@ EXTERNAL_LIBRARY_LIST="
libtwolame
libutvideo
libv4l2
libvidstab
libvo_aacenc
libvo_amrwbenc
libvorbis
......@@ -2152,6 +2154,8 @@ stereo3d_filter_deps="gpl"
subtitles_filter_deps="avformat avcodec libass"
super2xsai_filter_deps="gpl"
tinterlace_filter_deps="gpl"
vidstabdetect_filter_deps="libvidstab"
vidstabtransform_filter_deps="libvidstab"
yadif_filter_deps="gpl"
pixfmts_super2xsai_test_deps="super2xsai_filter"
tinterlace_merge_test_deps="tinterlace_filter"
......@@ -3573,6 +3577,7 @@ die_license_disabled_gpl() {
die_license_disabled gpl libcdio
die_license_disabled gpl libutvideo
die_license_disabled gpl libvidstab
die_license_disabled gpl libx264
die_license_disabled gpl libxavs
die_license_disabled gpl libxvid
......@@ -4015,6 +4020,7 @@ enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame &&
die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
enabled libutvideo && require_cpp utvideo "stdint.h stdlib.h utvideo/utvideo.h utvideo/Codec.h" 'CCodec*' -lutvideo -lstdc++
enabled libv4l2 && require_pkg_config libv4l2 libv4l2.h v4l2_ioctl
enabled libvidstab && require_pkg_config vidstab vid.stab/libvidstab.h vsMotionDetectInit
enabled libvo_aacenc && require libvo_aacenc vo-aacenc/voAAC.h voGetAACEncAPI -lvo-aacenc
enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
enabled libvorbis && require libvorbis vorbis/vorbisenc.h vorbis_info_init -lvorbisenc -lvorbis -logg
......
......@@ -5308,6 +5308,141 @@ much, but it will increase the amount of blurring needed to cover over
the image and will destroy more information than necessary, and extra
pixels will slow things down on a large logo.
@anchor{vidstabtransform}
@section vidstabtransform
Video stabilization/deshaking: pass 2 of 2,
see @ref{vidstabdetect} for pass 1.
Read a file with transform information for each frame and
apply/compensate them. Together with the @ref{vidstabdetect}
filter this can be used to deshake videos. See also
@url{http://public.hronopik.de/vid.stab}. It is important to also use
the unsharp filter, see below.
To enable compilation of this filter you need to configure FFmpeg with
@code{--enable-libvidstab}.
This filter accepts the following named options, expressed as a
sequence of @var{key}=@var{value} pairs, separated by ":".
@table @option
@item input
path to the file used to read the transforms (default: @file{transforms.trf})
@item smoothing
number of frames (value*2 + 1) used for lowpass filtering the camera movements
(default: 10). For example a number of 10 means that 21 frames are used
(10 in the past and 10 in the future) to smoothen the motion in the
video. A larger value leads to a smoother video, but limits the
acceleration of the camera (pan/tilt movements).
@item maxshift
maximal number of pixels to translate frames (default: -1 no limit)
@item maxangle
maximal angle in radians (degree*PI/180) to rotate frames (default: -1
no limit)
@item crop
How to deal with borders that may be visible due to movement
compensation. Available values are:
@table @samp
@item keep
keep image information from previous frame (default)
@item black
fill the border black
@end table
@item invert
@table @samp
@item 0
keep transforms normal (default)
@item 1
invert transforms
@end table
@item relative
consider transforms as
@table @samp
@item 0
absolute
@item 1
relative to previous frame (default)
@end table
@item zoom
percentage to zoom (default: 0)
@table @samp
@item >0
zoom in
@item <0
zoom out
@end table
@item optzoom
if 1 then optimal zoom value is determined (default).
Optimal zoom means no (or only little) border should be visible.
Note that the value given at zoom is added to the one calculated
here.
@item interpol
type of interpolation
Available values are:
@table @samp
@item no
no interpolation
@item linear
linear only horizontal
@item bilinear
linear in both directions (default)
@item bicubic
cubic in both directions (slow)
@end table
@item tripod
virtual tripod mode means that the video is stabilized such that the
camera stays stationary. Use also @code{tripod} option of
@ref{vidstabdetect}.
@table @samp
@item 0
off (default)
@item 1
virtual tripod mode: equivalent to @code{relative=0:smoothing=0}
@end table
@end table
@subsection Examples
@itemize
@item
typical call with default values:
(note the unsharp filter which is always recommended)
@example
ffmpeg -i inp.mpeg -vf vidstabtransform,unsharp=5:5:0.8:3:3:0.4 inp_stabilized.mpeg
@end example
@item
zoom in a bit more and load transform data from a given file
@example
vidstabtransform=zoom=5:input="mytransforms.trf"
@end example
@item
smoothen the video even more
@example
vidstabtransform=smoothing=30
@end example
@end itemize
@section scale
Scale (resize) the input video, using the libswscale library.
......@@ -5706,6 +5841,93 @@ in [-30,0] will filter edges. Default value is 0.
If a chroma option is not explicitly set, the corresponding luma value
is set.
@anchor{vidstabdetect}
@section vidstabdetect
Video stabilization/deshaking: pass 1 of 2, see @ref{vidstabtransform}
for pass 2.
Generates a file with relative transform information (translation and
rotation) between subsequent frames.
To enable compilation of this filter you need to configure FFmpeg with
@code{--enable-libvidstab}.
This filter accepts the following named options, expressed as a
sequence of @var{key}=@var{value} pairs, separated by ":".
@table @option
@item result
path to the file used to write the transforms (default: @file{transforms.trf})
@item shakiness
how shaky is the video and how quick is the camera? (default: 5)
@table @samp
@item 1
little (fast)
@item ...
@item 10
very strong/quick (slow)
@end table
@item accuracy
accuracy of detection process (>=shakiness) (default: 9)
@table @samp
@item 1
low (fast)
@item 15
high (slow)
@end table
@item stepsize
stepsize of search process, region around minimum is scanned with 1 pixel
resolution (default: 6)
@item mincontrast
below this contrast a local measurement field is discarded (0-1) (default: 0.25)
@item tripod
virtual tripod mode: @code{tripod=framenum} if framenum>0 otherwise disabled.
The motion of the frames is compared to a reference frame (framenum).
The idea is to compensate all movements in a more-or-less static scene
and keep the camera view absolutely still.
(default: 0 (disabled))
@item show
0: draw nothing (default); 1,2: show fields and transforms in the resulting frames
@end table
@subsection Examples
@itemize
@item
use default values:
@example
vidstabdetect
@end example
@item
strongly shaky movie and put the results in @code{mytransforms.trf}
@example
vidstabdetect=shakiness=10:accuracy=15:result="mytransforms.trf"
@end example
@item
visualize some internals in the resulting video
@example
vidstabdetect=show=1
@end example
@item
Typical call with visualization
@example
ffmpeg -i input -vf vidstabdetect=shakiness=5:show=1 dummy.avi
@end example
@end itemize
@section stereo3d
Convert between different stereoscopic image formats.
......
......@@ -179,6 +179,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o
OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o
OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o
OBJS-$(CONFIG_VFLIP_FILTER) += vf_vflip.o
OBJS-$(CONFIG_VIDSTABDETECT_FILTER) += vidstabutils.o vf_vidstabdetect.o
OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER) += vidstabutils.o vf_vidstabtransform.o
OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o
OBJS-$(CONFIG_CELLAUTO_FILTER) += vsrc_cellauto.o
......
......@@ -176,6 +176,8 @@ void avfilter_register_all(void)
REGISTER_FILTER(TRANSPOSE, transpose, vf);
REGISTER_FILTER(UNSHARP, unsharp, vf);
REGISTER_FILTER(VFLIP, vflip, vf);
REGISTER_FILTER(VIDSTABDETECT, vidstabdetect, vf);
REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
REGISTER_FILTER(YADIF, yadif, vf);
REGISTER_FILTER(CELLAUTO, cellauto, vsrc);
......
/*
* Copyright (c) 2013 Georg Martius <georg dot martius at web dot de>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define DEFAULT_RESULT_NAME "transforms.trf"
#include <vid.stab/libvidstab.h>
#include "libavutil/common.h"
#include "libavutil/opt.h"
#include "libavutil/imgutils.h"
#include "avfilter.h"
#include "internal.h"
#include "vidstabutils.h"
/** Private context of the vidstabdetect filter. */
typedef struct {
    const AVClass *class;       ///< class used for logging and AVOptions
    VSMotionDetect md;          ///< vid.stab motion detection state
    VSMotionDetectConfig conf;  ///< detection settings, filled in through the options
    char *result;               ///< path of the transform file that gets written
    FILE *f;                    ///< stream of the opened transform file, NULL if not open
} StabData;
/* Byte offset of a field that lives directly in the private context. */
#define OFFSET(x) offsetof(StabData, x)
/* Byte offset of a field of the vid.stab configuration embedded in the private context. */
#define OFFSETC(x) (offsetof(StabData, conf)+offsetof(VSMotionDetectConfig, x))
/* Parenthesized so the expansion stays one value in any surrounding expression. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
static const AVOption vidstabdetect_options[]= {
{"result", "path to the file used to write the transforms (def:transforms.trf)", OFFSET(result), AV_OPT_TYPE_STRING, {.str = DEFAULT_RESULT_NAME}},
{"shakiness", "how shaky is the video and how quick is the camera?"
" 1: little (fast) 10: very strong/quick (slow) (def: 5)", OFFSETC(shakiness), AV_OPT_TYPE_INT, {.i64 = 5}, 1, 10, FLAGS},
{"accuracy", "(>=shakiness) 1: low 15: high (slow) (def: 9)", OFFSETC(accuracy), AV_OPT_TYPE_INT, {.i64 = 9 }, 1, 15, FLAGS},
{"stepsize", "region around minimum is scanned with 1 pixel resolution (def: 6)", OFFSETC(stepSize), AV_OPT_TYPE_INT, {.i64 = 6}, 1, 32, FLAGS},
{"mincontrast", "below this contrast a field is discarded (0-1) (def: 0.3)", OFFSETC(contrastThreshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.25}, 0.0, 1.0, FLAGS},
{"show", "0: draw nothing (def); 1,2: show fields and transforms", OFFSETC(show), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 2, FLAGS},
{"tripod", "virtual tripod mode (if >0): motion is compared to a reference"
" reference frame (frame # is the value) (def: 0)", OFFSETC(virtualTripod), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS},
{NULL},
};
AVFILTER_DEFINE_CLASS(vidstabdetect);
/**
 * Filter init callback.
 *
 * Stores the filter class in the private context and redirects the
 * vid.stab memory and logging hooks to the libav* implementations.
 *
 * @return always 0
 */
static av_cold int init(AVFilterContext *ctx)
{
    StabData *s = ctx->priv;

    s->class = &vidstabdetect_class;
    vs_set_mem_and_log_functions();

    av_log(ctx, AV_LOG_VERBOSE, "vidstabdetect filter: init %s\n", LIBVIDSTAB_VERSION);

    return 0;
}
/**
 * Filter uninit callback.
 *
 * Closes the transform file (if it was opened) and releases the
 * vid.stab motion detection state.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    StabData *sd = ctx->priv;
    VSMotionDetect *md = &sd->md;

    if (sd->f) {
        /* fclose() flushes the buffered transform data, so a failure here
         * means the file on disk may be incomplete: tell the user. */
        if (fclose(sd->f) != 0) {
            av_log(ctx, AV_LOG_WARNING, "error while closing the transform file\n");
        }
        sd->f = NULL;
    }

    vsMotionDetectionCleanup(md);
}
/**
 * Announce the pixel formats supported by the filter.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the format list could not be
 *         created
 */
static int query_formats(AVFilterContext *ctx)
{
    // If you add something here also add it in vidstabutils.c
    static const enum AVPixelFormat pix_fmts[] = {
        AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUVA420P,
        AV_PIX_FMT_YUV440P,  AV_PIX_FMT_GRAY8,
        AV_PIX_FMT_RGB24,    AV_PIX_FMT_BGR24,    AV_PIX_FMT_RGBA,
        AV_PIX_FMT_NONE
    };
    AVFilterFormats *formats = ff_make_format_list(pix_fmts);

    /* do not hand a NULL list to ff_set_common_formats() */
    if (!formats)
        return AVERROR(ENOMEM);

    ff_set_common_formats(ctx, formats);
    return 0;
}
/**
 * Configure the input link.
 *
 * Builds the vid.stab frame description from the negotiated link
 * properties, cross-checks it against the libavutil pixel format
 * descriptor, initializes the motion detection and opens the transform
 * file for writing.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    StabData *sd = ctx->priv;
    VSMotionDetect *md = &sd->md;
    VSFrameInfo fi;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);

    if (!desc) {
        av_log(ctx, AV_LOG_ERROR, "pixel-format error: unknown pixel format, please report a BUG\n");
        return AVERROR(EINVAL);
    }

    vsFrameInfoInit(&fi, inlink->w, inlink->h, av_2_vs_pixel_format(ctx, inlink->format));

    /* sanity checks: both libraries must agree on the frame layout */
    if (fi.bytesPerPixel != av_get_bits_per_pixel(desc)/8) {
        av_log(ctx, AV_LOG_ERROR, "pixel-format error: wrong bits/per/pixel, please report a BUG\n");
        return AVERROR(EINVAL);
    }
    if (fi.log2ChromaW != desc->log2_chroma_w) {
        av_log(ctx, AV_LOG_ERROR, "pixel-format error: log2_chroma_w, please report a BUG\n");
        return AVERROR(EINVAL);
    }
    if (fi.log2ChromaH != desc->log2_chroma_h) {
        av_log(ctx, AV_LOG_ERROR, "pixel-format error: log2_chroma_h, please report a BUG\n");
        return AVERROR(EINVAL);
    }

    // set values that are not initialized by the options
    sd->conf.algo    = 1;
    sd->conf.modName = "vidstabdetect";
    if (vsMotionDetectInit(md, &sd->conf, &fi) != VS_OK) {
        av_log(ctx, AV_LOG_ERROR, "initialization of Motion Detection failed, please report a BUG\n");
        return AVERROR(EINVAL);
    }

    /* read the configuration back from the library before reporting it */
    vsMotionDetectGetConfig(&sd->conf, md);
    av_log(ctx, AV_LOG_INFO, "Video stabilization settings (pass 1/2):\n");
    av_log(ctx, AV_LOG_INFO, " shakiness = %d\n", sd->conf.shakiness);
    av_log(ctx, AV_LOG_INFO, " accuracy = %d\n", sd->conf.accuracy);
    av_log(ctx, AV_LOG_INFO, " stepsize = %d\n", sd->conf.stepSize);
    av_log(ctx, AV_LOG_INFO, " mincontrast = %f\n", sd->conf.contrastThreshold);
    av_log(ctx, AV_LOG_INFO, " show = %d\n", sd->conf.show);
    av_log(ctx, AV_LOG_INFO, " result = %s\n", sd->result);

    sd->f = fopen(sd->result, "w");
    if (!sd->f) {
        /* save errno first: av_log() may overwrite it */
        int err = errno;
        av_log(ctx, AV_LOG_ERROR, "cannot open transform file %s\n", sd->result);
        return err ? AVERROR(err) : AVERROR(EINVAL);
    }
    /* on failure the stream stays in sd->f and is closed by uninit() */
    if (vsPrepareFile(md, sd->f) != VS_OK) {
        av_log(ctx, AV_LOG_ERROR, "cannot write to transform file %s\n", sd->result);
        return AVERROR(EINVAL);
    }
    return 0;
}
/**
 * Run the motion detection on one frame and append the result to the
 * transform file, then pass the frame on to the output link.
 *
 * A private output buffer is only needed when visualization is enabled
 * (show > 0) and the input frame is not writable; in every other case the
 * input frame itself is forwarded. The pixels are copied into the private
 * buffer before the detection runs and the detection works on that copy,
 * so a shared input buffer is never modified
 * (NOTE(review): this presumes vid.stab draws its visualization into the
 * frame it is handed -- confirm against the library).
 *
 * The function owns @p in: it is either forwarded or freed on every path.
 *
 * @return the result of ff_filter_frame() on success, a negative AVERROR
 *         code on failure
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    StabData *sd = ctx->priv;
    VSMotionDetect *md = &sd->md;
    AVFilterLink *outlink = inlink->dst->outputs[0];
    LocalMotions localmotions;
    AVFrame *out = in;
    VSFrame frame;
    int plane;
    int ret;

    if (sd->conf.show > 0 && !av_frame_is_writable(in)) {
        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }
        av_frame_copy_props(out, in);
        av_image_copy(out->data, out->linesize,
                      (void*)in->data, in->linesize,
                      in->format, in->width, in->height);
    }

    for (plane = 0; plane < md->fi.planes; plane++) {
        frame.data[plane]     = out->data[plane];
        frame.linesize[plane] = out->linesize[plane];
    }

    if (vsMotionDetection(md, &localmotions, &frame) != VS_OK) {
        av_log(ctx, AV_LOG_ERROR, "motion detection failed\n");
        /* AVERROR_EXTERNAL is already negative, it must not be wrapped in AVERROR() */
        ret = AVERROR_EXTERNAL;
        goto fail;
    }

    if (vsWriteToFile(md, sd->f, &localmotions) != VS_OK) {
        /* save errno first: av_log() may overwrite it */
        int err = errno;
        av_log(ctx, AV_LOG_ERROR, "cannot write to transform file\n");
        vs_vector_del(&localmotions);
        ret = err ? AVERROR(err) : AVERROR(EIO);
        goto fail;
    }
    vs_vector_del(&localmotions);

    if (out != in)
        av_frame_free(&in);

    return ff_filter_frame(outlink, out);

fail:
    if (out != in)
        av_frame_free(&out);
    av_frame_free(&in);
    return ret;
}
/* The single video input pad: configured by config_input(), fed through filter_frame(). */
static const AVFilterPad avfilter_vf_vidstabdetect_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
    { NULL }
};
/* The single video output pad; it needs no callbacks of its own. */
static const AVFilterPad avfilter_vf_vidstabdetect_outputs[] = {
    {
        .type = AVMEDIA_TYPE_VIDEO,
        .name = "default",
    },
    { NULL }
};
/*
 * Filter definition of vidstabdetect.
 *
 * The pieces of the description are adjacent string literals, so each
 * piece carries its own separator; without them the words run together.
 */
AVFilter avfilter_vf_vidstabdetect = {
    .name          = "vidstabdetect",
    .description   = NULL_IF_CONFIG_SMALL("pass 1 of 2 for stabilization: "
                                          "extracts relative transformations "
                                          "(pass 2 see vidstabtransform)"),
    .priv_size     = sizeof(StabData),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = avfilter_vf_vidstabdetect_inputs,
    .outputs       = avfilter_vf_vidstabdetect_outputs,
    .priv_class    = &vidstabdetect_class,
};
This diff is collapsed.
/*
* Copyright (c) 2013 Georg Martius <georg dot martius at web dot de>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "vidstabutils.h"
/**
 * Translate a libavutil pixel format into the matching vid.stab one.
 *
 * @param ctx context used for logging
 * @param pf  pixel format to translate
 * @return the vid.stab pixel format, or PF_NONE (after logging an error)
 *         if there is no counterpart for pf
 */
VSPixelFormat av_2_vs_pixel_format(AVFilterContext *ctx, enum AVPixelFormat pf){
    static const struct {
        enum AVPixelFormat av;
        VSPixelFormat      vs;
    } fmt_map[] = {
        { AV_PIX_FMT_YUV420P,  PF_YUV420P  },
        { AV_PIX_FMT_YUV422P,  PF_YUV422P  },
        { AV_PIX_FMT_YUV444P,  PF_YUV444P  },
        { AV_PIX_FMT_YUV410P,  PF_YUV410P  },
        { AV_PIX_FMT_YUV411P,  PF_YUV411P  },
        { AV_PIX_FMT_YUV440P,  PF_YUV440P  },
        { AV_PIX_FMT_YUVA420P, PF_YUVA420P },
        { AV_PIX_FMT_GRAY8,    PF_GRAY8    },
        { AV_PIX_FMT_RGB24,    PF_RGB24    },
        { AV_PIX_FMT_BGR24,    PF_BGR24    },
        { AV_PIX_FMT_RGBA,     PF_RGBA     },
    };
    unsigned idx;

    for (idx = 0; idx < sizeof(fmt_map) / sizeof(fmt_map[0]); idx++) {
        if (fmt_map[idx].av == pf)
            return fmt_map[idx].vs;
    }

    av_log(ctx, AV_LOG_ERROR, "cannot deal with pixel format %i\n", pf);
    return PF_NONE;
}
/**
 * Minimal logging context for messages coming from the vid.stab library:
 * it carries nothing but the AVClass pointer.
 */
typedef struct VS2AVLogCtx {
    const AVClass *class;
} VS2AVLogCtx;
/**
 * Logging callback installed into vid.stab: forwards a message to av_vlog().
 *
 * A temporary AVClass named after the tag is built on the stack for each
 * call, so the tag is what av_default_item_name() reports for the message.
 *
 * @param type   log level, passed to av_vlog() unchanged
 * @param tag    name of the emitting module, used as class name
 * @param format printf-style format string followed by its arguments
 * @return always VS_OK
 */
static int vs_2_av_log_wrapper(int type, const char* tag, const char* format, ...){
    AVClass log_class = {
        .class_name = tag,
        .item_name  = av_default_item_name,
        .option     = NULL,
        .version    = LIBAVUTIL_VERSION_INT,
        .category   = AV_CLASS_CATEGORY_FILTER,
    };
    VS2AVLogCtx log_ctx = { .class = &log_class };
    va_list args;

    va_start(args, format);
    av_vlog(&log_ctx, type, format, args);
    va_end(args);

    return VS_OK;
}
/**
 * Point the vid.stab hooks at their libav* counterparts: memory
 * management, log levels, the log callback and the status codes.
 */
void vs_set_mem_and_log_functions(void){
    /* status codes reported by the library */
    VS_OK    = 1;
    VS_ERROR = 0;

    /* logging: levels first, then the callback itself */
    VS_MSG_TYPE   = AV_LOG_VERBOSE;
    VS_INFO_TYPE  = AV_LOG_INFO;
    VS_WARN_TYPE  = AV_LOG_WARNING;
    VS_ERROR_TYPE = AV_LOG_ERROR;
    vs_log        = vs_2_av_log_wrapper;

    /* memory management */
    vs_free    = av_free;
    vs_realloc = av_realloc;
    vs_zalloc  = av_mallocz;
    vs_malloc  = av_malloc;
}
/*
* Copyright (c) 2013 Georg Martius <georg dot martius at web dot de>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFILTER_VIDSTABUTILS_H
#define AVFILTER_VIDSTABUTILS_H
#include <vid.stab/libvidstab.h>
#include "avfilter.h"
/* ** some conversions from avlib to vid.stab constants and functions *** */
/**
 * Convert a libavutil pixel format into the one of the vid.stab library.
 *
 * @param ctx context used to log an error for unsupported formats
 * @param pf  pixel format to convert
 * @return the matching vid.stab pixel format, PF_NONE if pf is not supported
 */
VSPixelFormat av_2_vs_pixel_format(AVFilterContext *ctx, enum AVPixelFormat pf);
/**
 * Set the memory allocation functions, the logging function and the
 * logging/status constants of vid.stab to the av versions.
 */
void vs_set_mem_and_log_functions(void);
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment