Commit 6c2a7a8e authored by Marton Balint

avfilter/vf_framerate: factorize SAD functions which compute SAD for a whole frame

Also add SIMD which works on lines because it is faster than calculating it on
8x8 blocks using pixelutils.
Signed-off-by: Marton Balint <cus@passwd.hu>
parent 6df9020f
...@@ -2337,6 +2337,7 @@ CONFIG_EXTRA=" ...@@ -2337,6 +2337,7 @@ CONFIG_EXTRA="
rtpdec rtpdec
rtpenc_chain rtpenc_chain
rv34dsp rv34dsp
scene_sad
sinewin sinewin
snappy snappy
srtp srtp
...@@ -3400,7 +3401,7 @@ find_rect_filter_deps="avcodec avformat gpl" ...@@ -3400,7 +3401,7 @@ find_rect_filter_deps="avcodec avformat gpl"
firequalizer_filter_deps="avcodec" firequalizer_filter_deps="avcodec"
firequalizer_filter_select="rdft" firequalizer_filter_select="rdft"
flite_filter_deps="libflite" flite_filter_deps="libflite"
framerate_filter_select="pixelutils" framerate_filter_select="scene_sad"
frei0r_filter_deps="frei0r libdl" frei0r_filter_deps="frei0r libdl"
frei0r_src_filter_deps="frei0r libdl" frei0r_src_filter_deps="frei0r libdl"
fspp_filter_deps="gpl" fspp_filter_deps="gpl"
......
...@@ -28,6 +28,7 @@ OBJS-$(HAVE_THREADS) += pthread.o ...@@ -28,6 +28,7 @@ OBJS-$(HAVE_THREADS) += pthread.o
OBJS-$(CONFIG_QSVVPP) += qsvvpp.o OBJS-$(CONFIG_QSVVPP) += qsvvpp.o
DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn_backend_tf.o DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn_backend_tf.o
OBJS-$(CONFIG_DNN) += dnn_interface.o dnn_backend_native.o $(DNN-OBJS-yes) OBJS-$(CONFIG_DNN) += dnn_interface.o dnn_backend_native.o $(DNN-OBJS-yes)
OBJS-$(CONFIG_SCENE_SAD) += scene_sad.o
# audio filters # audio filters
OBJS-$(CONFIG_ABENCH_FILTER) += f_bench.o OBJS-$(CONFIG_ABENCH_FILTER) += f_bench.o
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#ifndef AVFILTER_FRAMERATE_H #ifndef AVFILTER_FRAMERATE_H
#define AVFILTER_FRAMERATE_H #define AVFILTER_FRAMERATE_H
#include "libavutil/pixelutils.h" #include "scene_sad.h"
#include "avfilter.h" #include "avfilter.h"
#define BLEND_FUNC_PARAMS const uint8_t *src1, ptrdiff_t src1_linesize, \ #define BLEND_FUNC_PARAMS const uint8_t *src1, ptrdiff_t src1_linesize, \
...@@ -48,7 +48,7 @@ typedef struct FrameRateContext { ...@@ -48,7 +48,7 @@ typedef struct FrameRateContext {
AVRational srce_time_base; ///< timebase of source AVRational srce_time_base; ///< timebase of source
AVRational dest_time_base; ///< timebase of destination AVRational dest_time_base; ///< timebase of destination
av_pixelutils_sad_fn sad; ///< Sum of the absolute difference function (scene detect only) ff_scene_sad_fn sad; ///< Sum of the absolute difference function (scene detect only)
double prev_mafd; ///< previous MAFD (scene detect only) double prev_mafd; ///< previous MAFD (scene detect only)
int blend_factor_max; int blend_factor_max;
......
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Scene SAD functions
*/
#include "scene_sad.h"
void ff_scene_sad16_c(SCENE_SAD_PARAMS)
{
uint64_t sad = 0;
const uint16_t *src1w = (const uint16_t *)src1;
const uint16_t *src2w = (const uint16_t *)src2;
int x, y;
stride1 /= 2;
stride2 /= 2;
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++)
sad += FFABS(src1w[x] - src2w[x]);
src1w += stride1;
src2w += stride2;
}
*sum = sad;
}
/**
 * Sum of absolute differences between two planes of uint8_t samples.
 *
 * Each of the height rows contributes its first width samples; the row
 * pointers advance by stride1/stride2 after every row. The total is stored
 * in *sum (0 when the area is empty).
 */
void ff_scene_sad_c(SCENE_SAD_PARAMS)
{
    const uint8_t *row1 = src1;
    const uint8_t *row2 = src2;
    uint64_t total = 0;
    int col, row;

    for (row = 0; row < height; row++, row1 += stride1, row2 += stride2)
        for (col = 0; col < width; col++)
            total += FFABS(row1[col] - row2[col]);

    *sum = total;
}
/**
 * Select a whole-frame SAD implementation for the given sample depth.
 *
 * On x86 builds the x86-specific selector is asked first. When it yields
 * nothing, the C implementations are used: ff_scene_sad_c for depth 8 and
 * ff_scene_sad16_c for depth 16.
 *
 * @return the chosen function, or NULL when no implementation matches depth
 */
ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
{
    ff_scene_sad_fn fn = NULL;

    if (ARCH_X86)
        fn = ff_scene_sad_get_fn_x86(depth);
    if (fn)
        return fn;

    /* No optimized version available: fall back to plain C. */
    switch (depth) {
    case 8:
        return ff_scene_sad_c;
    case 16:
        return ff_scene_sad16_c;
    default:
        return NULL;
    }
}
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Scene SAD functions
*/
#ifndef AVFILTER_SCENE_SAD_H
#define AVFILTER_SCENE_SAD_H
#include "avfilter.h"
/**
 * Common parameter list shared by every scene SAD function:
 *  - src1, stride1: first plane and the step between its rows
 *  - src2, stride2: second plane and the step between its rows
 *  - width, height: extent of the compared area
 *  - sum:           output, receives the sum of absolute differences
 */
#define SCENE_SAD_PARAMS const uint8_t *src1, ptrdiff_t stride1, \
const uint8_t *src2, ptrdiff_t stride2, \
ptrdiff_t width, ptrdiff_t height, \
uint64_t *sum
/** Pointer to a function computing the SAD of a whole plane. */
typedef void (*ff_scene_sad_fn)(SCENE_SAD_PARAMS);
/** C implementation operating on uint8_t samples. */
void ff_scene_sad_c(SCENE_SAD_PARAMS);
/**
 * C implementation that reads the planes as uint16_t samples; the strides are
 * halved internally to step over uint16_t elements.
 */
void ff_scene_sad16_c(SCENE_SAD_PARAMS);
/**
 * x86-specific selector.
 * @return an optimized function for the given depth, or NULL if none applies
 */
ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
/**
 * Generic selector: tries the x86 selector on x86 builds, then falls back to
 * the C implementations for depth 8 and 16.
 * @return the selected function, or NULL if depth is not supported
 */
ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
#endif /* AVFILTER_SCENE_SAD_H */
...@@ -33,13 +33,13 @@ ...@@ -33,13 +33,13 @@
#include "libavutil/internal.h" #include "libavutil/internal.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
#include "libavutil/pixelutils.h"
#include "avfilter.h" #include "avfilter.h"
#include "internal.h" #include "internal.h"
#include "video.h" #include "video.h"
#include "filters.h" #include "filters.h"
#include "framerate.h" #include "framerate.h"
#include "scene_sad.h"
#define OFFSET(x) offsetof(FrameRateContext, x) #define OFFSET(x) offsetof(FrameRateContext, x)
#define V AV_OPT_FLAG_VIDEO_PARAM #define V AV_OPT_FLAG_VIDEO_PARAM
...@@ -62,52 +62,6 @@ static const AVOption framerate_options[] = { ...@@ -62,52 +62,6 @@ static const AVOption framerate_options[] = {
AVFILTER_DEFINE_CLASS(framerate); AVFILTER_DEFINE_CLASS(framerate);
/**
 * Sum of absolute differences over a single 8x8 block of uint16_t samples.
 *
 * stride1/stride2 are the distances between consecutive rows, expressed in
 * uint16_t elements (they are added directly to the typed pointers).
 */
static av_always_inline int64_t sad_8x8_16(const uint16_t *src1, ptrdiff_t stride1,
                                           const uint16_t *src2, ptrdiff_t stride2)
{
    int acc = 0;
    int row, col;

    for (row = 0; row < 8; row++, src1 += stride1, src2 += stride2)
        for (col = 0; col < 8; col++)
            acc += FFABS(src1[col] - src2[col]);

    return acc;
}
/**
 * SAD of two uint16_t planes, accumulated over 8x8 blocks via sad_8x8_16().
 *
 * Only complete 8x8 blocks are visited, so up to 7 trailing rows and columns
 * are left out of the sum. The linesizes are in uint16_t elements. The
 * context argument s is not used here.
 */
static int64_t scene_sad16(FrameRateContext *s, const uint16_t *p1, int p1_linesize, const uint16_t* p2, int p2_linesize, const int width, const int height)
{
    int64_t total = 0;
    int bx, by;

    for (by = 0; by < height - 7; by += 8) {
        const uint16_t *row1 = p1 + by * p1_linesize;
        const uint16_t *row2 = p2 + by * p2_linesize;

        for (bx = 0; bx < width - 7; bx += 8)
            total += sad_8x8_16(row1 + bx, p1_linesize,
                                row2 + bx, p2_linesize);
    }

    return total;
}
/**
 * SAD of two uint8_t planes, accumulated over 8x8 blocks with the block SAD
 * function stored in the filter context (s->sad).
 *
 * Only complete 8x8 blocks are visited, so up to 7 trailing rows and columns
 * are left out of the sum. emms_c() is called once after all blocks have been
 * processed.
 */
static int64_t scene_sad8(FrameRateContext *s, uint8_t *p1, int p1_linesize, uint8_t* p2, int p2_linesize, const int width, const int height)
{
    int64_t total = 0;
    int bx, by;

    for (by = 0; by < height - 7; by += 8) {
        uint8_t *row1 = p1 + by * p1_linesize;
        uint8_t *row2 = p2 + by * p2_linesize;

        for (bx = 0; bx < width - 7; bx += 8)
            total += s->sad(row1 + bx, p1_linesize,
                            row2 + bx, p2_linesize);
    }
    emms_c();

    return total;
}
static double get_scene_score(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next) static double get_scene_score(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next)
{ {
FrameRateContext *s = ctx->priv; FrameRateContext *s = ctx->priv;
...@@ -117,16 +71,13 @@ static double get_scene_score(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next ...@@ -117,16 +71,13 @@ static double get_scene_score(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next
if (crnt->height == next->height && if (crnt->height == next->height &&
crnt->width == next->width) { crnt->width == next->width) {
int64_t sad; uint64_t sad;
double mafd, diff; double mafd, diff;
ff_dlog(ctx, "get_scene_score() process\n"); ff_dlog(ctx, "get_scene_score() process\n");
if (s->bitdepth == 8) s->sad(crnt->data[0], crnt->linesize[0], next->data[0], next->linesize[0], crnt->width, crnt->height, &sad);
sad = scene_sad8(s, crnt->data[0], crnt->linesize[0], next->data[0], next->linesize[0], crnt->width, crnt->height); emms_c();
else mafd = (double)sad * 100.0 / (crnt->width * crnt->height) / (1 << s->bitdepth);
sad = scene_sad16(s, (const uint16_t*)crnt->data[0], crnt->linesize[0] / 2, (const uint16_t*)next->data[0], next->linesize[0] / 2, crnt->width, crnt->height);
mafd = (double)sad * 100.0 / FFMAX(1, (crnt->height & ~7) * (crnt->width & ~7)) / (1 << s->bitdepth);
diff = fabs(mafd - s->prev_mafd); diff = fabs(mafd - s->prev_mafd);
ret = av_clipf(FFMIN(mafd, diff), 0, 100.0); ret = av_clipf(FFMIN(mafd, diff), 0, 100.0);
s->prev_mafd = mafd; s->prev_mafd = mafd;
...@@ -350,7 +301,7 @@ static int config_input(AVFilterLink *inlink) ...@@ -350,7 +301,7 @@ static int config_input(AVFilterLink *inlink)
s->bitdepth = pix_desc->comp[0].depth; s->bitdepth = pix_desc->comp[0].depth;
s->vsub = pix_desc->log2_chroma_h; s->vsub = pix_desc->log2_chroma_h;
s->sad = av_pixelutils_get_sad_fn(3, 3, 2, s); // 8x8 both sources aligned s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
if (!s->sad) if (!s->sad)
return AVERROR(EINVAL); return AVERROR(EINVAL);
......
OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad_init.o
OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o
OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
...@@ -29,6 +31,8 @@ OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o ...@@ -29,6 +31,8 @@ OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif_init.o OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif_init.o
OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
X86ASM-OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad.o
X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o
X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
......
;*****************************************************************************
;* x86-optimized functions for scene SAD
;*
;* Copyright (C) 2018 Marton Balint
;*
;* Based on vf_blend.asm, Copyright (C) 2015 Paul B Mahol
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION .text
; SAD_INIT: declare the scene_sad entry point and prepare the loop state.
; NOTE(review): the numeric cglobal operands are presumably x86inc's
; "loaded args, GPRs, vector registers" counts, followed by the register
; names -- confirm against x86inc.asm. The register named "end" is used as the
; remaining-row counter by the row loop.
%macro SAD_INIT 0
cglobal scene_sad, 6, 7, 2, src1, stride1, src2, stride2, width, end, x
; Move both source pointers to the end of the first row and negate the width,
; so that a column index can count upwards from -width towards zero.
add src1q, widthq
add src2q, widthq
neg widthq
; m1 is the running SAD accumulator; clear it.
pxor m1, m1
%endmacro
; SAD_LOOP: accumulate the byte-wise SAD of all rows into m1 and store it.
;
; Both loops test their condition only after the body has run, so at least one
; vector of mmsize bytes is read from each source and at least one row is
; processed, even if the width or row count passed in is zero. Each step
; consumes mmsize bytes, so the width is expected to be a multiple of mmsize.
%macro SAD_LOOP 0
.nextrow:
; Restart the column index at -width for every row.
mov xq, widthq
.loop:
; Unaligned load from src1, then psadbw against src2: sums of absolute byte
; differences, one partial sum per 64-bit lane.
movu m0, [src1q + xq]
psadbw m0, [src2q + xq]
; Add the partial sums to the 64-bit lanes of the accumulator.
paddq m1, m0
add xq, mmsize
; Continue while the index is still negative, i.e. before the row end.
jl .loop
; Advance both sources to the next row.
add src1q, stride1q
add src2q, stride2q
; One row done; loop while rows remain.
sub endd, 1
jg .nextrow
; NOTE(review): r6mp is presumably the 7th argument (the sum pointer), which
; the cglobal line does not name -- confirm against x86inc.asm.
mov r0q, r6mp
; The whole accumulator register is stored: the destination must provide
; mmsize bytes, holding one partial sum per 64-bit lane.
movu [r0q], m1 ; sum
REP_RET
%endmacro
; SAD_FRAMES: emit one complete scene_sad function (setup followed by the
; row/column loop) for the currently selected instruction set.
%macro SAD_FRAMES 0
SAD_INIT
SAD_LOOP
%endmacro
; SSE2 version using XMM registers.
INIT_XMM sse2
SAD_FRAMES
; AVX2 version using YMM registers, only assembled when HAVE_AVX2_EXTERNAL
; is set.
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
SAD_FRAMES
%endif
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/scene_sad.h"
/**
 * Define FUNC_NAME, a ff_scene_sad_fn-compatible wrapper around the assembly
 * routine ASM_FUNC_NAME, which processes MMSIZE bytes per step.
 *
 * The plane is split into two parts:
 *  - the leading awidth columns (width rounded down to a multiple of MMSIZE)
 *    are handled by the assembly routine, which stores one partial sum per
 *    64-bit lane (MMSIZE / 8 values) that are added up here;
 *  - the remaining width - awidth columns are handled by ff_scene_sad_c().
 *
 * The assembly loops check their conditions only after the first pass, so
 * the routine always reads at least one full vector from at least one row.
 * It is therefore called only when there is a non-empty aligned region;
 * otherwise narrow planes (width < MMSIZE) would be read past their width and
 * have their pixels counted twice. The C tail is skipped when it is empty.
 *
 * *sum receives the total; it is 0 for an empty area.
 */
#define SCENE_SAD_FUNC(FUNC_NAME, ASM_FUNC_NAME, MMSIZE)                      \
void ASM_FUNC_NAME(SCENE_SAD_PARAMS);                                         \
                                                                              \
static void FUNC_NAME(SCENE_SAD_PARAMS) {                                     \
    uint64_t sad[MMSIZE / 8] = {0};                                           \
    ptrdiff_t awidth = width & ~(MMSIZE - 1);                                 \
    *sum = 0;                                                                 \
    if (awidth > 0 && height > 0) {                                           \
        /* SIMD part: whole vectors only. */                                  \
        ASM_FUNC_NAME(src1, stride1, src2, stride2, awidth, height, sad);     \
        for (int i = 0; i < MMSIZE / 8; i++)                                  \
            *sum += sad[i];                                                   \
    }                                                                         \
    if (width > awidth) {                                                     \
        /* Scalar part: columns that do not fill a whole vector. */           \
        ff_scene_sad_c(src1 + awidth, stride1,                                \
                       src2 + awidth, stride2,                                \
                       width - awidth, height, sad);                          \
        *sum += sad[0];                                                       \
    }                                                                         \
}
SCENE_SAD_FUNC(scene_sad_sse2, ff_scene_sad_sse2, 16);
SCENE_SAD_FUNC(scene_sad_avx2, ff_scene_sad_avx2, 32);
/**
 * Select an x86 SIMD scene SAD implementation.
 *
 * SIMD versions exist for depth 8 only. The AVX2 wrapper is preferred when
 * EXTERNAL_AVX2_FAST() accepts the CPU flags, otherwise the SSE2 wrapper is
 * used when EXTERNAL_SSE2() accepts them.
 *
 * @return the selected wrapper, or NULL if none is applicable
 */
ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (depth != 8)
        return NULL;

    if (EXTERNAL_AVX2_FAST(cpu_flags))
        return scene_sad_avx2;
    if (EXTERNAL_SSE2(cpu_flags))
        return scene_sad_sse2;

    return NULL;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment