Commit 28a2107a authored by Clément Bœsch's avatar Clément Bœsch

avutil: add pixelutils API

parent 6931d127
...@@ -144,6 +144,7 @@ Component options: ...@@ -144,6 +144,7 @@ Component options:
--disable-mdct disable MDCT code --disable-mdct disable MDCT code
--disable-rdft disable RDFT code --disable-rdft disable RDFT code
--disable-fft disable FFT code --disable-fft disable FFT code
--disable-pixelutils disable pixel utils in libavutil
Hardware accelerators: Hardware accelerators:
--disable-dxva2 disable DXVA2 code [autodetect] --disable-dxva2 disable DXVA2 code [autodetect]
...@@ -1451,6 +1452,7 @@ SUBSYSTEM_LIST=" ...@@ -1451,6 +1452,7 @@ SUBSYSTEM_LIST="
lsp lsp
lzo lzo
mdct mdct
pixelutils
network network
rdft rdft
" "
......
...@@ -15,6 +15,9 @@ libavutil: 2012-10-22 ...@@ -15,6 +15,9 @@ libavutil: 2012-10-22
API changes, most recent first: API changes, most recent first:
2014-08-02 - xxxxxxx - lavu 52.98.100 - pixelutils.h
Add pixelutils API with SAD functions
2014-08-xx - xxxxxxx - lavu 53.22.0 - pixfmt.h 2014-08-xx - xxxxxxx - lavu 53.22.0 - pixfmt.h
Add AV_PIX_FMT_YA16 pixel format for 16 bit packed gray with alpha. Add AV_PIX_FMT_YA16 pixel format for 16 bit packed gray with alpha.
......
...@@ -44,6 +44,7 @@ HEADERS = adler32.h \ ...@@ -44,6 +44,7 @@ HEADERS = adler32.h \
opt.h \ opt.h \
parseutils.h \ parseutils.h \
pixdesc.h \ pixdesc.h \
pixelutils.h \
pixfmt.h \ pixfmt.h \
random_seed.h \ random_seed.h \
replaygain.h \ replaygain.h \
...@@ -113,6 +114,7 @@ OBJS = adler32.o \ ...@@ -113,6 +114,7 @@ OBJS = adler32.o \
opt.o \ opt.o \
parseutils.o \ parseutils.o \
pixdesc.o \ pixdesc.o \
pixelutils.o \
random_seed.o \ random_seed.o \
rational.o \ rational.o \
rc4.o \ rc4.o \
...@@ -170,6 +172,7 @@ TESTPROGS = adler32 \ ...@@ -170,6 +172,7 @@ TESTPROGS = adler32 \
pca \ pca \
parseutils \ parseutils \
pixdesc \ pixdesc \
pixelutils \
random_seed \ random_seed \
rational \ rational \
ripemd \ ripemd \
......
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "common.h"
#include "pixelutils.h"
#if CONFIG_PIXELUTILS
#include "x86/pixelutils.h"
/**
 * Generic C reference: sum of absolute differences over a w x h block.
 *
 * Both sources are read as rows of w bytes; after each row the pointers are
 * advanced by their respective stride (expressed in bytes).
 *
 * @return the sum of |src1[x] - src2[x]| over every byte of the block
 */
static av_always_inline int sad_wxh(const uint8_t *src1, ptrdiff_t stride1,
                                    const uint8_t *src2, ptrdiff_t stride2,
                                    int w, int h)
{
    const uint8_t *row1 = src1;
    const uint8_t *row2 = src2;
    int total = 0;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < w; col++) {
            const int diff = row1[col] - row2[col];

            total += diff < 0 ? -diff : diff;
        }
        row1 += stride1;
        row2 += stride2;
    }
    return total;
}
/*
 * Emit block_sad_<n>x<n>_c(): the C reference SAD for a square n x n block,
 * implemented by forwarding to sad_wxh() with both dimensions fixed to n.
 */
#define DECLARE_BLOCK_FUNCTIONS(n)                                              \
static int block_sad_##n##x##n##_c(const uint8_t *src1, ptrdiff_t stride1,      \
                                   const uint8_t *src2, ptrdiff_t stride2)      \
{                                                                               \
    return sad_wxh(src1, stride1, src2, stride2, (n), (n));                     \
}

DECLARE_BLOCK_FUNCTIONS(2)
DECLARE_BLOCK_FUNCTIONS(4)
DECLARE_BLOCK_FUNCTIONS(8)
DECLARE_BLOCK_FUNCTIONS(16)

/* C reference table: entry k handles blocks of (1 << (k + 1)) pixels per side. */
static const av_pixelutils_sad_fn sad_c[] = {
    [0] = block_sad_2x2_c,
    [1] = block_sad_4x4_c,
    [2] = block_sad_8x8_c,
    [3] = block_sad_16x16_c,
};
#endif /* CONFIG_PIXELUTILS */
/*
 * Return the SAD function for a (1 << w_bits) x (1 << h_bits) block, or NULL.
 *
 * Without CONFIG_PIXELUTILS an error is logged on log_ctx and NULL is
 * returned. Otherwise NULL is returned when either size is outside
 * [1, number of entries in sad_c] or when the block is not square. The result
 * comes from a local copy of the C table that the x86 init code may have
 * overridden according to `aligned`.
 */
av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligned, void *log_ctx)
{
#if CONFIG_PIXELUTILS
    av_pixelutils_sad_fn sad[FF_ARRAY_ELEMS(sad_c)];
    const int w_ok = w_bits >= 1 && w_bits <= FF_ARRAY_ELEMS(sad);
    const int h_ok = h_bits >= 1 && h_bits <= FF_ARRAY_ELEMS(sad);

    if (!w_ok || !h_ok)
        return NULL;
    if (w_bits != h_bits) // only squared sad for now
        return NULL;

    /* start from the C reference functions, then let arch code replace entries */
    memcpy(sad, sad_c, sizeof(sad));
#if ARCH_X86
    ff_pixelutils_sad_init_x86(sad, aligned);
#endif
    return sad[w_bits - 1];
#else
    av_log(log_ctx, AV_LOG_ERROR, "pixelutils support is required "
           "but libavutil is not compiled with it\n");
    return NULL;
#endif
}
#ifdef TEST
/*
 * Test buffer geometry. buf1 holds W1*H1 and buf2 holds W2*H2 uint32_t
 * elements; W1 and W2 are also passed to the SAD functions as the strides of
 * the first and second source respectively.
 */
#define W1 320
#define H1 240
#define W2 640
#define H2 480
static int run_test(const char *test,
const uint32_t *b1, const uint32_t *b2)
{
int i, a, ret = 0;
for (a = 0; a < 3; a++) {
const uint8_t *block1 = (const uint8_t *)b1;
const uint8_t *block2 = (const uint8_t *)b2;
switch (a) {
case 0: block1++; block2++; break;
case 1: block2++; break;
case 2: break;
}
for (i = 1; i <= FF_ARRAY_ELEMS(sad_c); i++) {
av_pixelutils_sad_fn f_ref = sad_c[i - 1];
av_pixelutils_sad_fn f_out = av_pixelutils_get_sad_fn(i, i, a, NULL);
const int out = f_out(block1, W1, block2, W2);
const int ref = f_ref(block1, W1, block2, W2);
printf("[%s] [%c%c] SAD [%s] %dx%d=%d ref=%d\n",
out == ref ? "OK" : "FAIL",
a ? 'A' : 'U', a == 2 ? 'A' : 'U',
test, 1<<i, 1<<i, out, ref);
if (out != ref)
ret = 1;
}
}
return ret;
}
/**
 * Self-test entry point.
 *
 * Runs run_test() on three data patterns: pseudo-random bytes, maximum
 * difference (0xff against 0x00) and zero difference (0x90 against 0x90).
 *
 * @return 0 if every pattern passed, non-zero if any comparison failed
 */
int main(void)
{
    int i, ret = 0;
    DECLARE_ALIGNED(32, uint32_t, buf1)[W1*H1];
    DECLARE_ALIGNED(32, uint32_t, buf2)[W2*H2];
    uint32_t state = 0;

    /* fill both buffers from one linear congruential sequence so the
     * "random" pattern is reproducible from run to run */
    for (i = 0; i < W1*H1; i++) {
        state = state * 1664525 + 1013904223;
        buf1[i] = state;
    }
    for (i = 0; i < W2*H2; i++) {
        state = state * 1664525 + 1013904223;
        buf2[i] = state;
    }

    /* run_test() reports a mismatch as 1 and never returns a negative value,
     * so the results are accumulated; testing each one with "< 0" would let a
     * failure of an earlier pattern be overwritten by a later success. All
     * three patterns are always run so the printed report stays complete. */
    ret |= run_test("random", buf1, buf2);

    memset(buf1, 0xff, sizeof(buf1));
    memset(buf2, 0x00, sizeof(buf2));
    ret |= run_test("max", buf1, buf2);

    memset(buf1, 0x90, sizeof(buf1));
    memset(buf2, 0x90, sizeof(buf2));
    ret |= run_test("min", buf1, buf2);

    return ret;
}
#endif /* TEST */
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVUTIL_PIXELUTILS_H
#define AVUTIL_PIXELUTILS_H
#include <stddef.h>
#include <stdint.h>
#include "common.h"
/**
 * Sum of abs(src1[x] - src2[x])
 *
 * Prototype of a sum-of-absolute-differences (SAD) function working on a
 * block of bytes whose dimensions are fixed by the function itself (see
 * av_pixelutils_get_sad_fn()).
 *
 * @param src1    first byte of the first block
 * @param stride1 distance in bytes between two consecutive rows of src1
 * @param src2    first byte of the second block
 * @param stride2 distance in bytes between two consecutive rows of src2
 *
 * @return the sum of the absolute differences over the whole block
 */
typedef int (*av_pixelutils_sad_fn)(const uint8_t *src1, ptrdiff_t stride1,
const uint8_t *src2, ptrdiff_t stride2);
/**
 * Get a potentially optimized pointer to a Sum-of-absolute-differences
 * function (see the av_pixelutils_sad_fn prototype).
 *
 * Only square blocks are currently supported, with w_bits and h_bits between
 * 1 and 4 (2x2 up to 16x16).
 *
 * @param w_bits  1<<w_bits is the requested width of the block size
 * @param h_bits  1<<h_bits is the requested height of the block size
 * @param aligned If set to 2, the returned sad function will assume src1 and
 *                src2 addresses are aligned on the block size.
 *                If set to 1, the returned sad function will assume src1 is
 *                aligned on the block size.
 *                If set to 0, the returned sad function assumes no particular
 *                alignment.
 * @param log_ctx context used for logging, can be NULL
 *
 * @return a pointer to the SAD function or NULL in case of error: a size
 *         outside the supported range, w_bits different from h_bits, or
 *         libavutil built without pixelutils support (in which case an error
 *         is also logged on log_ctx)
 */
av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits,
int aligned, void *log_ctx);
#endif /* AVUTIL_PIXELUTILS_H */
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
*/ */
#define LIBAVUTIL_VERSION_MAJOR 52 #define LIBAVUTIL_VERSION_MAJOR 52
#define LIBAVUTIL_VERSION_MINOR 97 #define LIBAVUTIL_VERSION_MINOR 98
#define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
......
...@@ -2,7 +2,11 @@ OBJS += x86/cpu.o \ ...@@ -2,7 +2,11 @@ OBJS += x86/cpu.o \
x86/float_dsp_init.o \ x86/float_dsp_init.o \
x86/lls_init.o \ x86/lls_init.o \
OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils_init.o \
YASM-OBJS += x86/cpuid.o \ YASM-OBJS += x86/cpuid.o \
x86/emms.o \ x86/emms.o \
x86/float_dsp.o \ x86/float_dsp.o \
x86/lls.o \ x86/lls.o \
YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \
;******************************************************************************
;* Pixel utilities SIMD
;*
;* Copyright (C) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;* Copyright (C) 2014 Clément Bœsch <u pkh me>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "x86util.asm"
SECTION_TEXT
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
;                               const uint8_t *src2, ptrdiff_t stride2);
;
; 8x8 SAD using only baseline MMX (no psadbw): the absolute difference of each
; byte pair is built from two saturated subtractions OR-ed together, widened
; to 16-bit words and accumulated in m6. The four word lanes of m6 are folded
; into one at the end and the low 16 bits are returned.
;-------------------------------------------------------------------------------
INIT_MMX mmx
cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
    pxor        m7, m7                      ; m7 = 0, used to zero-extend bytes
    pxor        m6, m6                      ; m6 = four 16-bit partial sums
%rep 4                                      ; two rows per iteration
    mova        m0, [src1q]
    mova        m2, [src1q + stride1q]
    mova        m1, [src2q]
    mova        m3, [src2q + stride2q]
    psubusb     m4, m0, m1                  ; max(src1 - src2, 0), row 0
    psubusb     m5, m2, m3                  ; max(src1 - src2, 0), row 1
    psubusb     m1, m0                      ; max(src2 - src1, 0), row 0
    psubusb     m3, m2                      ; max(src2 - src1, 0), row 1
    por         m1, m4                      ; |src1 - src2|, row 0
    por         m3, m5                      ; |src1 - src2|, row 1
    punpcklbw   m0, m1, m7                  ; widen low halves to words
    punpcklbw   m2, m3, m7
    punpckhbw   m1, m7                      ; widen high halves to words
    punpckhbw   m3, m7
    paddw       m0, m1
    paddw       m2, m3
    paddw       m0, m2
    paddw       m6, m0
    lea         src1q, [src1q + 2*stride1q]
    lea         src2q, [src2q + 2*stride2q]
%endrep
    psrlq       m0, m6, 32                  ; fold 4 word lanes into 2 ...
    paddw       m6, m0
    psrlq       m0, m6, 16                  ; ... then into 1
    paddw       m6, m0
    movd        eax, m6
    movzx       eax, ax                     ; keep only the folded low word
    ; MMX registers alias the x87 register stack: clear the MMX state so that
    ; callers of this public function pointer can use x87 floating point
    ; afterwards without having to know MMX was involved.
    emms
    RET
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1,
;                                  const uint8_t *src2, ptrdiff_t stride2);
;
; 8x8 SAD using psadbw on 8-byte rows, two rows per iteration, accumulated
; in m2.
;-------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
    pxor        m2, m2                      ; m2 = running sum
%rep 4                                      ; two rows per iteration
    mova        m0, [src1q]
    mova        m1, [src1q + stride1q]
    psadbw      m0, [src2q]
    psadbw      m1, [src2q + stride2q]
    paddw       m2, m0
    paddw       m2, m1
    lea         src1q, [src1q + 2*stride1q]
    lea         src2q, [src2q + 2*stride2q]
%endrep
    movd        eax, m2
    ; MMX registers alias the x87 register stack: clear the MMX state so that
    ; callers of this public function pointer can use x87 floating point
    ; afterwards without having to know MMX was involved.
    emms
    RET
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1,
;                                    const uint8_t *src2, ptrdiff_t stride2);
;
; 16x16 SAD using psadbw on two 8-byte halves of each row, one row per
; iteration, accumulated in m2.
;-------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2
    pxor        m2, m2                      ; m2 = running sum
%rep 16                                     ; one 16-byte row per iteration
    mova        m0, [src1q]
    mova        m1, [src1q + 8]
    psadbw      m0, [src2q]
    psadbw      m1, [src2q + 8]
    paddw       m2, m0
    paddw       m2, m1
    add         src1q, stride1q
    add         src2q, stride2q
%endrep
    movd        eax, m2
    ; MMX registers alias the x87 register stack: clear the MMX state so that
    ; callers of this public function pointer can use x87 floating point
    ; afterwards without having to know MMX was involved.
    emms
    RET
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
;                                  const uint8_t *src2, ptrdiff_t stride2);
;
; 16x16 SAD for the case where neither source is known to be aligned: every
; row of both sources is fetched with movu before psadbw is applied.
;-------------------------------------------------------------------------------
INIT_XMM sse2
cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2
; m4 = running sum (one partial sum per 64-bit half)
pxor m4, m4
; two rows per iteration
%rep 8
movu m0, [src1q]
movu m1, [src1q + stride1q]
movu m2, [src2q]
movu m3, [src2q + stride2q]
psadbw m0, m2
psadbw m1, m3
paddw m4, m0
paddw m4, m1
lea src1q, [src1q + 2*stride1q]
lea src2q, [src2q + 2*stride2q]
%endrep
; add the high-half partial sum to the low-half one and return it
movhlps m0, m4
paddw m4, m0
movd eax, m4
RET
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_[au]_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
;                                       const uint8_t *src2, ptrdiff_t stride2);
;
; The macro argument (a or u) selects mova or movu for the src2 loads. In both
; variants src1 is read directly as the memory operand of psadbw; the C init
; code only selects these variants when the caller declared src1 aligned.
;-------------------------------------------------------------------------------
%macro SAD_XMM_16x16 1
INIT_XMM sse2
cglobal pixelutils_sad_%1_16x16, 4,4,3, src1, stride1, src2, stride2
; m2 = running sum (one partial sum per 64-bit half)
pxor m2, m2
; two rows per iteration
%rep 8
mov%1 m0, [src2q]
mov%1 m1, [src2q + stride2q]
psadbw m0, [src1q]
psadbw m1, [src1q + stride1q]
paddw m2, m0
paddw m2, m1
lea src1q, [src1q + 2*stride1q]
lea src2q, [src2q + 2*stride2q]
%endrep
; add the high-half partial sum to the low-half one and return it
movhlps m0, m2
paddw m2, m0
movd eax, m2
RET
%endmacro
; instantiate ff_pixelutils_sad_a_16x16_sse2 and ff_pixelutils_sad_u_16x16_sse2
SAD_XMM_16x16 a
SAD_XMM_16x16 u
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVUTIL_X86_PIXELUTILS_H
#define AVUTIL_X86_PIXELUTILS_H
#include "libavutil/pixelutils.h"
/**
 * Replace entries of a SAD function table with x86 SIMD implementations
 * supported by the running CPU.
 *
 * @param sad     table to update in place; the x86 code writes entries 2
 *                (8x8) and 3 (16x16), so it must hold at least 4 entries
 * @param aligned alignment mode requested by the caller (0, 1 or 2), used to
 *                pick the SSE2 16x16 variant
 */
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned);
#endif /* AVUTIL_X86_PIXELUTILS_H */
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "pixelutils.h"
#include "cpu.h"
/*
 * SAD functions implemented in x86/pixelutils.asm. The name gives the block
 * size and the instruction set used. For the SSE2 16x16 variants, the "a" and
 * "u" infix tells whether src2 is loaded with aligned or unaligned moves; the
 * variant without infix loads both sources with unaligned moves.
 */
int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
const uint8_t *src2, ptrdiff_t stride2);
int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1,
const uint8_t *src2, ptrdiff_t stride2);
int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1,
const uint8_t *src2, ptrdiff_t stride2);
int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
const uint8_t *src2, ptrdiff_t stride2);
int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
const uint8_t *src2, ptrdiff_t stride2);
int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
const uint8_t *src2, ptrdiff_t stride2);
/*
 * Override the 8x8 (index 2) and 16x16 (index 3) entries of the SAD table
 * with the SIMD versions the CPU supports. Later checks overwrite earlier
 * ones, so the most capable instruction set wins. For SSE2, the 16x16 variant
 * depends on `aligned`; any value other than 0, 1 or 2 leaves the entry as
 * set by the previous checks.
 */
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags))
        sad[2] = ff_pixelutils_sad_8x8_mmx;

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        sad[2] = ff_pixelutils_sad_8x8_mmxext;
        sad[3] = ff_pixelutils_sad_16x16_mmxext;
    }

    if (!EXTERNAL_SSE2(cpu_flags))
        return;

    if (aligned == 0)        // src1 unaligned, src2 unaligned
        sad[3] = ff_pixelutils_sad_16x16_sse2;
    else if (aligned == 1)   // src1   aligned, src2 unaligned
        sad[3] = ff_pixelutils_sad_u_16x16_sse2;
    else if (aligned == 2)   // src1   aligned, src2   aligned
        sad[3] = ff_pixelutils_sad_a_16x16_sse2;
}
...@@ -73,6 +73,10 @@ FATE_LIBAVUTIL += fate-parseutils ...@@ -73,6 +73,10 @@ FATE_LIBAVUTIL += fate-parseutils
fate-parseutils: libavutil/parseutils-test$(EXESUF) fate-parseutils: libavutil/parseutils-test$(EXESUF)
fate-parseutils: CMD = run libavutil/parseutils-test fate-parseutils: CMD = run libavutil/parseutils-test
FATE_LIBAVUTIL-$(CONFIG_PIXELUTILS) += fate-pixelutils
fate-pixelutils: libavutil/pixelutils-test$(EXESUF)
fate-pixelutils: CMD = run libavutil/pixelutils-test
FATE_LIBAVUTIL += fate-random_seed FATE_LIBAVUTIL += fate-random_seed
fate-random_seed: libavutil/random_seed-test$(EXESUF) fate-random_seed: libavutil/random_seed-test$(EXESUF)
fate-random_seed: CMD = run libavutil/random_seed-test fate-random_seed: CMD = run libavutil/random_seed-test
...@@ -98,5 +102,6 @@ FATE_LIBAVUTIL += fate-xtea ...@@ -98,5 +102,6 @@ FATE_LIBAVUTIL += fate-xtea
fate-xtea: libavutil/xtea-test$(EXESUF) fate-xtea: libavutil/xtea-test$(EXESUF)
fate-xtea: CMD = run libavutil/xtea-test fate-xtea: CMD = run libavutil/xtea-test
FATE_LIBAVUTIL += $(FATE_LIBAVUTIL-yes)
FATE-$(CONFIG_AVUTIL) += $(FATE_LIBAVUTIL) FATE-$(CONFIG_AVUTIL) += $(FATE_LIBAVUTIL)
fate-libavutil: $(FATE_LIBAVUTIL) fate-libavutil: $(FATE_LIBAVUTIL)
[OK] [UU] SAD [random] 2x2=314 ref=314
[OK] [UU] SAD [random] 4x4=1129 ref=1129
[OK] [UU] SAD [random] 8x8=4936 ref=4936
[OK] [UU] SAD [random] 16x16=20704 ref=20704
[OK] [AU] SAD [random] 2x2=440 ref=440
[OK] [AU] SAD [random] 4x4=1317 ref=1317
[OK] [AU] SAD [random] 8x8=5262 ref=5262
[OK] [AU] SAD [random] 16x16=21040 ref=21040
[OK] [AA] SAD [random] 2x2=196 ref=196
[OK] [AA] SAD [random] 4x4=1225 ref=1225
[OK] [AA] SAD [random] 8x8=4712 ref=4712
[OK] [AA] SAD [random] 16x16=21184 ref=21184
[OK] [UU] SAD [max] 2x2=1020 ref=1020
[OK] [UU] SAD [max] 4x4=4080 ref=4080
[OK] [UU] SAD [max] 8x8=16320 ref=16320
[OK] [UU] SAD [max] 16x16=65280 ref=65280
[OK] [AU] SAD [max] 2x2=1020 ref=1020
[OK] [AU] SAD [max] 4x4=4080 ref=4080
[OK] [AU] SAD [max] 8x8=16320 ref=16320
[OK] [AU] SAD [max] 16x16=65280 ref=65280
[OK] [AA] SAD [max] 2x2=1020 ref=1020
[OK] [AA] SAD [max] 4x4=4080 ref=4080
[OK] [AA] SAD [max] 8x8=16320 ref=16320
[OK] [AA] SAD [max] 16x16=65280 ref=65280
[OK] [UU] SAD [min] 2x2=0 ref=0
[OK] [UU] SAD [min] 4x4=0 ref=0
[OK] [UU] SAD [min] 8x8=0 ref=0
[OK] [UU] SAD [min] 16x16=0 ref=0
[OK] [AU] SAD [min] 2x2=0 ref=0
[OK] [AU] SAD [min] 4x4=0 ref=0
[OK] [AU] SAD [min] 8x8=0 ref=0
[OK] [AU] SAD [min] 16x16=0 ref=0
[OK] [AA] SAD [min] 2x2=0 ref=0
[OK] [AA] SAD [min] 4x4=0 ref=0
[OK] [AA] SAD [min] 8x8=0 ref=0
[OK] [AA] SAD [min] 16x16=0 ref=0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment