Commit d1adad3c authored by Michael Niedermayer's avatar Michael Niedermayer

Merge swscale bloatup

This will be cleaned up in the next merge

Authorship / merged commits:
commit f668afd4
Author: Janne Grunau <janne-libav@jannau.net>
Date:   Fri Apr 15 09:12:34 2011 +0200

    swscale: fix "ISO C90 forbids mixed declarations and code" warning

    only hit with --enable-runtime-cpudetect

commit 7f2ae5c7
Author: Janne Grunau <janne-libav@jannau.net>
Date:   Fri Apr 15 02:09:44 2011 +0200

    swscale: fix compilation with --enable-runtime-cpudetect

commit b6cad3df
Author: Janne Grunau <janne-libav@jannau.net>
Date:   Fri Apr 15 00:31:04 2011 +0200

    swscale: correct include path to fix ppc altivec build

commit 6216fc70
Author: Luca Barbato <lu_zero@gentoo.org>
Date:   Thu Apr 14 22:03:45 2011 +0200

    swscale: simplify rgb2rgb templating

    MMX is always built. Drop the ifdefs

commit 33a0421b
Author: Josh Allmann <joshua.allmann@gmail.com>
Date:   Wed Apr 13 20:57:32 2011 +0200

    swscale: simplify initialization code

    Simplify the fallthrough case when no accelerated functions
    can be initialized.

commit 735bf195
Author: Josh Allmann <joshua.allmann@gmail.com>
Date:   Wed Apr 13 20:57:31 2011 +0200

    swscale: further cleanup swscale.c

    Move x86-specific constants out of swscale.c

commit 86330b4c
Author: Luca Barbato <lu_zero@gentoo.org>
Date:   Wed Apr 13 20:57:30 2011 +0200

    swscale: partially move the arch specific code left

    PPC and x86 code is split off from swscale_template.c. Lots of code is
    still duplicated and should be removed later.

    Again uniformize the init system to be more similar to the dsputil one.

    Unset h*scale_fast in the x86 init in order to make the output
    consistent with the previous status. Thanks to Josh for spotting it.

commit c0038328
Author: Luca Barbato <lu_zero@gentoo.org>
Date:   Wed Apr 13 20:57:29 2011 +0200

    swscale: move away x86 specific code from rgb2rgb

    Keep only the plain C code in the main rgb2rgb.c and move the x86
    specific optimizations to x86/rgb2rgb.c
    Change the initialization pattern a little so some of it can be
    factorized to behave more like dsputils.

Conflicts:
	libswscale/rgb2rgb.c
	libswscale/swscale_template.c
parent d9d56036
......@@ -12,7 +12,8 @@ OBJS-$(ARCH_BFIN) += bfin/internal_bfin.o \
bfin/yuv2rgb_bfin.o
OBJS-$(CONFIG_MLIB) += mlib/yuv2rgb_mlib.o
OBJS-$(HAVE_ALTIVEC) += ppc/yuv2rgb_altivec.o
OBJS-$(HAVE_MMX) += x86/yuv2rgb_mmx.o
OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \
x86/yuv2rgb_mmx.o
OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o
TESTPROGS = colorspace swscale
......
This diff is collapsed.
......@@ -24,7 +24,6 @@
*/
#include <inttypes.h>
#include "config.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/bswap.h"
#include "rgb2rgb.h"
#include "swscale.h"
......@@ -95,45 +94,6 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t
long width, long height,
long lumStride, long chromStride, long srcStride);
#if ARCH_X86
DECLARE_ASM_CONST(8, uint64_t, mmx_ff) = 0x00000000000000FFULL;
DECLARE_ASM_CONST(8, uint64_t, mmx_null) = 0x0000000000000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mmx_one) = 0xFFFFFFFFFFFFFFFFULL;
DECLARE_ASM_CONST(8, uint64_t, mask32b) = 0x000000FF000000FFULL;
DECLARE_ASM_CONST(8, uint64_t, mask32g) = 0x0000FF000000FF00ULL;
DECLARE_ASM_CONST(8, uint64_t, mask32r) = 0x00FF000000FF0000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask32a) = 0xFF000000FF000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask32) = 0x00FFFFFF00FFFFFFULL;
DECLARE_ASM_CONST(8, uint64_t, mask3216br) = 0x00F800F800F800F8ULL;
DECLARE_ASM_CONST(8, uint64_t, mask3216g) = 0x0000FC000000FC00ULL;
DECLARE_ASM_CONST(8, uint64_t, mask3215g) = 0x0000F8000000F800ULL;
DECLARE_ASM_CONST(8, uint64_t, mul3216) = 0x2000000420000004ULL;
DECLARE_ASM_CONST(8, uint64_t, mul3215) = 0x2000000820000008ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24b) = 0x00FF0000FF0000FFULL;
DECLARE_ASM_CONST(8, uint64_t, mask24g) = 0xFF0000FF0000FF00ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24r) = 0x0000FF0000FF0000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24l) = 0x0000000000FFFFFFULL;
DECLARE_ASM_CONST(8, uint64_t, mask24h) = 0x0000FFFFFF000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24hh) = 0xffff000000000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24hhh) = 0xffffffff00000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24hhhh) = 0xffffffffffff0000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask15b) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */
DECLARE_ASM_CONST(8, uint64_t, mask15rg) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */
DECLARE_ASM_CONST(8, uint64_t, mask15s) = 0xFFE0FFE0FFE0FFE0ULL;
DECLARE_ASM_CONST(8, uint64_t, mask15g) = 0x03E003E003E003E0ULL;
DECLARE_ASM_CONST(8, uint64_t, mask15r) = 0x7C007C007C007C00ULL;
#define mask16b mask15b
DECLARE_ASM_CONST(8, uint64_t, mask16g) = 0x07E007E007E007E0ULL;
DECLARE_ASM_CONST(8, uint64_t, mask16r) = 0xF800F800F800F800ULL;
DECLARE_ASM_CONST(8, uint64_t, red_16mask) = 0x0000f8000000f800ULL;
DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL;
DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL;
DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL;
DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
#endif /* ARCH_X86 */
#define RGB2YUV_SHIFT 8
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
......@@ -145,50 +105,9 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.
//plain C versions
#define COMPILE_TEMPLATE_MMX 0
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define COMPILE_TEMPLATE_SSE2 0
#define RENAME(a) a ## _C
#include "rgb2rgb_template.c"
#if ARCH_X86
//MMX versions
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#define COMPILE_TEMPLATE_MMX 1
#define RENAME(a) a ## _MMX
#include "rgb2rgb_template.c"
//MMX2 versions
#undef RENAME
#undef COMPILE_TEMPLATE_MMX2
#define COMPILE_TEMPLATE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "rgb2rgb_template.c"
//SSE2 versions
#undef RENAME
#undef COMPILE_TEMPLATE_SSE2
#define COMPILE_TEMPLATE_SSE2 1
#define RENAME(a) a ## _SSE2
#include "rgb2rgb_template.c"
//3DNOW versions
#undef RENAME
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_SSE2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_SSE2 1
#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3DNOW
#include "rgb2rgb_template.c"
#endif //ARCH_X86 || ARCH_X86_64
/*
RGB15->RGB16 original by Strepto/Astral
......@@ -199,18 +118,10 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
void sws_rgb2rgb_init(int flags)
{
#if HAVE_SSE2 || HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
if (flags & SWS_CPU_CAPS_SSE2)
rgb2rgb_init_SSE2();
else if (flags & SWS_CPU_CAPS_MMX2)
rgb2rgb_init_MMX2();
else if (flags & SWS_CPU_CAPS_3DNOW)
rgb2rgb_init_3DNOW();
else if (flags & SWS_CPU_CAPS_MMX)
rgb2rgb_init_MMX();
else
rgb2rgb_init_c();
#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
rgb2rgb_init_x86(flags);
#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
rgb2rgb_init_C();
}
#if LIBSWSCALE_VERSION_MAJOR < 1
......
......@@ -168,4 +168,6 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const u
void sws_rgb2rgb_init(int flags);
void rgb2rgb_init_x86(int flags);
#endif /* SWSCALE_RGB2RGB_H */
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -122,63 +122,6 @@ add BGR4 output support
write special BGR->BGR scaler
*/
#if ARCH_X86
DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
0x0103010301030103LL,
0x0200020002000200LL,};
const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
0x0602060206020602LL,
0x0004000400040004LL,};
DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
#ifdef FAST_BGR2YV12
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
#else
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
#endif /* FAST_BGR2YV12 */
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
{0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
{0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
};
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
#endif /* ARCH_X86 */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
{ 1, 3, 1, 3, 1, 3, 1, 3, },
{ 2, 0, 2, 0, 2, 0, 2, 0, },
......@@ -1367,17 +1310,14 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define COMPILE_TEMPLATE_ALTIVEC 0
#if COMPILE_C
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif
#if COMPILE_ALTIVEC
#undef RENAME
#undef COMPILE_TEMPLATE_ALTIVEC
#define COMPILE_TEMPLATE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#include "ppc/swscale_template.c"
#endif
#if ARCH_X86
......@@ -1392,7 +1332,7 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#include "x86/swscale_template.c"
#endif
//MMX2 versions
......@@ -1405,7 +1345,7 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
#define COMPILE_TEMPLATE_MMX2 1
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#include "x86/swscale_template.c"
#endif
//3DNOW versions
......@@ -1418,44 +1358,36 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#include "x86/swscale_template.c"
#endif
#endif //ARCH_X86
SwsFunc ff_getSwsFunc(SwsContext *c)
{
#if CONFIG_RUNTIME_CPUDETECT
int flags = c->flags;
sws_init_swScale_c(c);
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
// ordered per speed fastest first
if (flags & SWS_CPU_CAPS_MMX2) {
if (c->flags & SWS_CPU_CAPS_MMX2) {
sws_init_swScale_MMX2(c);
return swScale_MMX2;
} else if (flags & SWS_CPU_CAPS_3DNOW) {
} else if (c->flags & SWS_CPU_CAPS_3DNOW) {
sws_init_swScale_3DNow(c);
return swScale_3DNow;
} else if (flags & SWS_CPU_CAPS_MMX) {
} else if (c->flags & SWS_CPU_CAPS_MMX) {
sws_init_swScale_MMX(c);
return swScale_MMX;
} else {
sws_init_swScale_C(c);
return swScale_C;
}
#else
#if COMPILE_ALTIVEC
if (flags & SWS_CPU_CAPS_ALTIVEC) {
if (c->flags & SWS_CPU_CAPS_ALTIVEC) {
sws_init_swScale_altivec(c);
return swScale_altivec;
} else {
sws_init_swScale_C(c);
return swScale_C;
}
#endif
sws_init_swScale_C(c);
return swScale_C;
#endif /* ARCH_X86 */
#else //CONFIG_RUNTIME_CPUDETECT
#if COMPILE_TEMPLATE_MMX2
......@@ -1470,11 +1402,10 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
#elif COMPILE_TEMPLATE_ALTIVEC
sws_init_swScale_altivec(c);
return swScale_altivec;
#else
sws_init_swScale_C(c);
return swScale_C;
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
return swScale_c;
}
static void copyPlane(const uint8_t *src, int srcStride,
......
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
* software RGB to RGB converter
* pluralize by software PAL8 to RGB converter
* software YUV to YUV converter
* software YUV to RGB converter
* Written by Nick Kurshev.
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "config.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/bswap.h"
#include "libswscale/rgb2rgb.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
DECLARE_ASM_CONST(8, uint64_t, mmx_ff) = 0x00000000000000FFULL;
DECLARE_ASM_CONST(8, uint64_t, mmx_null) = 0x0000000000000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mmx_one) = 0xFFFFFFFFFFFFFFFFULL;
DECLARE_ASM_CONST(8, uint64_t, mask32b) = 0x000000FF000000FFULL;
DECLARE_ASM_CONST(8, uint64_t, mask32g) = 0x0000FF000000FF00ULL;
DECLARE_ASM_CONST(8, uint64_t, mask32r) = 0x00FF000000FF0000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask32a) = 0xFF000000FF000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask32) = 0x00FFFFFF00FFFFFFULL;
DECLARE_ASM_CONST(8, uint64_t, mask3216br) = 0x00F800F800F800F8ULL;
DECLARE_ASM_CONST(8, uint64_t, mask3216g) = 0x0000FC000000FC00ULL;
DECLARE_ASM_CONST(8, uint64_t, mask3215g) = 0x0000F8000000F800ULL;
DECLARE_ASM_CONST(8, uint64_t, mul3216) = 0x2000000420000004ULL;
DECLARE_ASM_CONST(8, uint64_t, mul3215) = 0x2000000820000008ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24b) = 0x00FF0000FF0000FFULL;
DECLARE_ASM_CONST(8, uint64_t, mask24g) = 0xFF0000FF0000FF00ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24r) = 0x0000FF0000FF0000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24l) = 0x0000000000FFFFFFULL;
DECLARE_ASM_CONST(8, uint64_t, mask24h) = 0x0000FFFFFF000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24hh) = 0xffff000000000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24hhh) = 0xffffffff00000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask24hhhh) = 0xffffffffffff0000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask15b) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */
DECLARE_ASM_CONST(8, uint64_t, mask15rg) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */
DECLARE_ASM_CONST(8, uint64_t, mask15s) = 0xFFE0FFE0FFE0FFE0ULL;
DECLARE_ASM_CONST(8, uint64_t, mask15g) = 0x03E003E003E003E0ULL;
DECLARE_ASM_CONST(8, uint64_t, mask15r) = 0x7C007C007C007C00ULL;
#define mask16b mask15b
DECLARE_ASM_CONST(8, uint64_t, mask16g) = 0x07E007E007E007E0ULL;
DECLARE_ASM_CONST(8, uint64_t, mask16r) = 0xF800F800F800F800ULL;
DECLARE_ASM_CONST(8, uint64_t, red_16mask) = 0x0000f8000000f800ULL;
DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL;
DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL;
DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL;
DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
#define RGB2YUV_SHIFT 8
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define COMPILE_TEMPLATE_SSE2 0
//MMX versions
#undef RENAME
#define RENAME(a) a ## _MMX
#include "rgb2rgb_template.c"
//MMX2 versions
#undef RENAME
#undef COMPILE_TEMPLATE_MMX2
#define COMPILE_TEMPLATE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "rgb2rgb_template.c"
//SSE2 versions
#undef RENAME
#undef COMPILE_TEMPLATE_SSE2
#define COMPILE_TEMPLATE_SSE2 1
#define RENAME(a) a ## _SSE2
#include "rgb2rgb_template.c"
//3DNOW versions
#undef RENAME
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_SSE2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_SSE2 1
#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3DNOW
#include "rgb2rgb_template.c"
/*
RGB15->RGB16 original by Strepto/Astral
ported to gcc & bugfixed : A'rpi
MMX2, 3DNOW optimization by Nick Kurshev
32-bit C version, and and&add trick by Michael Niedermayer
*/
void rgb2rgb_init_x86(int flags)
{
#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
if (flags & SWS_CPU_CAPS_SSE2)
rgb2rgb_init_SSE2();
else if (flags & SWS_CPU_CAPS_MMX2)
rgb2rgb_init_MMX2();
else if (flags & SWS_CPU_CAPS_3DNOW)
rgb2rgb_init_3DNOW();
else if (flags & SWS_CPU_CAPS_MMX)
rgb2rgb_init_MMX();
#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
}
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef SWSCALE_X86_SWSCALE_TEMPLATE_H
#define SWSCALE_X86_SWSCALE_TEMPLATE_H
DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
0x0103010301030103LL,
0x0200020002000200LL,};
const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
0x0602060206020602LL,
0x0004000400040004LL,};
DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
#ifdef FAST_BGR2YV12
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
#else
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
#endif /* FAST_BGR2YV12 */
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
{0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
{0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
};
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
#endif /* SWSCALE_X86_SWSCALE_TEMPLATE_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment