Commit dd68318c authored by Ramiro Polla

Cosmetics:

- Place curly brackets on the same line as while/for/if/switch/else/do;
- Place curly brackets at column 0 on the next line when starting a function.

Originally committed as revision 29523 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
parent 9dc6bb7b
...@@ -71,7 +71,7 @@ int main(int argc, char **argv) ...@@ -71,7 +71,7 @@ int main(int argc, char **argv)
av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps); av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps);
sws_rgb2rgb_init(cpu_caps); sws_rgb2rgb_init(cpu_caps);
for(funcNum=0; ; funcNum++){ for(funcNum=0; ; funcNum++) {
struct func_info_s { struct func_info_s {
int src_bpp; int src_bpp;
int dst_bpp; int dst_bpp;
...@@ -118,13 +118,13 @@ int main(int argc, char **argv) ...@@ -118,13 +118,13 @@ int main(int argc, char **argv)
av_log(NULL, AV_LOG_INFO,"."); av_log(NULL, AV_LOG_INFO,".");
memset(srcBuffer, srcByte, SIZE); memset(srcBuffer, srcByte, SIZE);
for(width=63; width>0; width--){ for(width=63; width>0; width--) {
int dstOffset; int dstOffset;
for(dstOffset=128; dstOffset<196; dstOffset+=4){ for(dstOffset=128; dstOffset<196; dstOffset+=4) {
int srcOffset; int srcOffset;
memset(dstBuffer, dstByte, SIZE); memset(dstBuffer, dstByte, SIZE);
for(srcOffset=128; srcOffset<196; srcOffset+=4){ for(srcOffset=128; srcOffset<196; srcOffset+=4) {
uint8_t *src= srcBuffer+srcOffset; uint8_t *src= srcBuffer+srcOffset;
uint8_t *dst= dstBuffer+dstOffset; uint8_t *dst= dstBuffer+dstOffset;
const char *name=NULL; const char *name=NULL;
...@@ -139,24 +139,24 @@ int main(int argc, char **argv) ...@@ -139,24 +139,24 @@ int main(int argc, char **argv)
if(!srcBpp) break; if(!srcBpp) break;
for(i=0; i<SIZE; i++){ for(i=0; i<SIZE; i++) {
if(srcBuffer[i]!=srcByte){ if(srcBuffer[i]!=srcByte) {
av_log(NULL, AV_LOG_INFO, "src damaged at %d w:%d src:%d dst:%d %s\n", av_log(NULL, AV_LOG_INFO, "src damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name); i, width, srcOffset, dstOffset, name);
failed=1; failed=1;
break; break;
} }
} }
for(i=0; i<dstOffset; i++){ for(i=0; i<dstOffset; i++) {
if(dstBuffer[i]!=dstByte){ if(dstBuffer[i]!=dstByte) {
av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n", av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name); i, width, srcOffset, dstOffset, name);
failed=1; failed=1;
break; break;
} }
} }
for(i=dstOffset + width*dstBpp; i<SIZE; i++){ for(i=dstOffset + width*dstBpp; i<SIZE; i++) {
if(dstBuffer[i]!=dstByte){ if(dstBuffer[i]!=dstByte) {
av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n", av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name); i, width, srcOffset, dstOffset, name);
failed=1; failed=1;
......
...@@ -31,8 +31,9 @@ ...@@ -31,8 +31,9 @@
#include "libswscale/swscale.h" #include "libswscale/swscale.h"
static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
if(c->srcFormat == PIX_FMT_YUV422P){ {
if(c->srcFormat == PIX_FMT_YUV422P) {
srcStride[1] *= 2; srcStride[1] *= 2;
srcStride[2] *= 2; srcStride[2] *= 2;
} }
...@@ -45,8 +46,9 @@ static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], i ...@@ -45,8 +46,9 @@ static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], i
} }
static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
if(c->srcFormat == PIX_FMT_YUV422P){ {
if(c->srcFormat == PIX_FMT_YUV422P) {
srcStride[1] *= 2; srcStride[1] *= 2;
srcStride[2] *= 2; srcStride[2] *= 2;
} }
...@@ -59,8 +61,9 @@ static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], i ...@@ -59,8 +61,9 @@ static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], i
} }
static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
if(c->srcFormat == PIX_FMT_YUV422P){ {
if(c->srcFormat == PIX_FMT_YUV422P) {
srcStride[1] *= 2; srcStride[1] *= 2;
srcStride[2] *= 2; srcStride[2] *= 2;
} }
...@@ -75,7 +78,7 @@ static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], in ...@@ -75,7 +78,7 @@ static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], in
SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c) SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c)
{ {
switch(c->dstFormat){ switch(c->dstFormat) {
case PIX_FMT_RGB24: return mlib_YUV2RGB420_24; case PIX_FMT_RGB24: return mlib_YUV2RGB420_24;
case PIX_FMT_BGR32: return mlib_YUV2ARGB420_32; case PIX_FMT_BGR32: return mlib_YUV2ARGB420_32;
case PIX_FMT_RGB32: return mlib_YUV2ABGR420_32; case PIX_FMT_RGB32: return mlib_YUV2ABGR420_32;
......
...@@ -23,7 +23,8 @@ ...@@ -23,7 +23,8 @@
#include "swscale.h" #include "swscale.h"
#include "swscale_internal.h" #include "swscale_internal.h"
static const char * sws_context_to_name(void * ptr) { static const char * sws_context_to_name(void * ptr)
{
return "swscaler"; return "swscaler";
} }
......
...@@ -24,7 +24,8 @@ ...@@ -24,7 +24,8 @@
#define vzero vec_splat_s32(0) #define vzero vec_splat_s32(0)
static inline void static inline void
altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) { altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
{
register int i; register int i;
vector unsigned int altivec_vectorShiftInt19 = vector unsigned int altivec_vectorShiftInt19 =
vec_add(vec_splat_u32(10), vec_splat_u32(9)); vec_add(vec_splat_u32(10), vec_splat_u32(9));
...@@ -389,7 +390,8 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, ...@@ -389,7 +390,8 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW,
} }
static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) { int srcSliceH, uint8_t* dstParam[], int dstStride_a[])
{
uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY; uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
uint8_t *ysrc = src[0]; uint8_t *ysrc = src[0];
...@@ -466,7 +468,8 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int ...@@ -466,7 +468,8 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int
} }
static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) { int srcSliceH, uint8_t* dstParam[], int dstStride_a[])
{
uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY; uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
uint8_t *ysrc = src[0]; uint8_t *ysrc = src[0];
......
...@@ -714,7 +714,7 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c) ...@@ -714,7 +714,7 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
if ((c->srcH & 0x1) != 0) if ((c->srcH & 0x1) != 0)
return NULL; return NULL;
switch(c->dstFormat){ switch(c->dstFormat) {
case PIX_FMT_RGB24: case PIX_FMT_RGB24:
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n"); av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
return altivec_yuv2_rgb24; return altivec_yuv2_rgb24;
...@@ -738,7 +738,7 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c) ...@@ -738,7 +738,7 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
break; break;
case PIX_FMT_UYVY422: case PIX_FMT_UYVY422:
switch(c->dstFormat){ switch(c->dstFormat) {
case PIX_FMT_BGR32: case PIX_FMT_BGR32:
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n"); av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
return altivec_uyvy_rgb32; return altivec_uyvy_rgb32;
...@@ -800,7 +800,7 @@ ff_yuv2packedX_altivec(SwsContext *c, ...@@ -800,7 +800,7 @@ ff_yuv2packedX_altivec(SwsContext *c,
out = (vector unsigned char *)dest; out = (vector unsigned char *)dest;
for (i=0; i<dstW; i+=16){ for (i=0; i<dstW; i+=16) {
Y0 = RND; Y0 = RND;
Y1 = RND; Y1 = RND;
/* extract 16 coeffs from lumSrc */ /* extract 16 coeffs from lumSrc */
......
...@@ -196,7 +196,8 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; ...@@ -196,7 +196,8 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
32-bit C version, and and&add trick by Michael Niedermayer 32-bit C version, and and&add trick by Michael Niedermayer
*/ */
void sws_rgb2rgb_init(int flags){ void sws_rgb2rgb_init(int flags)
{
#if (HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX) && CONFIG_GPL #if (HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX) && CONFIG_GPL
if (flags & SWS_CPU_CAPS_MMX2) if (flags & SWS_CPU_CAPS_MMX2)
rgb2rgb_init_MMX2(); rgb2rgb_init_MMX2();
...@@ -227,8 +228,7 @@ void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const ...@@ -227,8 +228,7 @@ void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const
{ {
long i; long i;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
//FIXME slow? //FIXME slow?
dst[0]= palette[src[i]*4+0]; dst[0]= palette[src[i]*4+0];
dst[1]= palette[src[i]*4+1]; dst[1]= palette[src[i]*4+1];
...@@ -273,8 +273,7 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -273,8 +273,7 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
{ {
long i; long i;
long num_pixels = src_size >> 2; long num_pixels = src_size >> 2;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
/* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */ /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
dst[3*i + 0] = src[4*i + 1]; dst[3*i + 0] = src[4*i + 1];
...@@ -291,8 +290,7 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -291,8 +290,7 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size) void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size)
{ {
long i; long i;
for (i=0; 3*i<src_size; i++) for (i=0; 3*i<src_size; i++) {
{
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
/* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */ /* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */
dst[4*i + 0] = 255; dst[4*i + 0] = 255;
...@@ -314,8 +312,7 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -314,8 +312,7 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
uint8_t *d = dst; uint8_t *d = dst;
const uint16_t *s = (const uint16_t *)src; const uint16_t *s = (const uint16_t *)src;
end = s + src_size/2; end = s + src_size/2;
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
...@@ -338,8 +335,7 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -338,8 +335,7 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size)
uint8_t *d = dst; uint8_t *d = dst;
const uint16_t *s = (const uint16_t *)src; const uint16_t *s = (const uint16_t *)src;
end = s + src_size/2; end = s + src_size/2;
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
*d++ = (bgr&0xF800)>>8; *d++ = (bgr&0xF800)>>8;
...@@ -353,8 +349,7 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -353,8 +349,7 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
unsigned rgb = ((const uint16_t*)src)[i]; unsigned rgb = ((const uint16_t*)src)[i];
((uint16_t*)dst)[i] = (rgb>>11) | (rgb&0x7E0) | (rgb<<11); ((uint16_t*)dst)[i] = (rgb>>11) | (rgb&0x7E0) | (rgb<<11);
} }
...@@ -365,8 +360,7 @@ void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -365,8 +360,7 @@ void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
unsigned rgb = ((const uint16_t*)src)[i]; unsigned rgb = ((const uint16_t*)src)[i];
((uint16_t*)dst)[i] = (rgb>>11) | ((rgb&0x7C0)>>1) | ((rgb&0x1F)<<10); ((uint16_t*)dst)[i] = (rgb>>11) | ((rgb&0x7C0)>>1) | ((rgb&0x1F)<<10);
} }
...@@ -378,8 +372,7 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -378,8 +372,7 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
uint8_t *d = dst; uint8_t *d = dst;
const uint16_t *s = (const uint16_t *)src; const uint16_t *s = (const uint16_t *)src;
end = s + src_size/2; end = s + src_size/2;
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
...@@ -402,8 +395,7 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -402,8 +395,7 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size)
uint8_t *d = dst; uint8_t *d = dst;
const uint16_t *s = (const uint16_t *)src; const uint16_t *s = (const uint16_t *)src;
end = s + src_size/2; end = s + src_size/2;
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
*d++ = (bgr&0x7C00)>>7; *d++ = (bgr&0x7C00)>>7;
...@@ -417,8 +409,7 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -417,8 +409,7 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
unsigned rgb = ((const uint16_t*)src)[i]; unsigned rgb = ((const uint16_t*)src)[i];
((uint16_t*)dst)[i] = ((rgb&0x7C00)>>10) | ((rgb&0x3E0)<<1) | (rgb<<11); ((uint16_t*)dst)[i] = ((rgb&0x7C00)>>10) | ((rgb&0x3E0)<<1) | (rgb<<11);
} }
...@@ -429,8 +420,7 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -429,8 +420,7 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
unsigned br; unsigned br;
unsigned rgb = ((const uint16_t*)src)[i]; unsigned rgb = ((const uint16_t*)src)[i];
br = rgb&0x7c1F; br = rgb&0x7c1F;
...@@ -442,8 +432,7 @@ void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -442,8 +432,7 @@ void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
{ {
long i; long i;
long num_pixels = src_size; long num_pixels = src_size;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
unsigned b,g,r; unsigned b,g,r;
register uint8_t rgb; register uint8_t rgb;
rgb = src[i]; rgb = src[i];
......
...@@ -84,8 +84,7 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s ...@@ -84,8 +84,7 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 23; mm_end = end - 23;
__asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
...@@ -113,8 +112,7 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s ...@@ -113,8 +112,7 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
/* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
*dest++ = 255; *dest++ = 255;
...@@ -143,8 +141,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s ...@@ -143,8 +141,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s
#if HAVE_MMX #if HAVE_MMX
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 31; mm_end = end - 31;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
...@@ -199,8 +196,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s ...@@ -199,8 +196,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
/* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
s++; s++;
...@@ -234,8 +230,7 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -234,8 +230,7 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(PREFETCH" %0"::"m"(*s)); __asm__ volatile(PREFETCH" %0"::"m"(*s));
__asm__ volatile("movq %0, %%mm4"::"m"(mask15s)); __asm__ volatile("movq %0, %%mm4"::"m"(mask15s));
mm_end = end - 15; mm_end = end - 15;
while (s<mm_end) while (s<mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
...@@ -258,15 +253,13 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -258,15 +253,13 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
mm_end = end - 3; mm_end = end - 3;
while (s < mm_end) while (s < mm_end) {
{
register unsigned x= *((const uint32_t *)s); register unsigned x= *((const uint32_t *)s);
*((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
d+=4; d+=4;
s+=4; s+=4;
} }
if (s < end) if (s < end) {
{
register unsigned short x= *((const uint16_t *)s); register unsigned short x= *((const uint16_t *)s);
*((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
} }
...@@ -284,8 +277,7 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -284,8 +277,7 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile("movq %0, %%mm7"::"m"(mask15rg)); __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg));
__asm__ volatile("movq %0, %%mm6"::"m"(mask15b)); __asm__ volatile("movq %0, %%mm6"::"m"(mask15b));
mm_end = end - 15; mm_end = end - 15;
while (s<mm_end) while (s<mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
...@@ -312,15 +304,13 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -312,15 +304,13 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
mm_end = end - 3; mm_end = end - 3;
while (s < mm_end) while (s < mm_end) {
{
register uint32_t x= *((const uint32_t*)s); register uint32_t x= *((const uint32_t*)s);
*((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
s+=4; s+=4;
d+=4; d+=4;
} }
if (s < end) if (s < end) {
{
register uint16_t x= *((const uint16_t*)s); register uint16_t x= *((const uint16_t*)s);
*((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
} }
...@@ -378,8 +368,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -378,8 +368,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_
"movq %0, %%mm7 \n\t" "movq %0, %%mm7 \n\t"
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_16mask),"m"(green_16mask)); ::"m"(red_16mask),"m"(green_16mask));
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
...@@ -417,8 +406,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -417,8 +406,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register int rgb = *(const uint32_t*)s; s += 4; register int rgb = *(const uint32_t*)s; s += 4;
*d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
} }
...@@ -440,8 +428,7 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long s ...@@ -440,8 +428,7 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long s
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_16mask),"m"(green_16mask)); ::"m"(red_16mask),"m"(green_16mask));
mm_end = end - 15; mm_end = end - 15;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
...@@ -478,8 +465,7 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long s ...@@ -478,8 +465,7 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register int rgb = *(const uint32_t*)s; s += 4; register int rgb = *(const uint32_t*)s; s += 4;
*d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
} }
...@@ -537,8 +523,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -537,8 +523,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_
"movq %0, %%mm7 \n\t" "movq %0, %%mm7 \n\t"
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_15mask),"m"(green_15mask)); ::"m"(red_15mask),"m"(green_15mask));
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
...@@ -576,8 +561,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -576,8 +561,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register int rgb = *(const uint32_t*)s; s += 4; register int rgb = *(const uint32_t*)s; s += 4;
*d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
} }
...@@ -599,8 +583,7 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long s ...@@ -599,8 +583,7 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long s
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_15mask),"m"(green_15mask)); ::"m"(red_15mask),"m"(green_15mask));
mm_end = end - 15; mm_end = end - 15;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
...@@ -637,8 +620,7 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long s ...@@ -637,8 +620,7 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register int rgb = *(const uint32_t*)s; s += 4; register int rgb = *(const uint32_t*)s; s += 4;
*d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
} }
...@@ -660,8 +642,7 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s ...@@ -660,8 +642,7 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_16mask),"m"(green_16mask)); ::"m"(red_16mask),"m"(green_16mask));
mm_end = end - 11; mm_end = end - 11;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
...@@ -698,8 +679,7 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s ...@@ -698,8 +679,7 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
const int b = *s++; const int b = *s++;
const int g = *s++; const int g = *s++;
const int r = *s++; const int r = *s++;
...@@ -723,8 +703,7 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -723,8 +703,7 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_16mask),"m"(green_16mask)); ::"m"(red_16mask),"m"(green_16mask));
mm_end = end - 15; mm_end = end - 15;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
...@@ -761,8 +740,7 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -761,8 +740,7 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
const int r = *s++; const int r = *s++;
const int g = *s++; const int g = *s++;
const int b = *s++; const int b = *s++;
...@@ -786,8 +764,7 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s ...@@ -786,8 +764,7 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_15mask),"m"(green_15mask)); ::"m"(red_15mask),"m"(green_15mask));
mm_end = end - 11; mm_end = end - 11;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
...@@ -824,8 +801,7 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s ...@@ -824,8 +801,7 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
const int b = *s++; const int b = *s++;
const int g = *s++; const int g = *s++;
const int r = *s++; const int r = *s++;
...@@ -849,8 +825,7 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -849,8 +825,7 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_15mask),"m"(green_15mask)); ::"m"(red_15mask),"m"(green_15mask));
mm_end = end - 15; mm_end = end - 15;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
...@@ -887,8 +862,7 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -887,8 +862,7 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
const int r = *s++; const int r = *s++;
const int g = *s++; const int g = *s++;
const int b = *s++; const int b = *s++;
...@@ -929,8 +903,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s ...@@ -929,8 +903,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s
#if HAVE_MMX #if HAVE_MMX
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 7; mm_end = end - 7;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
...@@ -1049,8 +1022,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s ...@@ -1049,8 +1022,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
*d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x1F)<<3;
...@@ -1071,8 +1043,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s ...@@ -1071,8 +1043,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s
#if HAVE_MMX #if HAVE_MMX
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 7; mm_end = end - 7;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
...@@ -1190,8 +1161,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s ...@@ -1190,8 +1161,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
*d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x1F)<<3;
...@@ -1233,8 +1203,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -1233,8 +1203,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
__asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
mm_end = end - 3; mm_end = end - 3;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
...@@ -1256,8 +1225,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -1256,8 +1225,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
...@@ -1288,8 +1256,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -1288,8 +1256,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
__asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
mm_end = end - 3; mm_end = end - 3;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
...@@ -1311,8 +1278,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -1311,8 +1278,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
...@@ -1453,8 +1419,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s ...@@ -1453,8 +1419,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
src-= src_size; src-= src_size;
dst-= src_size; dst-= src_size;
#endif #endif
for (i=0; i<src_size; i+=3) for (i=0; i<src_size; i+=3) {
{
register uint8_t x; register uint8_t x;
x = src[i + 2]; x = src[i + 2];
dst[i + 1] = src[i + 1]; dst[i + 1] = src[i + 1];
...@@ -1469,8 +1434,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u ...@@ -1469,8 +1434,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
{ {
long y; long y;
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
for (y=0; y<height; y++) for (y=0; y<height; y++) {
{
#if HAVE_MMX #if HAVE_MMX
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
__asm__ volatile( __asm__ volatile(
...@@ -1530,7 +1494,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u ...@@ -1530,7 +1494,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
const uint32_t *yc = (uint32_t *) ysrc; const uint32_t *yc = (uint32_t *) ysrc;
const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
for (i = 0; i < chromWidth; i += 8){ for (i = 0; i < chromWidth; i += 8) {
uint64_t y1, y2, yuv1, yuv2; uint64_t y1, y2, yuv1, yuv2;
uint64_t u, v; uint64_t u, v;
/* Prefetch */ /* Prefetch */
...@@ -1559,7 +1523,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u ...@@ -1559,7 +1523,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
int i; int i;
uint64_t *ldst = (uint64_t *) dst; uint64_t *ldst = (uint64_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for (i = 0; i < chromWidth; i += 2){ for (i = 0; i < chromWidth; i += 2) {
uint64_t k, l; uint64_t k, l;
k = yc[0] + (uc[0] << 8) + k = yc[0] + (uc[0] << 8) +
(yc[1] << 16) + (vc[0] << 24); (yc[1] << 16) + (vc[0] << 24);
...@@ -1574,7 +1538,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u ...@@ -1574,7 +1538,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
#else #else
int i, *idst = (int32_t *) dst; int i, *idst = (int32_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for (i = 0; i < chromWidth; i++){ for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
*idst++ = (yc[0] << 24)+ (uc[0] << 16) + *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
(yc[1] << 8) + (vc[0] << 0); (yc[1] << 8) + (vc[0] << 0);
...@@ -1588,8 +1552,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u ...@@ -1588,8 +1552,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
} }
#endif #endif
#endif #endif
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
{
usrc += chromStride; usrc += chromStride;
vsrc += chromStride; vsrc += chromStride;
} }
...@@ -1621,8 +1584,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u ...@@ -1621,8 +1584,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
{ {
long y; long y;
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
for (y=0; y<height; y++) for (y=0; y<height; y++) {
{
#if HAVE_MMX #if HAVE_MMX
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
__asm__ volatile( __asm__ volatile(
...@@ -1665,7 +1627,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u ...@@ -1665,7 +1627,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
int i; int i;
uint64_t *ldst = (uint64_t *) dst; uint64_t *ldst = (uint64_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for (i = 0; i < chromWidth; i += 2){ for (i = 0; i < chromWidth; i += 2) {
uint64_t k, l; uint64_t k, l;
k = uc[0] + (yc[0] << 8) + k = uc[0] + (yc[0] << 8) +
(vc[0] << 16) + (yc[1] << 24); (vc[0] << 16) + (yc[1] << 24);
...@@ -1680,7 +1642,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u ...@@ -1680,7 +1642,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
#else #else
int i, *idst = (int32_t *) dst; int i, *idst = (int32_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for (i = 0; i < chromWidth; i++){ for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
*idst++ = (uc[0] << 24)+ (yc[0] << 16) + *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
(vc[0] << 8) + (yc[1] << 0); (vc[0] << 8) + (yc[1] << 0);
...@@ -1694,8 +1656,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u ...@@ -1694,8 +1656,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
} }
#endif #endif
#endif #endif
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
{
usrc += chromStride; usrc += chromStride;
vsrc += chromStride; vsrc += chromStride;
} }
...@@ -1751,8 +1712,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1751,8 +1712,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
{ {
long y; long y;
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
for (y=0; y<height; y+=2) for (y=0; y<height; y+=2) {
{
#if HAVE_MMX #if HAVE_MMX
__asm__ volatile( __asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"REG_a", %%"REG_a" \n\t"
...@@ -1837,8 +1797,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1837,8 +1797,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
); );
#else #else
long i; long i;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
ydst[2*i+0] = src[4*i+0]; ydst[2*i+0] = src[4*i+0];
udst[i] = src[4*i+1]; udst[i] = src[4*i+1];
ydst[2*i+1] = src[4*i+2]; ydst[2*i+1] = src[4*i+2];
...@@ -1847,8 +1806,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1847,8 +1806,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
ydst += lumStride; ydst += lumStride;
src += srcStride; src += srcStride;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
ydst[2*i+0] = src[4*i+0]; ydst[2*i+0] = src[4*i+0];
ydst[2*i+1] = src[4*i+2]; ydst[2*i+1] = src[4*i+2];
} }
...@@ -1882,7 +1840,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi ...@@ -1882,7 +1840,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
dst[0]= src[0]; dst[0]= src[0];
// first line // first line
for (x=0; x<srcWidth-1; x++){ for (x=0; x<srcWidth-1; x++) {
dst[2*x+1]= (3*src[x] + src[x+1])>>2; dst[2*x+1]= (3*src[x] + src[x+1])>>2;
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
} }
...@@ -1890,7 +1848,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi ...@@ -1890,7 +1848,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
dst+= dstStride; dst+= dstStride;
for (y=1; y<srcHeight; y++){ for (y=1; y<srcHeight; y++) {
#if HAVE_MMX2 || HAVE_AMD3DNOW #if HAVE_MMX2 || HAVE_AMD3DNOW
const x86_reg mmxSize= srcWidth&~15; const x86_reg mmxSize= srcWidth&~15;
__asm__ volatile( __asm__ volatile(
...@@ -1941,7 +1899,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi ...@@ -1941,7 +1899,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
dst[0 ]= (3*src[0] + src[srcStride])>>2; dst[0 ]= (3*src[0] + src[srcStride])>>2;
dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
for (x=mmxSize-1; x<srcWidth-1; x++){ for (x=mmxSize-1; x<srcWidth-1; x++) {
dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
...@@ -1958,13 +1916,13 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi ...@@ -1958,13 +1916,13 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
#if 1 #if 1
dst[0]= src[0]; dst[0]= src[0];
for (x=0; x<srcWidth-1; x++){ for (x=0; x<srcWidth-1; x++) {
dst[2*x+1]= (3*src[x] + src[x+1])>>2; dst[2*x+1]= (3*src[x] + src[x+1])>>2;
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
} }
dst[2*srcWidth-1]= src[srcWidth-1]; dst[2*srcWidth-1]= src[srcWidth-1];
#else #else
for (x=0; x<srcWidth; x++){ for (x=0; x<srcWidth; x++) {
dst[2*x+0]= dst[2*x+0]=
dst[2*x+1]= src[x]; dst[2*x+1]= src[x];
} }
...@@ -1989,8 +1947,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1989,8 +1947,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
{ {
long y; long y;
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
for (y=0; y<height; y+=2) for (y=0; y<height; y+=2) {
{
#if HAVE_MMX #if HAVE_MMX
__asm__ volatile( __asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"REG_a", %%"REG_a" \n\t"
...@@ -2075,8 +2032,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -2075,8 +2032,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
); );
#else #else
long i; long i;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
udst[i] = src[4*i+0]; udst[i] = src[4*i+0];
ydst[2*i+0] = src[4*i+1]; ydst[2*i+0] = src[4*i+1];
vdst[i] = src[4*i+2]; vdst[i] = src[4*i+2];
...@@ -2085,8 +2041,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -2085,8 +2041,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
ydst += lumStride; ydst += lumStride;
src += srcStride; src += srcStride;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
ydst[2*i+0] = src[4*i+1]; ydst[2*i+0] = src[4*i+1];
ydst[2*i+1] = src[4*i+3]; ydst[2*i+1] = src[4*i+3];
} }
...@@ -2117,11 +2072,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -2117,11 +2072,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
long y; long y;
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
#if HAVE_MMX #if HAVE_MMX
for (y=0; y<height-2; y+=2) for (y=0; y<height-2; y+=2) {
{
long i; long i;
for (i=0; i<2; i++) for (i=0; i<2; i++) {
{
__asm__ volatile( __asm__ volatile(
"mov %2, %%"REG_a" \n\t" "mov %2, %%"REG_a" \n\t"
"movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
...@@ -2355,11 +2308,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -2355,11 +2308,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
#else #else
y=0; y=0;
#endif #endif
for (; y<height; y+=2) for (; y<height; y+=2) {
{
long i; long i;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
unsigned int b = src[6*i+0]; unsigned int b = src[6*i+0];
unsigned int g = src[6*i+1]; unsigned int g = src[6*i+1];
unsigned int r = src[6*i+2]; unsigned int r = src[6*i+2];
...@@ -2382,8 +2333,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -2382,8 +2333,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
ydst += lumStride; ydst += lumStride;
src += srcStride; src += srcStride;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
unsigned int b = src[6*i+0]; unsigned int b = src[6*i+0];
unsigned int g = src[6*i+1]; unsigned int g = src[6*i+1];
unsigned int r = src[6*i+2]; unsigned int r = src[6*i+2];
...@@ -2408,11 +2358,11 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -2408,11 +2358,11 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
long width, long height, long src1Stride, long width, long height, long src1Stride,
long src2Stride, long dstStride){ long src2Stride, long dstStride)
{
long h; long h;
for (h=0; h < height; h++) for (h=0; h < height; h++) {
{
long w; long w;
#if HAVE_MMX #if HAVE_MMX
...@@ -2462,14 +2412,12 @@ static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, ...@@ -2462,14 +2412,12 @@ static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
: "memory", "%"REG_a : "memory", "%"REG_a
); );
#endif #endif
for (w= (width&(~15)); w < width; w++) for (w= (width&(~15)); w < width; w++) {
{
dest[2*w+0] = src1[w]; dest[2*w+0] = src1[w];
dest[2*w+1] = src2[w]; dest[2*w+1] = src2[w];
} }
#else #else
for (w=0; w < width; w++) for (w=0; w < width; w++) {
{
dest[2*w+0] = src1[w]; dest[2*w+0] = src1[w];
dest[2*w+1] = src2[w]; dest[2*w+1] = src2[w];
} }
...@@ -2502,13 +2450,12 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, ...@@ -2502,13 +2450,12 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
PREFETCH" %1 \n\t" PREFETCH" %1 \n\t"
::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
#endif #endif
for (y=0;y<h;y++){ for (y=0;y<h;y++) {
const uint8_t* s1=src1+srcStride1*(y>>1); const uint8_t* s1=src1+srcStride1*(y>>1);
uint8_t* d=dst1+dstStride1*y; uint8_t* d=dst1+dstStride1*y;
x=0; x=0;
#if HAVE_MMX #if HAVE_MMX
for (;x<w-31;x+=32) for (;x<w-31;x+=32) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
...@@ -2542,13 +2489,12 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, ...@@ -2542,13 +2489,12 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
#endif #endif
for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
} }
for (y=0;y<h;y++){ for (y=0;y<h;y++) {
const uint8_t* s2=src2+srcStride2*(y>>1); const uint8_t* s2=src2+srcStride2*(y>>1);
uint8_t* d=dst2+dstStride2*y; uint8_t* d=dst2+dstStride2*y;
x=0; x=0;
#if HAVE_MMX #if HAVE_MMX
for (;x<w-31;x+=32) for (;x<w-31;x+=32) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
...@@ -2600,15 +2546,14 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 ...@@ -2600,15 +2546,14 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
x86_reg x; x86_reg x;
long y,w,h; long y,w,h;
w=width/2; h=height; w=width/2; h=height;
for (y=0;y<h;y++){ for (y=0;y<h;y++) {
const uint8_t* yp=src1+srcStride1*y; const uint8_t* yp=src1+srcStride1*y;
const uint8_t* up=src2+srcStride2*(y>>2); const uint8_t* up=src2+srcStride2*(y>>2);
const uint8_t* vp=src3+srcStride3*(y>>2); const uint8_t* vp=src3+srcStride3*(y>>2);
uint8_t* d=dst+dstStride*y; uint8_t* d=dst+dstStride*y;
x=0; x=0;
#if HAVE_MMX #if HAVE_MMX
for (;x<w-7;x+=8) for (;x<w-7;x+=8) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32(%1, %0) \n\t" PREFETCH" 32(%1, %0) \n\t"
PREFETCH" 32(%2, %0) \n\t" PREFETCH" 32(%2, %0) \n\t"
...@@ -2661,8 +2606,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 ...@@ -2661,8 +2606,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
:"memory"); :"memory");
} }
#endif #endif
for (; x<w; x++) for (; x<w; x++) {
{
const long x2 = x<<2; const long x2 = x<<2;
d[8*x+0] = yp[x2]; d[8*x+0] = yp[x2];
d[8*x+1] = up[x]; d[8*x+1] = up[x];
...@@ -2690,7 +2634,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count ...@@ -2690,7 +2634,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count
count= - count; count= - count;
#if HAVE_MMX #if HAVE_MMX
if(count <= -16){ if(count <= -16) {
count += 15; count += 15;
__asm__ volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
...@@ -2716,7 +2660,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count ...@@ -2716,7 +2660,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count
count -= 15; count -= 15;
} }
#endif #endif
while(count<0){ while(count<0) {
dst[count]= src[2*count]; dst[count]= src[2*count];
count++; count++;
} }
...@@ -2729,7 +2673,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds ...@@ -2729,7 +2673,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds
src += 4*count; src += 4*count;
count= - count; count= - count;
#if HAVE_MMX #if HAVE_MMX
if(count <= -8){ if(count <= -8) {
count += 7; count += 7;
__asm__ volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
...@@ -2763,7 +2707,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds ...@@ -2763,7 +2707,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds
count -= 7; count -= 7;
} }
#endif #endif
while(count<0){ while(count<0) {
dst0[count]= src[4*count+0]; dst0[count]= src[4*count+0];
dst1[count]= src[4*count+2]; dst1[count]= src[4*count+2];
count++; count++;
...@@ -2778,7 +2722,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u ...@@ -2778,7 +2722,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u
src1 += 4*count; src1 += 4*count;
count= - count; count= - count;
#ifdef PAVGB #ifdef PAVGB
if(count <= -8){ if(count <= -8) {
count += 7; count += 7;
__asm__ volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
...@@ -2816,7 +2760,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u ...@@ -2816,7 +2760,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u
count -= 7; count -= 7;
} }
#endif #endif
while(count<0){ while(count<0) {
dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
count++; count++;
...@@ -2830,7 +2774,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst ...@@ -2830,7 +2774,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst
src += 4*count; src += 4*count;
count= - count; count= - count;
#if HAVE_MMX #if HAVE_MMX
if(count <= -8){ if(count <= -8) {
count += 7; count += 7;
__asm__ volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
...@@ -2865,7 +2809,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst ...@@ -2865,7 +2809,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst
} }
#endif #endif
src++; src++;
while(count<0){ while(count<0) {
dst0[count]= src[4*count+0]; dst0[count]= src[4*count+0];
dst1[count]= src[4*count+2]; dst1[count]= src[4*count+2];
count++; count++;
...@@ -2880,7 +2824,7 @@ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, ui ...@@ -2880,7 +2824,7 @@ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, ui
src1 += 4*count; src1 += 4*count;
count= - count; count= - count;
#ifdef PAVGB #ifdef PAVGB
if(count <= -8){ if(count <= -8) {
count += 7; count += 7;
__asm__ volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
...@@ -2920,7 +2864,7 @@ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, ui ...@@ -2920,7 +2864,7 @@ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, ui
#endif #endif
src0++; src0++;
src1++; src1++;
while(count<0){ while(count<0) {
dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
count++; count++;
...@@ -2934,9 +2878,9 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ...@@ -2934,9 +2878,9 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
long y; long y;
const long chromWidth= -((-width)>>1); const long chromWidth= -((-width)>>1);
for (y=0; y<height; y++){ for (y=0; y<height; y++) {
RENAME(extract_even)(src, ydst, width); RENAME(extract_even)(src, ydst, width);
if(y&1){ if(y&1) {
RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth); RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth);
udst+= chromStride; udst+= chromStride;
vdst+= chromStride; vdst+= chromStride;
...@@ -2961,7 +2905,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ...@@ -2961,7 +2905,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
long y; long y;
const long chromWidth= -((-width)>>1); const long chromWidth= -((-width)>>1);
for (y=0; y<height; y++){ for (y=0; y<height; y++) {
RENAME(extract_even)(src, ydst, width); RENAME(extract_even)(src, ydst, width);
RENAME(extract_odd2)(src, udst, vdst, chromWidth); RENAME(extract_odd2)(src, udst, vdst, chromWidth);
...@@ -2986,9 +2930,9 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ...@@ -2986,9 +2930,9 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
long y; long y;
const long chromWidth= -((-width)>>1); const long chromWidth= -((-width)>>1);
for (y=0; y<height; y++){ for (y=0; y<height; y++) {
RENAME(extract_even)(src+1, ydst, width); RENAME(extract_even)(src+1, ydst, width);
if(y&1){ if(y&1) {
RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth); RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
udst+= chromStride; udst+= chromStride;
vdst+= chromStride; vdst+= chromStride;
...@@ -3013,7 +2957,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ...@@ -3013,7 +2957,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
long y; long y;
const long chromWidth= -((-width)>>1); const long chromWidth= -((-width)>>1);
for (y=0; y<height; y++){ for (y=0; y<height; y++) {
RENAME(extract_even)(src+1, ydst, width); RENAME(extract_even)(src+1, ydst, width);
RENAME(extract_even2)(src, udst, vdst, chromWidth); RENAME(extract_even2)(src, udst, vdst, chromWidth);
...@@ -3031,7 +2975,8 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ...@@ -3031,7 +2975,8 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
#endif #endif
} }
static inline void RENAME(rgb2rgb_init)(void){ static inline void RENAME(rgb2rgb_init)(void)
{
rgb15to16 = RENAME(rgb15to16); rgb15to16 = RENAME(rgb15to16);
rgb15tobgr24 = RENAME(rgb15tobgr24); rgb15tobgr24 = RENAME(rgb15tobgr24);
rgb15to32 = RENAME(rgb15to32); rgb15to32 = RENAME(rgb15to32);
......
...@@ -82,7 +82,8 @@ ...@@ -82,7 +82,8 @@
// FIXME: must be changed to set alpha to 255 instead of 0 // FIXME: must be changed to set alpha to 255 instead of 0
static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, out1, out2, out3, out4, out5, out6; int y, out1, out2, out3, out4, out5, out6;
for(y=0;y < srcSliceH;++y) { for(y=0;y < srcSliceH;++y) {
...@@ -134,7 +135,8 @@ static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -134,7 +135,8 @@ static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s
// FIXME: must be changed to set alpha to 255 instead of 0 // FIXME: must be changed to set alpha to 255 instead of 0
static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, out1, out2, out3, out4, out5, out6; int y, out1, out2, out3, out4, out5, out6;
for(y=0;y < srcSliceH;++y) { for(y=0;y < srcSliceH;++y) {
...@@ -184,7 +186,8 @@ static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -184,7 +186,8 @@ static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s
return srcSliceH; return srcSliceH;
} }
SwsFunc ff_yuv2rgb_init_vis(SwsContext *c){ SwsFunc ff_yuv2rgb_init_vis(SwsContext *c)
{
c->sparc_coeffs[5]=c->yCoeff; c->sparc_coeffs[5]=c->yCoeff;
c->sparc_coeffs[6]=c->vgCoeff; c->sparc_coeffs[6]=c->vgCoeff;
c->sparc_coeffs[7]=c->vrCoeff; c->sparc_coeffs[7]=c->vrCoeff;
......
...@@ -50,14 +50,15 @@ const char *sws_format_name(enum PixelFormat format); ...@@ -50,14 +50,15 @@ const char *sws_format_name(enum PixelFormat format);
|| (x)==PIX_FMT_YUVA420P \ || (x)==PIX_FMT_YUVA420P \
) )
static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, int w, int h){ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, int w, int h)
{
int x,y; int x,y;
uint64_t ssd=0; uint64_t ssd=0;
//printf("%d %d\n", w, h); //printf("%d %d\n", w, h);
for (y=0; y<h; y++){ for (y=0; y<h; y++) {
for (x=0; x<w; x++){ for (x=0; x<w; x++) {
int d= src1[x + y*stride1] - src2[x + y*stride2]; int d= src1[x + y*stride1] - src2[x + y*stride2];
ssd+= d*d; ssd+= d*d;
//printf("%d", abs(src1[x + y*stride1] - src2[x + y*stride2])/26 ); //printf("%d", abs(src1[x + y*stride1] - src2[x + y*stride2])/26 );
...@@ -70,7 +71,8 @@ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, i ...@@ -70,7 +71,8 @@ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, i
// test by ref -> src -> dst -> out & compare out against ref // test by ref -> src -> dst -> out & compare out against ref
// ref & out are YV12 // ref & out are YV12
static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat, int dstFormat, static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat, int dstFormat,
int srcW, int srcH, int dstW, int dstH, int flags){ int srcW, int srcH, int dstW, int dstH, int flags)
{
uint8_t *src[4] = {0}; uint8_t *src[4] = {0};
uint8_t *dst[4] = {0}; uint8_t *dst[4] = {0};
uint8_t *out[4] = {0}; uint8_t *out[4] = {0};
...@@ -82,7 +84,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat ...@@ -82,7 +84,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat
int res; int res;
res = 0; res = 0;
for (i=0; i<4; i++){ for (i=0; i<4; i++) {
// avoid stride % bpp != 0 // avoid stride % bpp != 0
if (srcFormat==PIX_FMT_RGB24 || srcFormat==PIX_FMT_BGR24) if (srcFormat==PIX_FMT_RGB24 || srcFormat==PIX_FMT_BGR24)
srcStride[i]= srcW*3; srcStride[i]= srcW*3;
...@@ -169,7 +171,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat ...@@ -169,7 +171,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat
sws_freeContext(dstContext); sws_freeContext(dstContext);
sws_freeContext(outContext); sws_freeContext(outContext);
for (i=0; i<4; i++){ for (i=0; i<4; i++) {
free(src[i]); free(src[i]);
free(dst[i]); free(dst[i]);
free(out[i]); free(out[i]);
...@@ -178,7 +180,8 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat ...@@ -178,7 +180,8 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat
return res; return res;
} }
static void selfTest(uint8_t *src[4], int stride[4], int w, int h){ static void selfTest(uint8_t *src[4], int stride[4], int w, int h)
{
enum PixelFormat srcFormat, dstFormat; enum PixelFormat srcFormat, dstFormat;
int srcW, srcH, dstW, dstH; int srcW, srcH, dstW, dstH;
int flags; int flags;
...@@ -206,7 +209,8 @@ static void selfTest(uint8_t *src[4], int stride[4], int w, int h){ ...@@ -206,7 +209,8 @@ static void selfTest(uint8_t *src[4], int stride[4], int w, int h){
#define W 96 #define W 96
#define H 96 #define H 96
int main(int argc, char **argv){ int main(int argc, char **argv)
{
uint8_t *rgb_data = malloc (W*H*4); uint8_t *rgb_data = malloc (W*H*4);
uint8_t *rgb_src[3]= {rgb_data, NULL, NULL}; uint8_t *rgb_src[3]= {rgb_data, NULL, NULL};
int rgb_stride[3]={4*W, 0, 0}; int rgb_stride[3]={4*W, 0, 0};
...@@ -221,8 +225,8 @@ int main(int argc, char **argv){ ...@@ -221,8 +225,8 @@ int main(int argc, char **argv){
av_lfg_init(&rand, 1); av_lfg_init(&rand, 1);
for (y=0; y<H; y++){ for (y=0; y<H; y++) {
for (x=0; x<W*4; x++){ for (x=0; x<W*4; x++) {
rgb_data[ x + y*4*W]= av_lfg_get(&rand); rgb_data[ x + y*4*W]= av_lfg_get(&rand);
} }
} }
......
...@@ -561,8 +561,7 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, ...@@ -561,8 +561,7 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
{ {
//FIXME Optimize (just quickly written not optimized..) //FIXME Optimize (just quickly written not optimized..)
int i; int i;
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int val=1<<18; int val=1<<18;
int j; int j;
for (j=0; j<lumFilterSize; j++) for (j=0; j<lumFilterSize; j++)
...@@ -572,13 +571,11 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, ...@@ -572,13 +571,11 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
} }
if (uDest) if (uDest)
for (i=0; i<chrDstW; i++) for (i=0; i<chrDstW; i++) {
{
int u=1<<18; int u=1<<18;
int v=1<<18; int v=1<<18;
int j; int j;
for (j=0; j<chrFilterSize; j++) for (j=0; j<chrFilterSize; j++) {
{
u += chrSrc[j][i] * chrFilter[j]; u += chrSrc[j][i] * chrFilter[j];
v += chrSrc[j][i + VOFW] * chrFilter[j]; v += chrSrc[j][i + VOFW] * chrFilter[j];
} }
...@@ -588,7 +585,7 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, ...@@ -588,7 +585,7 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
} }
if (CONFIG_SWSCALE_ALPHA && aDest) if (CONFIG_SWSCALE_ALPHA && aDest)
for (i=0; i<dstW; i++){ for (i=0; i<dstW; i++) {
int val=1<<18; int val=1<<18;
int j; int j;
for (j=0; j<lumFilterSize; j++) for (j=0; j<lumFilterSize; j++)
...@@ -605,8 +602,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -605,8 +602,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
{ {
//FIXME Optimize (just quickly written not optimized..) //FIXME Optimize (just quickly written not optimized..)
int i; int i;
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int val=1<<18; int val=1<<18;
int j; int j;
for (j=0; j<lumFilterSize; j++) for (j=0; j<lumFilterSize; j++)
...@@ -619,13 +615,11 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -619,13 +615,11 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
return; return;
if (dstFormat == PIX_FMT_NV12) if (dstFormat == PIX_FMT_NV12)
for (i=0; i<chrDstW; i++) for (i=0; i<chrDstW; i++) {
{
int u=1<<18; int u=1<<18;
int v=1<<18; int v=1<<18;
int j; int j;
for (j=0; j<chrFilterSize; j++) for (j=0; j<chrFilterSize; j++) {
{
u += chrSrc[j][i] * chrFilter[j]; u += chrSrc[j][i] * chrFilter[j];
v += chrSrc[j][i + VOFW] * chrFilter[j]; v += chrSrc[j][i + VOFW] * chrFilter[j];
} }
...@@ -634,13 +628,11 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -634,13 +628,11 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
uDest[2*i+1]= av_clip_uint8(v>>19); uDest[2*i+1]= av_clip_uint8(v>>19);
} }
else else
for (i=0; i<chrDstW; i++) for (i=0; i<chrDstW; i++) {
{
int u=1<<18; int u=1<<18;
int v=1<<18; int v=1<<18;
int j; int j;
for (j=0; j<chrFilterSize; j++) for (j=0; j<chrFilterSize; j++) {
{
u += chrSrc[j][i] * chrFilter[j]; u += chrSrc[j][i] * chrFilter[j];
v += chrSrc[j][i + VOFW] * chrFilter[j]; v += chrSrc[j][i + VOFW] * chrFilter[j];
} }
...@@ -651,7 +643,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -651,7 +643,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
} }
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \ #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
for (i=0; i<(dstW>>1); i++){\ for (i=0; i<(dstW>>1); i++) {\
int j;\ int j;\
int Y1 = 1<<18;\ int Y1 = 1<<18;\
int Y2 = 1<<18;\ int Y2 = 1<<18;\
...@@ -661,13 +653,11 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -661,13 +653,11 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
type av_unused *r, *b, *g;\ type av_unused *r, *b, *g;\
const int i2= 2*i;\ const int i2= 2*i;\
\ \
for (j=0; j<lumFilterSize; j++)\ for (j=0; j<lumFilterSize; j++) {\
{\
Y1 += lumSrc[j][i2] * lumFilter[j];\ Y1 += lumSrc[j][i2] * lumFilter[j];\
Y2 += lumSrc[j][i2+1] * lumFilter[j];\ Y2 += lumSrc[j][i2+1] * lumFilter[j];\
}\ }\
for (j=0; j<chrFilterSize; j++)\ for (j=0; j<chrFilterSize; j++) {\
{\
U += chrSrc[j][i] * chrFilter[j];\ U += chrSrc[j][i] * chrFilter[j];\
V += chrSrc[j][i+VOFW] * chrFilter[j];\ V += chrSrc[j][i+VOFW] * chrFilter[j];\
}\ }\
...@@ -675,10 +665,10 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -675,10 +665,10 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
Y2>>=19;\ Y2>>=19;\
U >>=19;\ U >>=19;\
V >>=19;\ V >>=19;\
if (alpha){\ if (alpha) {\
A1 = 1<<18;\ A1 = 1<<18;\
A2 = 1<<18;\ A2 = 1<<18;\
for (j=0; j<lumFilterSize; j++){\ for (j=0; j<lumFilterSize; j++) {\
A1 += alpSrc[j][i2 ] * lumFilter[j];\ A1 += alpSrc[j][i2 ] * lumFilter[j];\
A2 += alpSrc[j][i2+1] * lumFilter[j];\ A2 += alpSrc[j][i2+1] * lumFilter[j];\
}\ }\
...@@ -688,8 +678,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -688,8 +678,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \ #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\ YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
if ((Y1|Y2|U|V)&256)\ if ((Y1|Y2|U|V)&256) {\
{\
if (Y1>255) Y1=255; \ if (Y1>255) Y1=255; \
else if (Y1<0)Y1=0; \ else if (Y1<0)Y1=0; \
if (Y2>255) Y2=255; \ if (Y2>255) Y2=255; \
...@@ -699,13 +688,13 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -699,13 +688,13 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
if (V>255) V=255; \ if (V>255) V=255; \
else if (V<0) V=0; \ else if (V<0) V=0; \
}\ }\
if (alpha && ((A1|A2)&256)){\ if (alpha && ((A1|A2)&256)) {\
A1=av_clip_uint8(A1);\ A1=av_clip_uint8(A1);\
A2=av_clip_uint8(A2);\ A2=av_clip_uint8(A2);\
} }
#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \ #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
for (i=0; i<dstW; i++){\ for (i=0; i<dstW; i++) {\
int j;\ int j;\
int Y = 0;\ int Y = 0;\
int U = -128<<19;\ int U = -128<<19;\
...@@ -713,17 +702,17 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -713,17 +702,17 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
int av_unused A;\ int av_unused A;\
int R,G,B;\ int R,G,B;\
\ \
for (j=0; j<lumFilterSize; j++){\ for (j=0; j<lumFilterSize; j++) {\
Y += lumSrc[j][i ] * lumFilter[j];\ Y += lumSrc[j][i ] * lumFilter[j];\
}\ }\
for (j=0; j<chrFilterSize; j++){\ for (j=0; j<chrFilterSize; j++) {\
U += chrSrc[j][i ] * chrFilter[j];\ U += chrSrc[j][i ] * chrFilter[j];\
V += chrSrc[j][i+VOFW] * chrFilter[j];\ V += chrSrc[j][i+VOFW] * chrFilter[j];\
}\ }\
Y >>=10;\ Y >>=10;\
U >>=10;\ U >>=10;\
V >>=10;\ V >>=10;\
if (alpha){\ if (alpha) {\
A = rnd;\ A = rnd;\
for (j=0; j<lumFilterSize; j++)\ for (j=0; j<lumFilterSize; j++)\
A += alpSrc[j][i ] * lumFilter[j];\ A += alpSrc[j][i ] * lumFilter[j];\
...@@ -740,7 +729,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -740,7 +729,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
R= Y + V*c->yuv2rgb_v2r_coeff;\ R= Y + V*c->yuv2rgb_v2r_coeff;\
G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\ G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
B= Y + U*c->yuv2rgb_u2b_coeff;\ B= Y + U*c->yuv2rgb_u2b_coeff;\
if ((R|G|B)&(0xC0000000)){\ if ((R|G|B)&(0xC0000000)) {\
if (R>=(256<<22)) R=(256<<22)-1; \ if (R>=(256<<22)) R=(256<<22)-1; \
else if (R<0)R=0; \ else if (R<0)R=0; \
if (G>=(256<<22)) G=(256<<22)-1; \ if (G>=(256<<22)) G=(256<<22)-1; \
...@@ -751,7 +740,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -751,7 +740,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
#define YSCALE_YUV_2_GRAY16_C \ #define YSCALE_YUV_2_GRAY16_C \
for (i=0; i<(dstW>>1); i++){\ for (i=0; i<(dstW>>1); i++) {\
int j;\ int j;\
int Y1 = 1<<18;\ int Y1 = 1<<18;\
int Y2 = 1<<18;\ int Y2 = 1<<18;\
...@@ -760,15 +749,13 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -760,15 +749,13 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
\ \
const int i2= 2*i;\ const int i2= 2*i;\
\ \
for (j=0; j<lumFilterSize; j++)\ for (j=0; j<lumFilterSize; j++) {\
{\
Y1 += lumSrc[j][i2] * lumFilter[j];\ Y1 += lumSrc[j][i2] * lumFilter[j];\
Y2 += lumSrc[j][i2+1] * lumFilter[j];\ Y2 += lumSrc[j][i2+1] * lumFilter[j];\
}\ }\
Y1>>=11;\ Y1>>=11;\
Y2>>=11;\ Y2>>=11;\
if ((Y1|Y2|U|V)&65536)\ if ((Y1|Y2|U|V)&65536) {\
{\
if (Y1>65535) Y1=65535; \ if (Y1>65535) Y1=65535; \
else if (Y1<0)Y1=0; \ else if (Y1<0)Y1=0; \
if (Y2>65535) Y2=65535; \ if (Y2>65535) Y2=65535; \
...@@ -782,7 +769,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -782,7 +769,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
b = (type *)c->table_bU[U]; \ b = (type *)c->table_bU[U]; \
#define YSCALE_YUV_2_PACKED2_C(type,alpha) \ #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
for (i=0; i<(dstW>>1); i++){ \ for (i=0; i<(dstW>>1); i++) { \
const int i2= 2*i; \ const int i2= 2*i; \
int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \ int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \ int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
...@@ -790,13 +777,13 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -790,13 +777,13 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19; \ int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19; \
type av_unused *r, *b, *g; \ type av_unused *r, *b, *g; \
int av_unused A1, A2; \ int av_unused A1, A2; \
if (alpha){\ if (alpha) {\
A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \ A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \ A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
}\ }\
#define YSCALE_YUV_2_GRAY16_2_C \ #define YSCALE_YUV_2_GRAY16_2_C \
for (i=0; i<(dstW>>1); i++){ \ for (i=0; i<(dstW>>1); i++) { \
const int i2= 2*i; \ const int i2= 2*i; \
int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \ int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \
int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11; \ int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11; \
...@@ -808,7 +795,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -808,7 +795,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
b = (type *)c->table_bU[U];\ b = (type *)c->table_bU[U];\
#define YSCALE_YUV_2_PACKED1_C(type,alpha) \ #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
for (i=0; i<(dstW>>1); i++){\ for (i=0; i<(dstW>>1); i++) {\
const int i2= 2*i;\ const int i2= 2*i;\
int Y1= buf0[i2 ]>>7;\ int Y1= buf0[i2 ]>>7;\
int Y2= buf0[i2+1]>>7;\ int Y2= buf0[i2+1]>>7;\
...@@ -816,13 +803,13 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -816,13 +803,13 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
int V= (uvbuf1[i+VOFW])>>7;\ int V= (uvbuf1[i+VOFW])>>7;\
type av_unused *r, *b, *g;\ type av_unused *r, *b, *g;\
int av_unused A1, A2;\ int av_unused A1, A2;\
if (alpha){\ if (alpha) {\
A1= abuf0[i2 ]>>7;\ A1= abuf0[i2 ]>>7;\
A2= abuf0[i2+1]>>7;\ A2= abuf0[i2+1]>>7;\
}\ }\
#define YSCALE_YUV_2_GRAY16_1_C \ #define YSCALE_YUV_2_GRAY16_1_C \
for (i=0; i<(dstW>>1); i++){\ for (i=0; i<(dstW>>1); i++) {\
const int i2= 2*i;\ const int i2= 2*i;\
int Y1= buf0[i2 ]<<1;\ int Y1= buf0[i2 ]<<1;\
int Y2= buf0[i2+1]<<1;\ int Y2= buf0[i2+1]<<1;\
...@@ -834,7 +821,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -834,7 +821,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
b = (type *)c->table_bU[U];\ b = (type *)c->table_bU[U];\
#define YSCALE_YUV_2_PACKED1B_C(type,alpha) \ #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
for (i=0; i<(dstW>>1); i++){\ for (i=0; i<(dstW>>1); i++) {\
const int i2= 2*i;\ const int i2= 2*i;\
int Y1= buf0[i2 ]>>7;\ int Y1= buf0[i2 ]>>7;\
int Y2= buf0[i2+1]>>7;\ int Y2= buf0[i2+1]>>7;\
...@@ -842,7 +829,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -842,7 +829,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\ int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
type av_unused *r, *b, *g;\ type av_unused *r, *b, *g;\
int av_unused A1, A2;\ int av_unused A1, A2;\
if (alpha){\ if (alpha) {\
A1= abuf0[i2 ]>>7;\ A1= abuf0[i2 ]>>7;\
A2= abuf0[i2+1]>>7;\ A2= abuf0[i2+1]>>7;\
}\ }\
...@@ -856,7 +843,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -856,7 +843,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
#define YSCALE_YUV_2_MONO2_C \ #define YSCALE_YUV_2_MONO2_C \
const uint8_t * const d128=dither_8x8_220[y&7];\ const uint8_t * const d128=dither_8x8_220[y&7];\
uint8_t *g= c->table_gU[128] + c->table_gV[128];\ uint8_t *g= c->table_gU[128] + c->table_gV[128];\
for (i=0; i<dstW-7; i+=8){\ for (i=0; i<dstW-7; i+=8) {\
int acc;\ int acc;\
acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\ acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\ acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
...@@ -875,20 +862,18 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -875,20 +862,18 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
const uint8_t * const d128=dither_8x8_220[y&7];\ const uint8_t * const d128=dither_8x8_220[y&7];\
uint8_t *g= c->table_gU[128] + c->table_gV[128];\ uint8_t *g= c->table_gU[128] + c->table_gV[128];\
int acc=0;\ int acc=0;\
for (i=0; i<dstW-1; i+=2){\ for (i=0; i<dstW-1; i+=2) {\
int j;\ int j;\
int Y1=1<<18;\ int Y1=1<<18;\
int Y2=1<<18;\ int Y2=1<<18;\
\ \
for (j=0; j<lumFilterSize; j++)\ for (j=0; j<lumFilterSize; j++) {\
{\
Y1 += lumSrc[j][i] * lumFilter[j];\ Y1 += lumSrc[j][i] * lumFilter[j];\
Y2 += lumSrc[j][i+1] * lumFilter[j];\ Y2 += lumSrc[j][i+1] * lumFilter[j];\
}\ }\
Y1>>=19;\ Y1>>=19;\
Y2>>=19;\ Y2>>=19;\
if ((Y1|Y2)&256)\ if ((Y1|Y2)&256) {\
{\
if (Y1>255) Y1=255;\ if (Y1>255) Y1=255;\
else if (Y1<0)Y1=0;\ else if (Y1<0)Y1=0;\
if (Y2>255) Y2=255;\ if (Y2>255) Y2=255;\
...@@ -896,7 +881,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -896,7 +881,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
}\ }\
acc+= acc + g[Y1+d128[(i+0)&7]];\ acc+= acc + g[Y1+d128[(i+0)&7]];\
acc+= acc + g[Y2+d128[(i+1)&7]];\ acc+= acc + g[Y2+d128[(i+1)&7]];\
if ((i&7)==6){\ if ((i&7)==6) {\
((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\ ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
dest++;\ dest++;\
}\ }\
...@@ -904,8 +889,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -904,8 +889,7 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\ #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
switch(c->dstFormat)\ switch(c->dstFormat) {\
{\
case PIX_FMT_RGB48BE:\ case PIX_FMT_RGB48BE:\
case PIX_FMT_RGB48LE:\ case PIX_FMT_RGB48LE:\
func(uint8_t,0)\ func(uint8_t,0)\
...@@ -926,19 +910,19 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -926,19 +910,19 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
break;\ break;\
case PIX_FMT_RGBA:\ case PIX_FMT_RGBA:\
case PIX_FMT_BGRA:\ case PIX_FMT_BGRA:\
if (CONFIG_SMALL){\ if (CONFIG_SMALL) {\
int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\ int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
func(uint32_t,needAlpha)\ func(uint32_t,needAlpha)\
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
}\ }\
}else{\ } else {\
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
func(uint32_t,1)\ func(uint32_t,1)\
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
}\ }\
}else{\ } else {\
func(uint32_t,0)\ func(uint32_t,0)\
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
...@@ -948,19 +932,19 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc ...@@ -948,19 +932,19 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
break;\ break;\
case PIX_FMT_ARGB:\ case PIX_FMT_ARGB:\
case PIX_FMT_ABGR:\ case PIX_FMT_ABGR:\
if (CONFIG_SMALL){\ if (CONFIG_SMALL) {\
int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\ int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
func(uint32_t,needAlpha)\ func(uint32_t,needAlpha)\
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
}\ }\
}else{\ } else {\
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
func(uint32_t,1)\ func(uint32_t,1)\
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
}\ }\
}else{\ } else {\
func(uint32_t,0)\ func(uint32_t,0)\
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
...@@ -1110,14 +1094,14 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con ...@@ -1110,14 +1094,14 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con
int step= fmt_depth(c->dstFormat)/8; int step= fmt_depth(c->dstFormat)/8;
int aidx= 3; int aidx= 3;
switch(c->dstFormat){ switch(c->dstFormat) {
case PIX_FMT_ARGB: case PIX_FMT_ARGB:
dest++; dest++;
aidx= 0; aidx= 0;
case PIX_FMT_RGB24: case PIX_FMT_RGB24:
aidx--; aidx--;
case PIX_FMT_RGBA: case PIX_FMT_RGBA:
if (CONFIG_SMALL){ if (CONFIG_SMALL) {
int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf; int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha) YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
dest[aidx]= needAlpha ? A : 255; dest[aidx]= needAlpha ? A : 255;
...@@ -1126,8 +1110,8 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con ...@@ -1126,8 +1110,8 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con
dest[2]= B>>22; dest[2]= B>>22;
dest+= step; dest+= step;
} }
}else{ } else {
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1) YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
dest[aidx]= A; dest[aidx]= A;
dest[0]= R>>22; dest[0]= R>>22;
...@@ -1135,7 +1119,7 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con ...@@ -1135,7 +1119,7 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con
dest[2]= B>>22; dest[2]= B>>22;
dest+= step; dest+= step;
} }
}else{ } else {
YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0) YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
dest[aidx]= 255; dest[aidx]= 255;
dest[0]= R>>22; dest[0]= R>>22;
...@@ -1152,7 +1136,7 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con ...@@ -1152,7 +1136,7 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con
case PIX_FMT_BGR24: case PIX_FMT_BGR24:
aidx--; aidx--;
case PIX_FMT_BGRA: case PIX_FMT_BGRA:
if (CONFIG_SMALL){ if (CONFIG_SMALL) {
int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf; int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha) YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
dest[aidx]= needAlpha ? A : 255; dest[aidx]= needAlpha ? A : 255;
...@@ -1161,8 +1145,8 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con ...@@ -1161,8 +1145,8 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con
dest[2]= R>>22; dest[2]= R>>22;
dest+= step; dest+= step;
} }
}else{ } else {
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1) YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
dest[aidx]= A; dest[aidx]= A;
dest[0]= B>>22; dest[0]= B>>22;
...@@ -1170,7 +1154,7 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con ...@@ -1170,7 +1154,7 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con
dest[2]= R>>22; dest[2]= R>>22;
dest+= step; dest+= step;
} }
}else{ } else {
YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0) YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
dest[aidx]= 255; dest[aidx]= 255;
dest[0]= B>>22; dest[0]= B>>22;
...@@ -1186,10 +1170,11 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con ...@@ -1186,10 +1170,11 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con
} }
} }
static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val){ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
{
int i; int i;
uint8_t *ptr = plane + stride*y; uint8_t *ptr = plane + stride*y;
for (i=0; i<height; i++){ for (i=0; i<height; i++) {
memset(ptr, val, width); memset(ptr, val, width);
ptr += stride; ptr += stride;
} }
...@@ -1241,8 +1226,7 @@ static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV, ...@@ -1241,8 +1226,7 @@ static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\ static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\
{\ {\
int i;\ int i;\
for (i=0; i<width; i++)\ for (i=0; i<width; i++) {\
{\
int b= (((const type*)src)[i]>>shb)&maskb;\ int b= (((const type*)src)[i]>>shb)&maskb;\
int g= (((const type*)src)[i]>>shg)&maskg;\ int g= (((const type*)src)[i]>>shg)&maskg;\
int r= (((const type*)src)[i]>>shr)&maskr;\ int r= (((const type*)src)[i]>>shr)&maskr;\
...@@ -1258,9 +1242,10 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY ...@@ -1258,9 +1242,10 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY
BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8) BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7) BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused){ static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
int i; int i;
for (i=0; i<width; i++){ for (i=0; i<width; i++) {
dst[i]= src[4*i]; dst[i]= src[4*i];
} }
} }
...@@ -1269,8 +1254,7 @@ static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_ ...@@ -1269,8 +1254,7 @@ static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_
static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\ static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
{\ {\
int i;\ int i;\
for (i=0; i<width; i++)\ for (i=0; i<width; i++) {\
{\
int b= (((const type*)src)[i]&maskb)>>shb;\ int b= (((const type*)src)[i]&maskb)>>shb;\
int g= (((const type*)src)[i]&maskg)>>shg;\ int g= (((const type*)src)[i]&maskg)>>shg;\
int r= (((const type*)src)[i]&maskr)>>shr;\ int r= (((const type*)src)[i]&maskr)>>shr;\
...@@ -1282,8 +1266,7 @@ static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const ...@@ -1282,8 +1266,7 @@ static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const
static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\ static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
{\ {\
int i;\ int i;\
for (i=0; i<width; i++)\ for (i=0; i<width; i++) {\
{\
int pix0= ((const type*)src)[2*i+0];\ int pix0= ((const type*)src)[2*i+0];\
int pix1= ((const type*)src)[2*i+1];\ int pix1= ((const type*)src)[2*i+1];\
int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\ int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
...@@ -1308,8 +1291,7 @@ BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU ...@@ -1308,8 +1291,7 @@ BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU
static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal) static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
{ {
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int d= src[i]; int d= src[i];
dst[i]= pal[d] & 0xFF; dst[i]= pal[d] & 0xFF;
...@@ -1322,8 +1304,7 @@ static inline void palToUV(uint8_t *dstU, uint8_t *dstV, ...@@ -1322,8 +1304,7 @@ static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
{ {
int i; int i;
assert(src1 == src2); assert(src1 == src2);
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int p= pal[src1[i]]; int p= pal[src1[i]];
dstU[i]= p>>8; dstU[i]= p>>8;
...@@ -1334,7 +1315,7 @@ static inline void palToUV(uint8_t *dstU, uint8_t *dstV, ...@@ -1334,7 +1315,7 @@ static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{ {
int i, j; int i, j;
for (i=0; i<width/8; i++){ for (i=0; i<width/8; i++) {
int d= ~src[i]; int d= ~src[i];
for(j=0; j<8; j++) for(j=0; j<8; j++)
dst[8*i+j]= ((d>>(7-j))&1)*255; dst[8*i+j]= ((d>>(7-j))&1)*255;
...@@ -1344,7 +1325,7 @@ static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uin ...@@ -1344,7 +1325,7 @@ static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uin
static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{ {
int i, j; int i, j;
for (i=0; i<width/8; i++){ for (i=0; i<width/8; i++) {
int d= src[i]; int d= src[i];
for(j=0; j<8; j++) for(j=0; j<8; j++)
dst[8*i+j]= ((d>>(7-j))&1)*255; dst[8*i+j]= ((d>>(7-j))&1)*255;
...@@ -1471,53 +1452,44 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1471,53 +1452,44 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
// NOTE: the +1 is for the MMX scaler which reads over the end // NOTE: the +1 is for the MMX scaler which reads over the end
*filterPos = av_malloc((dstW+1)*sizeof(int16_t)); *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
if (FFABS(xInc - 0x10000) <10) // unscaled if (FFABS(xInc - 0x10000) <10) { // unscaled
{
int i; int i;
filterSize= 1; filterSize= 1;
filter= av_mallocz(dstW*sizeof(*filter)*filterSize); filter= av_mallocz(dstW*sizeof(*filter)*filterSize);
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
filter[i*filterSize]= fone; filter[i*filterSize]= fone;
(*filterPos)[i]=i; (*filterPos)[i]=i;
} }
} } else if (flags&SWS_POINT) { // lame looking point sampling mode
else if (flags&SWS_POINT) // lame looking point sampling mode
{
int i; int i;
int xDstInSrc; int xDstInSrc;
filterSize= 1; filterSize= 1;
filter= av_malloc(dstW*sizeof(*filter)*filterSize); filter= av_malloc(dstW*sizeof(*filter)*filterSize);
xDstInSrc= xInc/2 - 0x8000; xDstInSrc= xInc/2 - 0x8000;
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16; int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
(*filterPos)[i]= xx; (*filterPos)[i]= xx;
filter[i]= fone; filter[i]= fone;
xDstInSrc+= xInc; xDstInSrc+= xInc;
} }
} } else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) { // bilinear upscale
else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
{
int i; int i;
int xDstInSrc; int xDstInSrc;
filterSize= 2; filterSize= 2;
filter= av_malloc(dstW*sizeof(*filter)*filterSize); filter= av_malloc(dstW*sizeof(*filter)*filterSize);
xDstInSrc= xInc/2 - 0x8000; xDstInSrc= xInc/2 - 0x8000;
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16; int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
int j; int j;
(*filterPos)[i]= xx; (*filterPos)[i]= xx;
//bilinear upscale / linear interpolate / area averaging //bilinear upscale / linear interpolate / area averaging
for (j=0; j<filterSize; j++) for (j=0; j<filterSize; j++) {
{
int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16); int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16);
if (coeff<0) coeff=0; if (coeff<0) coeff=0;
filter[i*filterSize + j]= coeff; filter[i*filterSize + j]= coeff;
...@@ -1525,9 +1497,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1525,9 +1497,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
} }
xDstInSrc+= xInc; xDstInSrc+= xInc;
} }
} } else {
else
{
int xDstInSrc; int xDstInSrc;
int sizeFactor; int sizeFactor;
...@@ -1552,13 +1522,11 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1552,13 +1522,11 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
filter= av_malloc(dstW*sizeof(*filter)*filterSize); filter= av_malloc(dstW*sizeof(*filter)*filterSize);
xDstInSrc= xInc - 0x10000; xDstInSrc= xInc - 0x10000;
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int xx= (xDstInSrc - ((filterSize-2)<<16)) / (1<<17); int xx= (xDstInSrc - ((filterSize-2)<<16)) / (1<<17);
int j; int j;
(*filterPos)[i]= xx; (*filterPos)[i]= xx;
for (j=0; j<filterSize; j++) for (j=0; j<filterSize; j++) {
{
int64_t d= ((int64_t)FFABS((xx<<17) - xDstInSrc))<<13; int64_t d= ((int64_t)FFABS((xx<<17) - xDstInSrc))<<13;
double floatd; double floatd;
int64_t coeff; int64_t coeff;
...@@ -1567,8 +1535,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1567,8 +1535,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
d= d*dstW/srcW; d= d*dstW/srcW;
floatd= d * (1.0/(1<<30)); floatd= d * (1.0/(1<<30));
if (flags & SWS_BICUBIC) if (flags & SWS_BICUBIC) {
{
int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24); int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24);
int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24); int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
int64_t dd = ( d*d)>>30; int64_t dd = ( d*d)>>30;
...@@ -1582,14 +1549,12 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1582,14 +1549,12 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
coeff=0.0; coeff=0.0;
coeff *= fone>>(30+24); coeff *= fone>>(30+24);
} }
/* else if (flags & SWS_X) /* else if (flags & SWS_X) {
{
double p= param ? param*0.01 : 0.3; double p= param ? param*0.01 : 0.3;
coeff = d ? sin(d*PI)/(d*PI) : 1.0; coeff = d ? sin(d*PI)/(d*PI) : 1.0;
coeff*= pow(2.0, - p*d*d); coeff*= pow(2.0, - p*d*d);
}*/ }*/
else if (flags & SWS_X) else if (flags & SWS_X) {
{
double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0; double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
double c; double c;
...@@ -1600,42 +1565,29 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1600,42 +1565,29 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
if (c<0.0) c= -pow(-c, A); if (c<0.0) c= -pow(-c, A);
else c= pow( c, A); else c= pow( c, A);
coeff= (c*0.5 + 0.5)*fone; coeff= (c*0.5 + 0.5)*fone;
} } else if (flags & SWS_AREA) {
else if (flags & SWS_AREA)
{
int64_t d2= d - (1<<29); int64_t d2= d - (1<<29);
if (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16)); if (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16));
else if (d2*xInc < (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16)); else if (d2*xInc < (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16));
else coeff=0.0; else coeff=0.0;
coeff *= fone>>(30+16); coeff *= fone>>(30+16);
} } else if (flags & SWS_GAUSS) {
else if (flags & SWS_GAUSS)
{
double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = (pow(2.0, - p*floatd*floatd))*fone; coeff = (pow(2.0, - p*floatd*floatd))*fone;
} } else if (flags & SWS_SINC) {
else if (flags & SWS_SINC)
{
coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone; coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone;
} } else if (flags & SWS_LANCZOS) {
else if (flags & SWS_LANCZOS)
{
double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone; coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone;
if (floatd>p) coeff=0; if (floatd>p) coeff=0;
} } else if (flags & SWS_BILINEAR) {
else if (flags & SWS_BILINEAR)
{
coeff= (1<<30) - d; coeff= (1<<30) - d;
if (coeff<0) coeff=0; if (coeff<0) coeff=0;
coeff *= fone >> 30; coeff *= fone >> 30;
} } else if (flags & SWS_SPLINE) {
else if (flags & SWS_SPLINE)
{
double p=-2.196152422706632; double p=-2.196152422706632;
coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone; coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone;
} } else {
else {
coeff= 0.0; //GCC warning killer coeff= 0.0; //GCC warning killer
assert(0); assert(0);
} }
...@@ -1657,16 +1609,15 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1657,16 +1609,15 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
assert(filter2Size>0); assert(filter2Size>0);
filter2= av_mallocz(filter2Size*dstW*sizeof(*filter2)); filter2= av_mallocz(filter2Size*dstW*sizeof(*filter2));
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int j, k; int j, k;
if(srcFilter){ if(srcFilter) {
for (k=0; k<srcFilter->length; k++){ for (k=0; k<srcFilter->length; k++) {
for (j=0; j<filterSize; j++) for (j=0; j<filterSize; j++)
filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j]; filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j];
} }
}else{ } else {
for (j=0; j<filterSize; j++) for (j=0; j<filterSize; j++)
filter2[i*filter2Size + j]= filter[i*filterSize + j]; filter2[i*filter2Size + j]= filter[i*filterSize + j];
} }
...@@ -1679,15 +1630,13 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1679,15 +1630,13 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
/* try to reduce the filter-size (step1 find size and shift left) */ /* try to reduce the filter-size (step1 find size and shift left) */
// Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not). // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
minFilterSize= 0; minFilterSize= 0;
for (i=dstW-1; i>=0; i--) for (i=dstW-1; i>=0; i--) {
{
int min= filter2Size; int min= filter2Size;
int j; int j;
int64_t cutOff=0.0; int64_t cutOff=0.0;
/* get rid off near zero elements on the left by shifting left */ /* get rid off near zero elements on the left by shifting left */
for (j=0; j<filter2Size; j++) for (j=0; j<filter2Size; j++) {
{
int k; int k;
cutOff += FFABS(filter2[i*filter2Size]); cutOff += FFABS(filter2[i*filter2Size]);
...@@ -1705,8 +1654,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1705,8 +1654,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
cutOff=0; cutOff=0;
/* count near zeros on the right */ /* count near zeros on the right */
for (j=filter2Size-1; j>0; j--) for (j=filter2Size-1; j>0; j--) {
{
cutOff += FFABS(filter2[i*filter2Size + j]); cutOff += FFABS(filter2[i*filter2Size + j]);
if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break; if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
...@@ -1748,12 +1696,10 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1748,12 +1696,10 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
if (flags&SWS_PRINT_INFO) if (flags&SWS_PRINT_INFO)
av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize); av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
/* try to reduce the filter-size (step2 reduce it) */ /* try to reduce the filter-size (step2 reduce it) */
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int j; int j;
for (j=0; j<filterSize; j++) for (j=0; j<filterSize; j++) {
{
if (j>=filter2Size) filter[i*filterSize + j]= 0; if (j>=filter2Size) filter[i*filterSize + j]= 0;
else filter[i*filterSize + j]= filter2[i*filter2Size + j]; else filter[i*filterSize + j]= filter2[i*filter2Size + j];
if((flags & SWS_BITEXACT) && j>=minFilterSize) if((flags & SWS_BITEXACT) && j>=minFilterSize)
...@@ -1765,14 +1711,11 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1765,14 +1711,11 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
//FIXME try to align filterPos if possible //FIXME try to align filterPos if possible
//fix borders //fix borders
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int j; int j;
if ((*filterPos)[i] < 0) if ((*filterPos)[i] < 0) {
{
// move filter coefficients left to compensate for filterPos // move filter coefficients left to compensate for filterPos
for (j=1; j<filterSize; j++) for (j=1; j<filterSize; j++) {
{
int left= FFMAX(j + (*filterPos)[i], 0); int left= FFMAX(j + (*filterPos)[i], 0);
filter[i*filterSize + left] += filter[i*filterSize + j]; filter[i*filterSize + left] += filter[i*filterSize + j];
filter[i*filterSize + j]=0; filter[i*filterSize + j]=0;
...@@ -1780,12 +1723,10 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1780,12 +1723,10 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
(*filterPos)[i]= 0; (*filterPos)[i]= 0;
} }
if ((*filterPos)[i] + filterSize > srcW) if ((*filterPos)[i] + filterSize > srcW) {
{
int shift= (*filterPos)[i] + filterSize - srcW; int shift= (*filterPos)[i] + filterSize - srcW;
// move filter coefficients right to compensate for filterPos // move filter coefficients right to compensate for filterPos
for (j=filterSize-2; j>=0; j--) for (j=filterSize-2; j>=0; j--) {
{
int right= FFMIN(j + shift, filterSize-1); int right= FFMIN(j + shift, filterSize-1);
filter[i*filterSize +right] += filter[i*filterSize +j]; filter[i*filterSize +right] += filter[i*filterSize +j];
filter[i*filterSize +j]=0; filter[i*filterSize +j]=0;
...@@ -1799,19 +1740,16 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1799,19 +1740,16 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
*outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t)); *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
/* normalize & store in outFilter */ /* normalize & store in outFilter */
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int j; int j;
int64_t error=0; int64_t error=0;
int64_t sum=0; int64_t sum=0;
for (j=0; j<filterSize; j++) for (j=0; j<filterSize; j++) {
{
sum+= filter[i*filterSize + j]; sum+= filter[i*filterSize + j];
} }
sum= (sum + one/2)/ one; sum= (sum + one/2)/ one;
for (j=0; j<*outFilterSize; j++) for (j=0; j<*outFilterSize; j++) {
{
int64_t v= filter[i*filterSize + j] + error; int64_t v= filter[i*filterSize + j] + error;
int intV= ROUNDED_DIV(v, sum); int intV= ROUNDED_DIV(v, sum);
(*outFilter)[i*(*outFilterSize) + j]= intV; (*outFilter)[i*(*outFilterSize) + j]= intV;
...@@ -1820,8 +1758,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1820,8 +1758,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
} }
(*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
for (i=0; i<*outFilterSize; i++) for (i=0; i<*outFilterSize; i++) {
{
int j= dstW*(*outFilterSize); int j= dstW*(*outFilterSize);
(*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)]; (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
} }
...@@ -1933,12 +1870,10 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil ...@@ -1933,12 +1870,10 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil
xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
fragmentPos=0; fragmentPos=0;
for (i=0; i<dstW/numSplits; i++) for (i=0; i<dstW/numSplits; i++) {
{
int xx=xpos>>16; int xx=xpos>>16;
if ((i&3) == 0) if ((i&3) == 0) {
{
int a=0; int a=0;
int b=((xpos+xInc)>>16) - xx; int b=((xpos+xInc)>>16) - xx;
int c=((xpos+xInc*2)>>16) - xx; int c=((xpos+xInc*2)>>16) - xx;
...@@ -1968,8 +1903,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil ...@@ -1968,8 +1903,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil
if (i+4-inc>=dstW) shift=maxShift; //avoid overread if (i+4-inc>=dstW) shift=maxShift; //avoid overread
else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
if (shift && i>=shift) if (shift && i>=shift) {
{
filterCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift; filterCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
filterCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift; filterCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
filterPos[i/2]-=shift; filterPos[i/2]-=shift;
...@@ -1990,10 +1924,11 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil ...@@ -1990,10 +1924,11 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil
} }
#endif /* COMPILE_MMX2 */ #endif /* COMPILE_MMX2 */
static void globalInit(void){ static void globalInit(void)
{
// generating tables: // generating tables:
int i; int i;
for (i=0; i<768; i++){ for (i=0; i<768; i++) {
int c= av_clip_uint8(i-256); int c= av_clip_uint8(i-256);
clip_table[i]=c; clip_table[i]=c;
} }
...@@ -2054,18 +1989,17 @@ static SwsFunc getSwsFunc(SwsContext *c) ...@@ -2054,18 +1989,17 @@ static SwsFunc getSwsFunc(SwsContext *c)
} }
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride[]){ int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
/* Copy Y plane */ /* Copy Y plane */
if (dstStride[0]==srcStride[0] && srcStride[0] > 0) if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
memcpy(dst, src[0], srcSliceH*dstStride[0]); memcpy(dst, src[0], srcSliceH*dstStride[0]);
else else {
{
int i; int i;
const uint8_t *srcPtr= src[0]; const uint8_t *srcPtr= src[0];
uint8_t *dstPtr= dst; uint8_t *dstPtr= dst;
for (i=0; i<srcSliceH; i++) for (i=0; i<srcSliceH; i++) {
{
memcpy(dstPtr, srcPtr, c->srcW); memcpy(dstPtr, srcPtr, c->srcW);
srcPtr+= srcStride[0]; srcPtr+= srcStride[0];
dstPtr+= dstStride[0]; dstPtr+= dstStride[0];
...@@ -2081,7 +2015,8 @@ static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], i ...@@ -2081,7 +2015,8 @@ static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], i
} }
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride[]){ int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
...@@ -2090,7 +2025,8 @@ static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], i ...@@ -2090,7 +2025,8 @@ static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], i
} }
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride[]){ int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
...@@ -2099,7 +2035,8 @@ static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], i ...@@ -2099,7 +2035,8 @@ static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], i
} }
static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride[]){ int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]); yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
...@@ -2108,7 +2045,8 @@ static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], ...@@ -2108,7 +2045,8 @@ static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[],
} }
static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride[]){ int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]); yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
...@@ -2117,7 +2055,8 @@ static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], ...@@ -2117,7 +2055,8 @@ static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[],
} }
static int YUYV2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int YUYV2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride[]){ int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2; uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2; uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
...@@ -2131,7 +2070,8 @@ static int YUYV2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], in ...@@ -2131,7 +2070,8 @@ static int YUYV2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], in
} }
static int YUYV2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int YUYV2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride[]){ int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY; uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY; uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
...@@ -2142,7 +2082,8 @@ static int YUYV2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], in ...@@ -2142,7 +2082,8 @@ static int YUYV2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], in
} }
static int UYVY2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int UYVY2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride[]){ int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2; uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2; uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
...@@ -2156,7 +2097,8 @@ static int UYVY2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], in ...@@ -2156,7 +2097,8 @@ static int UYVY2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], in
} }
static int UYVY2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int UYVY2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride[]){ int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY; uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY; uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
...@@ -2167,7 +2109,8 @@ static int UYVY2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], in ...@@ -2167,7 +2109,8 @@ static int UYVY2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], in
} }
static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
const enum PixelFormat srcFormat= c->srcFormat; const enum PixelFormat srcFormat= c->srcFormat;
const enum PixelFormat dstFormat= c->dstFormat; const enum PixelFormat dstFormat= c->dstFormat;
void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels, void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
...@@ -2180,7 +2123,7 @@ static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr ...@@ -2180,7 +2123,7 @@ static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr
av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
sws_format_name(srcFormat), sws_format_name(dstFormat)); sws_format_name(srcFormat), sws_format_name(dstFormat));
switch(dstFormat){ switch(dstFormat) {
case PIX_FMT_RGB32 : conv = palette8topacked32; break; case PIX_FMT_RGB32 : conv = palette8topacked32; break;
case PIX_FMT_BGR32 : conv = palette8topacked32; break; case PIX_FMT_BGR32 : conv = palette8topacked32; break;
case PIX_FMT_BGR32_1: conv = palette8topacked32; break; case PIX_FMT_BGR32_1: conv = palette8topacked32; break;
...@@ -2203,7 +2146,8 @@ static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr ...@@ -2203,7 +2146,8 @@ static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr
/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */ /* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
const enum PixelFormat srcFormat= c->srcFormat; const enum PixelFormat srcFormat= c->srcFormat;
const enum PixelFormat dstFormat= c->dstFormat; const enum PixelFormat dstFormat= c->dstFormat;
const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3; const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
...@@ -2214,8 +2158,8 @@ static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr ...@@ -2214,8 +2158,8 @@ static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr
/* BGR -> BGR */ /* BGR -> BGR */
if ( (isBGR(srcFormat) && isBGR(dstFormat)) if ( (isBGR(srcFormat) && isBGR(dstFormat))
|| (isRGB(srcFormat) && isRGB(dstFormat))){ || (isRGB(srcFormat) && isRGB(dstFormat))) {
switch(srcId | (dstId<<4)){ switch(srcId | (dstId<<4)) {
case 0x34: conv= rgb16to15; break; case 0x34: conv= rgb16to15; break;
case 0x36: conv= rgb24to15; break; case 0x36: conv= rgb24to15; break;
case 0x38: conv= rgb32to15; break; case 0x38: conv= rgb32to15; break;
...@@ -2231,9 +2175,9 @@ static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr ...@@ -2231,9 +2175,9 @@ static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr
default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
sws_format_name(srcFormat), sws_format_name(dstFormat)); break; sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
} }
}else if ( (isBGR(srcFormat) && isRGB(dstFormat)) } else if ( (isBGR(srcFormat) && isRGB(dstFormat))
|| (isRGB(srcFormat) && isBGR(dstFormat))){ || (isRGB(srcFormat) && isBGR(dstFormat))) {
switch(srcId | (dstId<<4)){ switch(srcId | (dstId<<4)) {
case 0x33: conv= rgb15tobgr15; break; case 0x33: conv= rgb15tobgr15; break;
case 0x34: conv= rgb16tobgr15; break; case 0x34: conv= rgb16tobgr15; break;
case 0x36: conv= rgb24tobgr15; break; case 0x36: conv= rgb24tobgr15; break;
...@@ -2253,26 +2197,23 @@ static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr ...@@ -2253,26 +2197,23 @@ static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr
default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
sws_format_name(srcFormat), sws_format_name(dstFormat)); break; sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
} }
}else{ } else {
av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
sws_format_name(srcFormat), sws_format_name(dstFormat)); sws_format_name(srcFormat), sws_format_name(dstFormat));
} }
if(conv) if(conv) {
{
uint8_t *srcPtr= src[0]; uint8_t *srcPtr= src[0];
if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1) if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
srcPtr += ALT32_CORR; srcPtr += ALT32_CORR;
if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0) if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
else else {
{
int i; int i;
uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
for (i=0; i<srcSliceH; i++) for (i=0; i<srcSliceH; i++) {
{
conv(srcPtr, dstPtr, c->srcW*srcBpp); conv(srcPtr, dstPtr, c->srcW*srcBpp);
srcPtr+= srcStride[0]; srcPtr+= srcStride[0];
dstPtr+= dstStride[0]; dstPtr+= dstStride[0];
...@@ -2283,7 +2224,8 @@ static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr ...@@ -2283,7 +2224,8 @@ static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr
} }
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
rgb24toyv12( rgb24toyv12(
src[0], src[0],
...@@ -2298,30 +2240,30 @@ static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], in ...@@ -2298,30 +2240,30 @@ static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], in
} }
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int i; int i;
/* copy Y */ /* copy Y */
if (srcStride[0]==dstStride[0] && srcStride[0] > 0) if (srcStride[0]==dstStride[0] && srcStride[0] > 0)
memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH); memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
else{ else {
uint8_t *srcPtr= src[0]; uint8_t *srcPtr= src[0];
uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
for (i=0; i<srcSliceH; i++) for (i=0; i<srcSliceH; i++) {
{
memcpy(dstPtr, srcPtr, c->srcW); memcpy(dstPtr, srcPtr, c->srcW);
srcPtr+= srcStride[0]; srcPtr+= srcStride[0];
dstPtr+= dstStride[0]; dstPtr+= dstStride[0];
} }
} }
if (c->dstFormat==PIX_FMT_YUV420P || c->dstFormat==PIX_FMT_YUVA420P){ if (c->dstFormat==PIX_FMT_YUV420P || c->dstFormat==PIX_FMT_YUVA420P) {
planar2x(src[1], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW, planar2x(src[1], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW,
srcSliceH >> 2, srcStride[1], dstStride[1]); srcSliceH >> 2, srcStride[1], dstStride[1]);
planar2x(src[2], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW, planar2x(src[2], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW,
srcSliceH >> 2, srcStride[2], dstStride[2]); srcSliceH >> 2, srcStride[2], dstStride[2]);
}else{ } else {
planar2x(src[1], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW, planar2x(src[1], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW,
srcSliceH >> 2, srcStride[1], dstStride[2]); srcSliceH >> 2, srcStride[1], dstStride[2]);
planar2x(src[2], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW, planar2x(src[2], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW,
...@@ -2338,8 +2280,7 @@ static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli ...@@ -2338,8 +2280,7 @@ static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli
{ {
if (dstStride[0]==srcStride[0] && srcStride[0] > 0) if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]); memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
else else {
{
int i; int i;
uint8_t *srcPtr= src[0]; uint8_t *srcPtr= src[0];
uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
...@@ -2350,8 +2291,7 @@ static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli ...@@ -2350,8 +2291,7 @@ static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli
&& length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW; && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
assert(length!=0); assert(length!=0);
for (i=0; i<srcSliceH; i++) for (i=0; i<srcSliceH; i++) {
{
memcpy(dstPtr, srcPtr, length); memcpy(dstPtr, srcPtr, length);
srcPtr+= srcStride[0]; srcPtr+= srcStride[0];
dstPtr+= dstStride[0]; dstPtr+= dstStride[0];
...@@ -2364,8 +2304,7 @@ static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli ...@@ -2364,8 +2304,7 @@ static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli
int srcSliceH, uint8_t* dst[], int dstStride[]) int srcSliceH, uint8_t* dst[], int dstStride[])
{ {
int plane, i, j; int plane, i, j;
for (plane=0; plane<4; plane++) for (plane=0; plane<4; plane++) {
{
int length= (plane==0 || plane==3) ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample); int length= (plane==0 || plane==3) ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample);
int y= (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample); int y= (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample); int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
...@@ -2375,32 +2314,31 @@ static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli ...@@ -2375,32 +2314,31 @@ static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli
if (!dst[plane]) continue; if (!dst[plane]) continue;
// ignore palette for GRAY8 // ignore palette for GRAY8
if (plane == 1 && !dst[2]) continue; if (plane == 1 && !dst[2]) continue;
if (!src[plane] || (plane == 1 && !src[2])){ if (!src[plane] || (plane == 1 && !src[2])) {
if(is16BPS(c->dstFormat)) if(is16BPS(c->dstFormat))
length*=2; length*=2;
fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128); fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
}else } else {
{ if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)){
if (!isBE(c->srcFormat)) srcPtr++; if (!isBE(c->srcFormat)) srcPtr++;
for (i=0; i<height; i++){ for (i=0; i<height; i++) {
for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1]; for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
srcPtr+= srcStride[plane]; srcPtr+= srcStride[plane];
dstPtr+= dstStride[plane]; dstPtr+= dstStride[plane];
} }
}else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)){ } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
for (i=0; i<height; i++){ for (i=0; i<height; i++) {
for (j=0; j<length; j++){ for (j=0; j<length; j++) {
dstPtr[ j<<1 ] = srcPtr[j]; dstPtr[ j<<1 ] = srcPtr[j];
dstPtr[(j<<1)+1] = srcPtr[j]; dstPtr[(j<<1)+1] = srcPtr[j];
} }
srcPtr+= srcStride[plane]; srcPtr+= srcStride[plane];
dstPtr+= dstStride[plane]; dstPtr+= dstStride[plane];
} }
}else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat) } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)
&& isBE(c->srcFormat) != isBE(c->dstFormat)){ && isBE(c->srcFormat) != isBE(c->dstFormat)) {
for (i=0; i<height; i++){ for (i=0; i<height; i++) {
for (j=0; j<length; j++) for (j=0; j<length; j++)
((uint16_t*)dstPtr)[j] = bswap_16(((uint16_t*)srcPtr)[j]); ((uint16_t*)dstPtr)[j] = bswap_16(((uint16_t*)srcPtr)[j]);
srcPtr+= srcStride[plane]; srcPtr+= srcStride[plane];
...@@ -2408,12 +2346,10 @@ static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli ...@@ -2408,12 +2346,10 @@ static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli
} }
} else if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0) } else if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]); memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
else else {
{
if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat))
length*=2; length*=2;
for (i=0; i<height; i++) for (i=0; i<height; i++) {
{
memcpy(dstPtr, srcPtr, length); memcpy(dstPtr, srcPtr, length);
srcPtr+= srcStride[plane]; srcPtr+= srcStride[plane];
dstPtr+= dstStride[plane]; dstPtr+= dstStride[plane];
...@@ -2425,8 +2361,9 @@ static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli ...@@ -2425,8 +2361,9 @@ static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSli
} }
static void getSubSampleFactors(int *h, int *v, int format){ static void getSubSampleFactors(int *h, int *v, int format)
switch(format){ {
switch(format) {
case PIX_FMT_UYVY422: case PIX_FMT_UYVY422:
case PIX_FMT_YUYV422: case PIX_FMT_YUYV422:
*h=1; *h=1;
...@@ -2475,14 +2412,16 @@ static void getSubSampleFactors(int *h, int *v, int format){ ...@@ -2475,14 +2412,16 @@ static void getSubSampleFactors(int *h, int *v, int format){
} }
} }
/**
 * Scale a value down by 2^16 with rounding and saturate it to 16 bits.
 *
 * Half of the divisor (1<<15) is added before the right shift by 16, so the
 * result is rounded rather than truncated.
 *
 * @param f value to scale down
 * @return the rounded value converted to uint16_t; results below -0x7FFF
 *         yield 0x8000 and results above 0x7FFF yield 0x7FFF. Negative
 *         in-range results are converted modulo 2^16 (e.g. -1 -> 0xFFFF).
 */
static uint16_t roundToInt16(int64_t f)
{
    /* Kept in 64 bits so that inputs whose rounded value does not fit in an
     * int are still saturated instead of being truncated before the check. */
    int64_t r = (f + (1<<15)) >> 16;

    if (r < -0x7FFF)
        return 0x8000;
    if (r > 0x7FFF)
        return 0x7FFF;
    return (uint16_t)r;
}
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation)
{
int64_t crv = inv_table[0]; int64_t crv = inv_table[0];
int64_t cbu = inv_table[1]; int64_t cbu = inv_table[1];
int64_t cgu = -inv_table[2]; int64_t cgu = -inv_table[2];
...@@ -2503,10 +2442,10 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange ...@@ -2503,10 +2442,10 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange
c->uOffset= 0x0400040004000400LL; c->uOffset= 0x0400040004000400LL;
c->vOffset= 0x0400040004000400LL; c->vOffset= 0x0400040004000400LL;
if (!srcRange){ if (!srcRange) {
cy= (cy*255) / 219; cy= (cy*255) / 219;
oy= 16<<16; oy= 16<<16;
}else{ } else {
crv= (crv*224) / 255; crv= (crv*224) / 255;
cbu= (cbu*224) / 255; cbu= (cbu*224) / 255;
cgu= (cgu*224) / 255; cgu= (cgu*224) / 255;
...@@ -2545,7 +2484,8 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange ...@@ -2545,7 +2484,8 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange
return 0; return 0;
} }
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){ int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation)
{
if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
*inv_table = c->srcColorspaceTable; *inv_table = c->srcColorspaceTable;
...@@ -2619,13 +2559,11 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -2619,13 +2559,11 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
srcRange = handle_jpeg(&srcFormat); srcRange = handle_jpeg(&srcFormat);
dstRange = handle_jpeg(&dstFormat); dstRange = handle_jpeg(&dstFormat);
if (!isSupportedIn(srcFormat)) if (!isSupportedIn(srcFormat)) {
{
av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat)); av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
return NULL; return NULL;
} }
if (!isSupportedOut(dstFormat)) if (!isSupportedOut(dstFormat)) {
{
av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat)); av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
return NULL; return NULL;
} }
...@@ -2641,20 +2579,18 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -2641,20 +2579,18 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
|SWS_SINC |SWS_SINC
|SWS_SPLINE |SWS_SPLINE
|SWS_BICUBLIN); |SWS_BICUBLIN);
if(!i || (i & (i-1))) if(!i || (i & (i-1))) {
{
av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n"); av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
return NULL; return NULL;
} }
/* sanity check */ /* sanity check */
if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code if (srcW<4 || srcH<1 || dstW<8 || dstH<1) { //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
{
av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n", av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
srcW, srcH, dstW, dstH); srcW, srcH, dstW, dstH);
return NULL; return NULL;
} }
if(srcW > VOFW || dstW > VOFW){ if(srcW > VOFW || dstW > VOFW) {
av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n"); av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
return NULL; return NULL;
} }
...@@ -2704,10 +2640,10 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -2704,10 +2640,10 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
&& ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT)))) && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT))))
c->chrSrcHSubSample=1; c->chrSrcHSubSample=1;
if (param){ if (param) {
c->param[0] = param[0]; c->param[0] = param[0];
c->param[1] = param[1]; c->param[1] = param[1];
}else{ } else {
c->param[0] = c->param[0] =
c->param[1] = SWS_PARAM_DEFAULT; c->param[1] = SWS_PARAM_DEFAULT;
} }
...@@ -2721,22 +2657,18 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -2721,22 +2657,18 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16); sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
/* unscaled special cases */ /* unscaled special cases */
if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat))) if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat))) {
{
/* yv12_to_nv12 */ /* yv12_to_nv12 */
if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) {
{
c->swScale= PlanarToNV12Wrapper; c->swScale= PlanarToNV12Wrapper;
} }
/* yuv2bgr */ /* yuv2bgr */
if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && (isBGR(dstFormat) || isRGB(dstFormat)) if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && (isBGR(dstFormat) || isRGB(dstFormat))
&& !(flags & SWS_ACCURATE_RND) && !(dstH&1)) && !(flags & SWS_ACCURATE_RND) && !(dstH&1)) {
{
c->swScale= ff_yuv2rgb_get_func_ptr(c); c->swScale= ff_yuv2rgb_get_func_ptr(c);
} }
if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT)) if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT)) {
{
c->swScale= yvu9toyv12Wrapper; c->swScale= yvu9toyv12Wrapper;
} }
...@@ -2771,8 +2703,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -2771,8 +2703,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
dstFormat == PIX_FMT_BGR24))) dstFormat == PIX_FMT_BGR24)))
c->swScale= pal2rgbWrapper; c->swScale= pal2rgbWrapper;
if (srcFormat == PIX_FMT_YUV422P) if (srcFormat == PIX_FMT_YUV422P) {
{
if (dstFormat == PIX_FMT_YUYV422) if (dstFormat == PIX_FMT_YUYV422)
c->swScale= YUV422PToYuy2Wrapper; c->swScale= YUV422PToYuy2Wrapper;
else if (dstFormat == PIX_FMT_UYVY422) else if (dstFormat == PIX_FMT_UYVY422)
...@@ -2780,10 +2711,9 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -2780,10 +2711,9 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
} }
/* LQ converters if -sws 0 or -sws 4*/ /* LQ converters if -sws 0 or -sws 4*/
if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){ if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) {
/* yv12_to_yuy2 */ /* yv12_to_yuy2 */
if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) {
{
if (dstFormat == PIX_FMT_YUYV422) if (dstFormat == PIX_FMT_YUYV422)
c->swScale= PlanarToYuy2Wrapper; c->swScale= PlanarToYuy2Wrapper;
else if (dstFormat == PIX_FMT_UYVY422) else if (dstFormat == PIX_FMT_UYVY422)
...@@ -2834,7 +2764,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -2834,7 +2764,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
ff_bfin_get_unscaled_swscale (c); ff_bfin_get_unscaled_swscale (c);
#endif #endif
if (c->swScale){ if (c->swScale) {
if (flags&SWS_PRINT_INFO) if (flags&SWS_PRINT_INFO)
av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n", av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n",
sws_format_name(srcFormat), sws_format_name(dstFormat)); sws_format_name(srcFormat), sws_format_name(dstFormat));
...@@ -2842,11 +2772,9 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -2842,11 +2772,9 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
} }
} }
if (flags & SWS_CPU_CAPS_MMX2) if (flags & SWS_CPU_CAPS_MMX2) {
{
c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
{
if (flags&SWS_PRINT_INFO) if (flags&SWS_PRINT_INFO)
av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n"); av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
} }
...@@ -2864,16 +2792,13 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -2864,16 +2792,13 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
// this is not perfect, but no one should notice the difference, the more correct variant // this is not perfect, but no one should notice the difference, the more correct variant
// would be like the vertical one, but that would require some special code for the // would be like the vertical one, but that would require some special code for the
// first and last pixel // first and last pixel
if (flags&SWS_FAST_BILINEAR) if (flags&SWS_FAST_BILINEAR) {
{ if (c->canMMX2BeUsed) {
if (c->canMMX2BeUsed)
{
c->lumXInc+= 20; c->lumXInc+= 20;
c->chrXInc+= 20; c->chrXInc+= 20;
} }
//we don't use the x86 asm scaler if MMX is available //we don't use the x86 asm scaler if MMX is available
else if (flags & SWS_CPU_CAPS_MMX) else if (flags & SWS_CPU_CAPS_MMX) {
{
c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20; c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
} }
...@@ -2897,8 +2822,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -2897,8 +2822,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
#if defined(COMPILE_MMX2) #if defined(COMPILE_MMX2)
// can't downscale !!! // can't downscale !!!
if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) {
{
c->lumMmx2FilterCodeSize = initMMX2HScaler( dstW, c->lumXInc, NULL, NULL, NULL, 8); c->lumMmx2FilterCodeSize = initMMX2HScaler( dstW, c->lumXInc, NULL, NULL, NULL, 8);
c->chrMmx2FilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc, NULL, NULL, NULL, 4); c->chrMmx2FilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc, NULL, NULL, NULL, 4);
...@@ -2970,8 +2894,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -2970,8 +2894,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
// calculate buffer sizes so that they won't run out while handling these damn slices // calculate buffer sizes so that they won't run out while handling these damn slices
c->vLumBufSize= c->vLumFilterSize; c->vLumBufSize= c->vLumFilterSize;
c->vChrBufSize= c->vChrFilterSize; c->vChrBufSize= c->vChrFilterSize;
for (i=0; i<dstH; i++) for (i=0; i<dstH; i++) {
{
int chrI= i*c->chrDstH / dstH; int chrI= i*c->chrDstH / dstH;
int nextSlice= FFMAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1, int nextSlice= FFMAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1,
((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample)); ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
...@@ -3006,8 +2929,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -3006,8 +2929,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
assert(c->chrDstH <= dstH); assert(c->chrDstH <= dstH);
if (flags&SWS_PRINT_INFO) if (flags&SWS_PRINT_INFO) {
{
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
const char *dither= " dithered"; const char *dither= " dithered";
#else #else
...@@ -3057,14 +2979,11 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -3057,14 +2979,11 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
av_log(c, AV_LOG_INFO, "using C\n"); av_log(c, AV_LOG_INFO, "using C\n");
} }
if (flags & SWS_PRINT_INFO) if (flags & SWS_PRINT_INFO) {
{ if (flags & SWS_CPU_CAPS_MMX) {
if (flags & SWS_CPU_CAPS_MMX)
{
if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR)) if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
else else {
{
if (c->hLumFilterSize==4) if (c->hLumFilterSize==4)
av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal luminance scaling\n"); av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal luminance scaling\n");
else if (c->hLumFilterSize==8) else if (c->hLumFilterSize==8)
...@@ -3079,9 +2998,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -3079,9 +2998,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
else else
av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n"); av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n");
} }
} } else {
else
{
#if ARCH_X86 #if ARCH_X86
av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n"); av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n");
#else #else
...@@ -3091,15 +3008,12 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -3091,15 +3008,12 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
av_log(c, AV_LOG_VERBOSE, "using C scaler for horizontal scaling\n"); av_log(c, AV_LOG_VERBOSE, "using C scaler for horizontal scaling\n");
#endif #endif
} }
if (isPlanarYUV(dstFormat)) if (isPlanarYUV(dstFormat)) {
{
if (c->vLumFilterSize==1) if (c->vLumFilterSize==1)
av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
else else
av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
} } else {
else
{
if (c->vLumFilterSize==1 && c->vChrFilterSize==2) if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
" 2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); " 2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
...@@ -3121,8 +3035,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -3121,8 +3035,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
} }
if (flags & SWS_PRINT_INFO) if (flags & SWS_PRINT_INFO) {
{
av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
av_log(c, AV_LOG_DEBUG, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", av_log(c, AV_LOG_DEBUG, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
...@@ -3133,10 +3046,11 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d ...@@ -3133,10 +3046,11 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
return c; return c;
} }
static void reset_ptr(uint8_t* src[], int format){ static void reset_ptr(uint8_t* src[], int format)
{
if(!isALPHA(format)) if(!isALPHA(format))
src[3]=NULL; src[3]=NULL;
if(!isPlanarYUV(format)){ if(!isPlanarYUV(format)) {
src[3]=src[2]=NULL; src[3]=src[2]=NULL;
if( format != PIX_FMT_PAL8 if( format != PIX_FMT_PAL8
&& format != PIX_FMT_RGB8 && format != PIX_FMT_RGB8
...@@ -3153,7 +3067,8 @@ static void reset_ptr(uint8_t* src[], int format){ ...@@ -3153,7 +3067,8 @@ static void reset_ptr(uint8_t* src[], int format){
* Assumes planar YUV to be in YUV order instead of YVU. * Assumes planar YUV to be in YUV order instead of YVU.
*/ */
int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int i; int i;
uint8_t* src2[4]= {src[0], src[1], src[2], src[3]}; uint8_t* src2[4]= {src[0], src[1], src[2], src[3]};
uint8_t* dst2[4]= {dst[0], dst[1], dst[2], dst[3]}; uint8_t* dst2[4]= {dst[0], dst[1], dst[2], dst[3]};
...@@ -3166,27 +3081,27 @@ int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, ...@@ -3166,27 +3081,27 @@ int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1; if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
} }
if (usePal(c->srcFormat)){ if (usePal(c->srcFormat)) {
for (i=0; i<256; i++){ for (i=0; i<256; i++) {
int p, r, g, b,y,u,v; int p, r, g, b,y,u,v;
if(c->srcFormat == PIX_FMT_PAL8){ if(c->srcFormat == PIX_FMT_PAL8) {
p=((uint32_t*)(src[1]))[i]; p=((uint32_t*)(src[1]))[i];
r= (p>>16)&0xFF; r= (p>>16)&0xFF;
g= (p>> 8)&0xFF; g= (p>> 8)&0xFF;
b= p &0xFF; b= p &0xFF;
}else if(c->srcFormat == PIX_FMT_RGB8){ } else if(c->srcFormat == PIX_FMT_RGB8) {
r= (i>>5 )*36; r= (i>>5 )*36;
g= ((i>>2)&7)*36; g= ((i>>2)&7)*36;
b= (i&3 )*85; b= (i&3 )*85;
}else if(c->srcFormat == PIX_FMT_BGR8){ } else if(c->srcFormat == PIX_FMT_BGR8) {
b= (i>>6 )*85; b= (i>>6 )*85;
g= ((i>>3)&7)*36; g= ((i>>3)&7)*36;
r= (i&7 )*36; r= (i&7 )*36;
}else if(c->srcFormat == PIX_FMT_RGB4_BYTE){ } else if(c->srcFormat == PIX_FMT_RGB4_BYTE) {
r= (i>>3 )*255; r= (i>>3 )*255;
g= ((i>>1)&3)*85; g= ((i>>1)&3)*85;
b= (i&1 )*255; b= (i&1 )*255;
}else { } else {
assert(c->srcFormat == PIX_FMT_BGR4_BYTE); assert(c->srcFormat == PIX_FMT_BGR4_BYTE);
b= (i>>3 )*255; b= (i>>3 )*255;
g= ((i>>1)&3)*85; g= ((i>>1)&3)*85;
...@@ -3261,7 +3176,8 @@ int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, ...@@ -3261,7 +3176,8 @@ int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
#if LIBSWSCALE_VERSION_MAJOR < 1 #if LIBSWSCALE_VERSION_MAJOR < 1
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride); return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
} }
#endif #endif
...@@ -3273,23 +3189,23 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, ...@@ -3273,23 +3189,23 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
{ {
SwsFilter *filter= av_malloc(sizeof(SwsFilter)); SwsFilter *filter= av_malloc(sizeof(SwsFilter));
if (lumaGBlur!=0.0){ if (lumaGBlur!=0.0) {
filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0); filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0); filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
}else{ } else {
filter->lumH= sws_getIdentityVec(); filter->lumH= sws_getIdentityVec();
filter->lumV= sws_getIdentityVec(); filter->lumV= sws_getIdentityVec();
} }
if (chromaGBlur!=0.0){ if (chromaGBlur!=0.0) {
filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0); filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0); filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
}else{ } else {
filter->chrH= sws_getIdentityVec(); filter->chrH= sws_getIdentityVec();
filter->chrV= sws_getIdentityVec(); filter->chrV= sws_getIdentityVec();
} }
if (chromaSharpen!=0.0){ if (chromaSharpen!=0.0) {
SwsVector *id= sws_getIdentityVec(); SwsVector *id= sws_getIdentityVec();
sws_scaleVec(filter->chrH, -chromaSharpen); sws_scaleVec(filter->chrH, -chromaSharpen);
sws_scaleVec(filter->chrV, -chromaSharpen); sws_scaleVec(filter->chrV, -chromaSharpen);
...@@ -3298,7 +3214,7 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, ...@@ -3298,7 +3214,7 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
sws_freeVec(id); sws_freeVec(id);
} }
if (lumaSharpen!=0.0){ if (lumaSharpen!=0.0) {
SwsVector *id= sws_getIdentityVec(); SwsVector *id= sws_getIdentityVec();
sws_scaleVec(filter->lumH, -lumaSharpen); sws_scaleVec(filter->lumH, -lumaSharpen);
sws_scaleVec(filter->lumV, -lumaSharpen); sws_scaleVec(filter->lumV, -lumaSharpen);
...@@ -3324,7 +3240,8 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, ...@@ -3324,7 +3240,8 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
return filter; return filter;
} }
SwsVector *sws_getGaussianVec(double variance, double quality){ SwsVector *sws_getGaussianVec(double variance, double quality)
{
const int length= (int)(variance*quality + 0.5) | 1; const int length= (int)(variance*quality + 0.5) | 1;
int i; int i;
double *coeff= av_malloc(length*sizeof(double)); double *coeff= av_malloc(length*sizeof(double));
...@@ -3334,8 +3251,7 @@ SwsVector *sws_getGaussianVec(double variance, double quality){ ...@@ -3334,8 +3251,7 @@ SwsVector *sws_getGaussianVec(double variance, double quality){
vec->coeff= coeff; vec->coeff= coeff;
vec->length= length; vec->length= length;
for (i=0; i<length; i++) for (i=0; i<length; i++) {
{
double dist= i-middle; double dist= i-middle;
coeff[i]= exp(-dist*dist/(2*variance*variance)) / sqrt(2*variance*PI); coeff[i]= exp(-dist*dist/(2*variance*variance)) / sqrt(2*variance*PI);
} }
...@@ -3345,7 +3261,8 @@ SwsVector *sws_getGaussianVec(double variance, double quality){ ...@@ -3345,7 +3261,8 @@ SwsVector *sws_getGaussianVec(double variance, double quality){
return vec; return vec;
} }
SwsVector *sws_getConstVec(double c, int length){ SwsVector *sws_getConstVec(double c, int length)
{
int i; int i;
double *coeff= av_malloc(length*sizeof(double)); double *coeff= av_malloc(length*sizeof(double));
SwsVector *vec= av_malloc(sizeof(SwsVector)); SwsVector *vec= av_malloc(sizeof(SwsVector));
...@@ -3360,11 +3277,13 @@ SwsVector *sws_getConstVec(double c, int length){ ...@@ -3360,11 +3277,13 @@ SwsVector *sws_getConstVec(double c, int length){
} }
/**
 * Build the identity vector: a single coefficient equal to 1.0.
 *
 * @return whatever sws_getConstVec(1.0, 1) returns
 */
SwsVector *sws_getIdentityVec(void)
{
    return sws_getConstVec(1.0, 1);
}
double sws_dcVec(SwsVector *a){ double sws_dcVec(SwsVector *a)
{
int i; int i;
double sum=0; double sum=0;
...@@ -3374,18 +3293,21 @@ double sws_dcVec(SwsVector *a){ ...@@ -3374,18 +3293,21 @@ double sws_dcVec(SwsVector *a){
return sum; return sum;
} }
/**
 * Multiply every coefficient of a vector by a scalar, in place.
 *
 * @param a      vector whose a->length coefficients are modified
 * @param scalar factor applied to each coefficient
 */
void sws_scaleVec(SwsVector *a, double scalar)
{
    int i;

    for (i=0; i<a->length; i++)
        a->coeff[i] *= scalar;
}
/**
 * Rescale a vector in place by the factor height / sws_dcVec(a).
 *
 * @param a      vector to rescale
 * @param height numerator of the scaling factor
 */
void sws_normalizeVec(SwsVector *a, double height)
{
    sws_scaleVec(a, height/sws_dcVec(a));
}
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){ static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b)
{
int length= a->length + b->length - 1; int length= a->length + b->length - 1;
double *coeff= av_malloc(length*sizeof(double)); double *coeff= av_malloc(length*sizeof(double));
int i, j; int i, j;
...@@ -3396,10 +3318,8 @@ static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){ ...@@ -3396,10 +3318,8 @@ static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
for (i=0; i<length; i++) coeff[i]= 0.0; for (i=0; i<length; i++) coeff[i]= 0.0;
for (i=0; i<a->length; i++) for (i=0; i<a->length; i++) {
{ for (j=0; j<b->length; j++) {
for (j=0; j<b->length; j++)
{
coeff[i+j]+= a->coeff[i]*b->coeff[j]; coeff[i+j]+= a->coeff[i]*b->coeff[j];
} }
} }
...@@ -3407,7 +3327,8 @@ static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){ ...@@ -3407,7 +3327,8 @@ static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
return vec; return vec;
} }
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){ static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b)
{
int length= FFMAX(a->length, b->length); int length= FFMAX(a->length, b->length);
double *coeff= av_malloc(length*sizeof(double)); double *coeff= av_malloc(length*sizeof(double));
int i; int i;
...@@ -3424,7 +3345,8 @@ static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){ ...@@ -3424,7 +3345,8 @@ static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
return vec; return vec;
} }
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){ static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b)
{
int length= FFMAX(a->length, b->length); int length= FFMAX(a->length, b->length);
double *coeff= av_malloc(length*sizeof(double)); double *coeff= av_malloc(length*sizeof(double));
int i; int i;
...@@ -3442,7 +3364,8 @@ static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){ ...@@ -3442,7 +3364,8 @@ static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
} }
/* shift left / or right if "shift" is negative */ /* shift left / or right if "shift" is negative */
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){ static SwsVector *sws_getShiftedVec(SwsVector *a, int shift)
{
int length= a->length + FFABS(shift)*2; int length= a->length + FFABS(shift)*2;
double *coeff= av_malloc(length*sizeof(double)); double *coeff= av_malloc(length*sizeof(double));
int i; int i;
...@@ -3453,15 +3376,15 @@ static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){ ...@@ -3453,15 +3376,15 @@ static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
for (i=0; i<length; i++) coeff[i]= 0.0; for (i=0; i<length; i++) coeff[i]= 0.0;
for (i=0; i<a->length; i++) for (i=0; i<a->length; i++) {
{
coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i]; coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
} }
return vec; return vec;
} }
void sws_shiftVec(SwsVector *a, int shift){ void sws_shiftVec(SwsVector *a, int shift)
{
SwsVector *shifted= sws_getShiftedVec(a, shift); SwsVector *shifted= sws_getShiftedVec(a, shift);
av_free(a->coeff); av_free(a->coeff);
a->coeff= shifted->coeff; a->coeff= shifted->coeff;
...@@ -3469,7 +3392,8 @@ void sws_shiftVec(SwsVector *a, int shift){ ...@@ -3469,7 +3392,8 @@ void sws_shiftVec(SwsVector *a, int shift){
av_free(shifted); av_free(shifted);
} }
void sws_addVec(SwsVector *a, SwsVector *b){ void sws_addVec(SwsVector *a, SwsVector *b)
{
SwsVector *sum= sws_sumVec(a, b); SwsVector *sum= sws_sumVec(a, b);
av_free(a->coeff); av_free(a->coeff);
a->coeff= sum->coeff; a->coeff= sum->coeff;
...@@ -3477,7 +3401,8 @@ void sws_addVec(SwsVector *a, SwsVector *b){ ...@@ -3477,7 +3401,8 @@ void sws_addVec(SwsVector *a, SwsVector *b){
av_free(sum); av_free(sum);
} }
void sws_subVec(SwsVector *a, SwsVector *b){ void sws_subVec(SwsVector *a, SwsVector *b)
{
SwsVector *diff= sws_diffVec(a, b); SwsVector *diff= sws_diffVec(a, b);
av_free(a->coeff); av_free(a->coeff);
a->coeff= diff->coeff; a->coeff= diff->coeff;
...@@ -3485,7 +3410,8 @@ void sws_subVec(SwsVector *a, SwsVector *b){ ...@@ -3485,7 +3410,8 @@ void sws_subVec(SwsVector *a, SwsVector *b){
av_free(diff); av_free(diff);
} }
void sws_convVec(SwsVector *a, SwsVector *b){ void sws_convVec(SwsVector *a, SwsVector *b)
{
SwsVector *conv= sws_getConvVec(a, b); SwsVector *conv= sws_getConvVec(a, b);
av_free(a->coeff); av_free(a->coeff);
a->coeff= conv->coeff; a->coeff= conv->coeff;
...@@ -3493,7 +3419,8 @@ void sws_convVec(SwsVector *a, SwsVector *b){ ...@@ -3493,7 +3419,8 @@ void sws_convVec(SwsVector *a, SwsVector *b){
av_free(conv); av_free(conv);
} }
SwsVector *sws_cloneVec(SwsVector *a){ SwsVector *sws_cloneVec(SwsVector *a)
{
double *coeff= av_malloc(a->length*sizeof(double)); double *coeff= av_malloc(a->length*sizeof(double));
int i; int i;
SwsVector *vec= av_malloc(sizeof(SwsVector)); SwsVector *vec= av_malloc(sizeof(SwsVector));
...@@ -3506,7 +3433,8 @@ SwsVector *sws_cloneVec(SwsVector *a){ ...@@ -3506,7 +3433,8 @@ SwsVector *sws_cloneVec(SwsVector *a){
return vec; return vec;
} }
void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level){ void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level)
{
int i; int i;
double max=0; double max=0;
double min=0; double min=0;
...@@ -3520,8 +3448,7 @@ void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level){ ...@@ -3520,8 +3448,7 @@ void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level){
range= max - min; range= max - min;
for (i=0; i<a->length; i++) for (i=0; i<a->length; i++) {
{
int x= (int)((a->coeff[i]-min)*60.0/range +0.5); int x= (int)((a->coeff[i]-min)*60.0/range +0.5);
av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]); av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]);
for (;x>0; x--) av_log(log_ctx, log_level, " "); for (;x>0; x--) av_log(log_ctx, log_level, " ");
...@@ -3530,19 +3457,22 @@ void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level){ ...@@ -3530,19 +3457,22 @@ void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level){
} }
#if LIBSWSCALE_VERSION_MAJOR < 1 #if LIBSWSCALE_VERSION_MAJOR < 1
void sws_printVec(SwsVector *a){ void sws_printVec(SwsVector *a)
{
sws_printVec2(a, NULL, AV_LOG_DEBUG); sws_printVec2(a, NULL, AV_LOG_DEBUG);
} }
#endif #endif
void sws_freeVec(SwsVector *a){ void sws_freeVec(SwsVector *a)
{
if (!a) return; if (!a) return;
av_freep(&a->coeff); av_freep(&a->coeff);
a->length=0; a->length=0;
av_free(a); av_free(a);
} }
void sws_freeFilter(SwsFilter *filter){ void sws_freeFilter(SwsFilter *filter)
{
if (!filter) return; if (!filter) return;
if (filter->lumH) sws_freeVec(filter->lumH); if (filter->lumH) sws_freeVec(filter->lumH);
...@@ -3553,25 +3483,24 @@ void sws_freeFilter(SwsFilter *filter){ ...@@ -3553,25 +3483,24 @@ void sws_freeFilter(SwsFilter *filter){
} }
void sws_freeContext(SwsContext *c){ void sws_freeContext(SwsContext *c)
{
int i; int i;
if (!c) return; if (!c) return;
if (c->lumPixBuf) if (c->lumPixBuf) {
{
for (i=0; i<c->vLumBufSize; i++) for (i=0; i<c->vLumBufSize; i++)
av_freep(&c->lumPixBuf[i]); av_freep(&c->lumPixBuf[i]);
av_freep(&c->lumPixBuf); av_freep(&c->lumPixBuf);
} }
if (c->chrPixBuf) if (c->chrPixBuf) {
{
for (i=0; i<c->vChrBufSize; i++) for (i=0; i<c->vChrBufSize; i++)
av_freep(&c->chrPixBuf[i]); av_freep(&c->chrPixBuf[i]);
av_freep(&c->chrPixBuf); av_freep(&c->chrPixBuf);
} }
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
for (i=0; i<c->vLumBufSize; i++) for (i=0; i<c->vLumBufSize; i++)
av_freep(&c->alpPixBuf[i]); av_freep(&c->alpPixBuf[i]);
av_freep(&c->alpPixBuf); av_freep(&c->alpPixBuf);
......
...@@ -64,7 +64,7 @@ typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], ...@@ -64,7 +64,7 @@ typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[],
uint8_t* dst[], int dstStride[]); uint8_t* dst[], int dstStride[]);
/* This struct should be aligned on at least a 32-byte boundary. */ /* This struct should be aligned on at least a 32-byte boundary. */
typedef struct SwsContext{ typedef struct SwsContext {
/** /**
* info on struct for av_log * info on struct for av_log
*/ */
......
...@@ -906,23 +906,23 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con ...@@ -906,23 +906,23 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW) uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{ {
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
if(!(c->flags & SWS_BITEXACT)){ if(!(c->flags & SWS_BITEXACT)) {
if (c->flags & SWS_ACCURATE_RND){ if (c->flags & SWS_ACCURATE_RND) {
if (uDest){ if (uDest) {
YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
} }
if (CONFIG_SWSCALE_ALPHA && aDest){ if (CONFIG_SWSCALE_ALPHA && aDest) {
YSCALEYUV2YV12X_ACCURATE( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW) YSCALEYUV2YV12X_ACCURATE( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
} }
YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW) YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
}else{ } else {
if (uDest){ if (uDest) {
YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
} }
if (CONFIG_SWSCALE_ALPHA && aDest){ if (CONFIG_SWSCALE_ALPHA && aDest) {
YSCALEYUV2YV12X( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW) YSCALEYUV2YV12X( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
} }
...@@ -956,15 +956,15 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const ...@@ -956,15 +956,15 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
{ {
int i; int i;
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
if(!(c->flags & SWS_BITEXACT)){ if(!(c->flags & SWS_BITEXACT)) {
long p= 4; long p= 4;
uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW}; uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
uint8_t *dst[4]= {aDest, dest, uDest, vDest}; uint8_t *dst[4]= {aDest, dest, uDest, vDest};
x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW}; x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
if (c->flags & SWS_ACCURATE_RND){ if (c->flags & SWS_ACCURATE_RND) {
while(p--){ while(p--) {
if (dst[p]){ if (dst[p]) {
__asm__ volatile( __asm__ volatile(
YSCALEYUV2YV121_ACCURATE YSCALEYUV2YV121_ACCURATE
:: "r" (src[p]), "r" (dst[p] + counter[p]), :: "r" (src[p]), "r" (dst[p] + counter[p]),
...@@ -973,9 +973,9 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const ...@@ -973,9 +973,9 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
); );
} }
} }
}else{ } else {
while(p--){ while(p--) {
if (dst[p]){ if (dst[p]) {
__asm__ volatile( __asm__ volatile(
YSCALEYUV2YV121 YSCALEYUV2YV121
:: "r" (src[p]), "r" (dst[p] + counter[p]), :: "r" (src[p]), "r" (dst[p] + counter[p]),
...@@ -988,11 +988,10 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const ...@@ -988,11 +988,10 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
return; return;
} }
#endif #endif
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int val= (lumSrc[i]+64)>>7; int val= (lumSrc[i]+64)>>7;
if (val&256){ if (val&256) {
if (val<0) val=0; if (val<0) val=0;
else val=255; else val=255;
} }
...@@ -1001,12 +1000,11 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const ...@@ -1001,12 +1000,11 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
} }
if (uDest) if (uDest)
for (i=0; i<chrDstW; i++) for (i=0; i<chrDstW; i++) {
{
int u=(chrSrc[i ]+64)>>7; int u=(chrSrc[i ]+64)>>7;
int v=(chrSrc[i + VOFW]+64)>>7; int v=(chrSrc[i + VOFW]+64)>>7;
if ((u|v)&256){ if ((u|v)&256) {
if (u<0) u=0; if (u<0) u=0;
else if (u>255) u=255; else if (u>255) u=255;
if (v<0) v=0; if (v<0) v=0;
...@@ -1018,7 +1016,7 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const ...@@ -1018,7 +1016,7 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
} }
if (CONFIG_SWSCALE_ALPHA && aDest) if (CONFIG_SWSCALE_ALPHA && aDest)
for (i=0; i<dstW; i++){ for (i=0; i<dstW; i++) {
int val= (alpSrc[i]+64)>>7; int val= (alpSrc[i]+64)>>7;
aDest[i]= av_clip_uint8(val); aDest[i]= av_clip_uint8(val);
} }
...@@ -1034,11 +1032,11 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, ...@@ -1034,11 +1032,11 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
{ {
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
x86_reg dummy=0; x86_reg dummy=0;
if(!(c->flags & SWS_BITEXACT)){ if(!(c->flags & SWS_BITEXACT)) {
if (c->flags & SWS_ACCURATE_RND){ if (c->flags & SWS_ACCURATE_RND) {
switch(c->dstFormat){ switch(c->dstFormat) {
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX YSCALEYUV2RGBX
"movq %%mm2, "U_TEMP"(%0) \n\t" "movq %%mm2, "U_TEMP"(%0) \n\t"
...@@ -1052,7 +1050,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, ...@@ -1052,7 +1050,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6) WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
}else{ } else {
YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7 \n\t" "pcmpeqd %%mm7, %%mm7 \n\t"
...@@ -1116,11 +1114,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, ...@@ -1116,11 +1114,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
return; return;
} }
}else{ } else {
switch(c->dstFormat) switch(c->dstFormat) {
{
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX YSCALEYUV2PACKEDX
YSCALEYUV2RGBX YSCALEYUV2RGBX
YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
...@@ -1129,7 +1126,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, ...@@ -1129,7 +1126,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
"packuswb %%mm7, %%mm1 \n\t" "packuswb %%mm7, %%mm1 \n\t"
WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
}else{ } else {
YSCALEYUV2PACKEDX YSCALEYUV2PACKEDX
YSCALEYUV2RGBX YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7 \n\t" "pcmpeqd %%mm7, %%mm7 \n\t"
...@@ -1222,12 +1219,11 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons ...@@ -1222,12 +1219,11 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
int i; int i;
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
if(!(c->flags & SWS_BITEXACT)){ if(!(c->flags & SWS_BITEXACT)) {
switch(c->dstFormat) switch(c->dstFormat) {
{
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
#if ARCH_X86_64 #if ARCH_X86_64
__asm__ volatile( __asm__ volatile(
YSCALEYUV2RGB(%%REGBP, %5) YSCALEYUV2RGB(%%REGBP, %5)
...@@ -1268,7 +1264,7 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons ...@@ -1268,7 +1264,7 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
"a" (&c->redDither) "a" (&c->redDither)
); );
#endif #endif
}else{ } else {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"REG_b" \n\t"
...@@ -1373,20 +1369,17 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons ...@@ -1373,20 +1369,17 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
const int yalpha= 4096; //FIXME ... const int yalpha= 4096; //FIXME ...
if (flags&SWS_FULL_CHR_H_INT) if (flags&SWS_FULL_CHR_H_INT) {
{
c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y); c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
return; return;
} }
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
if(!(flags & SWS_BITEXACT)){ if(!(flags & SWS_BITEXACT)) {
if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
{ switch(dstFormat) {
switch(dstFormat)
{
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"REG_b" \n\t"
...@@ -1400,7 +1393,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons ...@@ -1400,7 +1393,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
:: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
}else{ } else {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"REG_b" \n\t"
...@@ -1489,13 +1482,10 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons ...@@ -1489,13 +1482,10 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
); );
return; return;
} }
} } else {
else switch(dstFormat) {
{
switch(dstFormat)
{
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"REG_b" \n\t"
...@@ -1509,7 +1499,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons ...@@ -1509,7 +1499,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
:: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
}else{ } else {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"REG_b" \n\t"
...@@ -1601,10 +1591,9 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons ...@@ -1601,10 +1591,9 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
} }
} }
#endif /* COMPILE_TEMPLATE_MMX */ #endif /* COMPILE_TEMPLATE_MMX */
if (uvalpha < 2048) if (uvalpha < 2048) {
{
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
}else{ } else {
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
} }
} }
...@@ -1662,8 +1651,7 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t ...@@ -1662,8 +1651,7 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
); );
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
dstU[i]= src1[4*i + 1]; dstU[i]= src1[4*i + 1];
dstV[i]= src1[4*i + 3]; dstV[i]= src1[4*i + 3];
} }
...@@ -1696,8 +1684,7 @@ static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s ...@@ -1696,8 +1684,7 @@ static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
); );
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
dstU[i]= src1[2*i + 1]; dstU[i]= src1[2*i + 1];
dstV[i]= src2[2*i + 1]; dstV[i]= src2[2*i + 1];
} }
...@@ -1756,8 +1743,7 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t ...@@ -1756,8 +1743,7 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
); );
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
dstU[i]= src1[4*i + 0]; dstU[i]= src1[4*i + 0];
dstV[i]= src1[4*i + 2]; dstV[i]= src1[4*i + 2];
} }
...@@ -1791,8 +1777,7 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s ...@@ -1791,8 +1777,7 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
); );
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
dstU[i]= src1[2*i]; dstU[i]= src1[2*i];
dstV[i]= src2[2*i]; dstV[i]= src2[2*i];
} }
...@@ -1803,13 +1788,13 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s ...@@ -1803,13 +1788,13 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, int srcFormat) static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, int srcFormat)
{ {
if(srcFormat == PIX_FMT_BGR24){ if(srcFormat == PIX_FMT_BGR24) {
__asm__ volatile( __asm__ volatile(
"movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t" "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t"
"movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t" "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t"
: :
); );
}else{ } else {
__asm__ volatile( __asm__ volatile(
"movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t" "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t"
"movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t" "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t"
...@@ -1918,8 +1903,7 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width ...@@ -1918,8 +1903,7 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24); RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int b= src[i*3+0]; int b= src[i*3+0];
int g= src[i*3+1]; int g= src[i*3+1];
int r= src[i*3+2]; int r= src[i*3+2];
...@@ -1935,8 +1919,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t ...@@ -1935,8 +1919,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24); RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int b= src1[3*i + 0]; int b= src1[3*i + 0];
int g= src1[3*i + 1]; int g= src1[3*i + 1];
int r= src1[3*i + 2]; int r= src1[3*i + 2];
...@@ -1951,8 +1934,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t ...@@ -1951,8 +1934,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{ {
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int b= src1[6*i + 0] + src1[6*i + 3]; int b= src1[6*i + 0] + src1[6*i + 3];
int g= src1[6*i + 1] + src1[6*i + 4]; int g= src1[6*i + 1] + src1[6*i + 4];
int r= src1[6*i + 2] + src1[6*i + 5]; int r= src1[6*i + 2] + src1[6*i + 5];
...@@ -1969,8 +1951,7 @@ static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width ...@@ -1969,8 +1951,7 @@ static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24); RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int r= src[i*3+0]; int r= src[i*3+0];
int g= src[i*3+1]; int g= src[i*3+1];
int b= src[i*3+2]; int b= src[i*3+2];
...@@ -1988,8 +1969,7 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t ...@@ -1988,8 +1969,7 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
#else #else
int i; int i;
assert(src1==src2); assert(src1==src2);
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int r= src1[3*i + 0]; int r= src1[3*i + 0];
int g= src1[3*i + 1]; int g= src1[3*i + 1];
int b= src1[3*i + 2]; int b= src1[3*i + 2];
...@@ -2004,8 +1984,7 @@ static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const ui ...@@ -2004,8 +1984,7 @@ static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const ui
{ {
int i; int i;
assert(src1==src2); assert(src1==src2);
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int r= src1[6*i + 0] + src1[6*i + 3]; int r= src1[6*i + 0] + src1[6*i + 3];
int g= src1[6*i + 1] + src1[6*i + 4]; int g= src1[6*i + 1] + src1[6*i + 4];
int b= src1[6*i + 2] + src1[6*i + 5]; int b= src1[6*i + 2] + src1[6*i + 5];
...@@ -2022,8 +2001,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in ...@@ -2022,8 +2001,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
{ {
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
assert(filterSize % 4 == 0 && filterSize>0); assert(filterSize % 4 == 0 && filterSize>0);
if (filterSize==4) // Always true for upscaling, sometimes for down, too. if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
{
x86_reg counter= -2*dstW; x86_reg counter= -2*dstW;
filter-= counter*2; filter-= counter*2;
filterPos-= counter/2; filterPos-= counter/2;
...@@ -2067,9 +2045,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in ...@@ -2067,9 +2045,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
: "%"REG_b : "%"REG_b
#endif #endif
); );
} } else if (filterSize==8) {
else if (filterSize==8)
{
x86_reg counter= -2*dstW; x86_reg counter= -2*dstW;
filter-= counter*4; filter-= counter*4;
filterPos-= counter/2; filterPos-= counter/2;
...@@ -2124,9 +2100,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in ...@@ -2124,9 +2100,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
: "%"REG_b : "%"REG_b
#endif #endif
); );
} } else {
else
{
uint8_t *offset = src+filterSize; uint8_t *offset = src+filterSize;
x86_reg counter= -2*dstW; x86_reg counter= -2*dstW;
//filter-= counter*filterSize/2; //filter-= counter*filterSize/2;
...@@ -2180,14 +2154,12 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in ...@@ -2180,14 +2154,12 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize); hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
#else #else
int i; int i;
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int j; int j;
int srcPos= filterPos[i]; int srcPos= filterPos[i];
int val=0; int val=0;
//printf("filterPos: %d\n", filterPos[i]); //printf("filterPos: %d\n", filterPos[i]);
for (j=0; j<filterSize; j++) for (j=0; j<filterSize; j++) {
{
//printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]); //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
val += ((int)src[srcPos + j])*filter[filterSize*i + j]; val += ((int)src[srcPos + j])*filter[filterSize*i + j];
} }
...@@ -2213,8 +2185,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, ...@@ -2213,8 +2185,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
{ {
int i; int i;
unsigned int xpos=0; unsigned int xpos=0;
for (i=0;i<dstWidth;i++) for (i=0;i<dstWidth;i++) {
{
register unsigned int xx=xpos>>16; register unsigned int xx=xpos>>16;
register unsigned int xalpha=(xpos&0xFFFF)>>9; register unsigned int xalpha=(xpos&0xFFFF)>>9;
dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
...@@ -2259,17 +2230,14 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, ...@@ -2259,17 +2230,14 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth,
#endif #endif
{ {
c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
} } else { // fast bilinear upscale / crap downscale
else // fast bilinear upscale / crap downscale
{
#if ARCH_X86 && CONFIG_GPL #if ARCH_X86 && CONFIG_GPL
#if COMPILE_TEMPLATE_MMX2 #if COMPILE_TEMPLATE_MMX2
int i; int i;
#if defined(PIC) #if defined(PIC)
DECLARE_ALIGNED(8, uint64_t, ebxsave); DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif #endif
if (canMMX2BeUsed) if (canMMX2BeUsed) {
{
__asm__ volatile( __asm__ volatile(
#if defined(PIC) #if defined(PIC)
"mov %%"REG_b", %5 \n\t" "mov %%"REG_b", %5 \n\t"
...@@ -2328,9 +2296,7 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, ...@@ -2328,9 +2296,7 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth,
#endif #endif
); );
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
} } else {
else
{
#endif /* COMPILE_TEMPLATE_MMX2 */ #endif /* COMPILE_TEMPLATE_MMX2 */
x86_reg xInc_shr16 = xInc >> 16; x86_reg xInc_shr16 = xInc >> 16;
uint16_t xInc_mask = xInc & 0xffff; uint16_t xInc_mask = xInc & 0xffff;
...@@ -2372,14 +2338,14 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, ...@@ -2372,14 +2338,14 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth,
#endif /* ARCH_X86 */ #endif /* ARCH_X86 */
} }
if(!isAlpha && c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){ if(!isAlpha && c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))) {
int i; int i;
//FIXME all pal and rgb srcFormats could do this convertion as well //FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform //FIXME all scalers more complex than bilinear could do half of this transform
if(c->srcRange){ if(c->srcRange) {
for (i=0; i<dstWidth; i++) for (i=0; i<dstWidth; i++)
dst[i]= (dst[i]*14071 + 33561947)>>14; dst[i]= (dst[i]*14071 + 33561947)>>14;
}else{ } else {
for (i=0; i<dstWidth; i++) for (i=0; i<dstWidth; i++)
dst[i]= (FFMIN(dst[i],30189)*19077 - 39057361)>>14; dst[i]= (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
} }
...@@ -2392,8 +2358,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst, ...@@ -2392,8 +2358,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
{ {
int i; int i;
unsigned int xpos=0; unsigned int xpos=0;
for (i=0;i<dstWidth;i++) for (i=0;i<dstWidth;i++) {
{
register unsigned int xx=xpos>>16; register unsigned int xx=xpos>>16;
register unsigned int xalpha=(xpos&0xFFFF)>>9; register unsigned int xalpha=(xpos&0xFFFF)>>9;
dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
...@@ -2445,17 +2410,14 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, ...@@ -2445,17 +2410,14 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
{ {
c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
} } else { // fast bilinear upscale / crap downscale
else // fast bilinear upscale / crap downscale
{
#if ARCH_X86 && CONFIG_GPL #if ARCH_X86 && CONFIG_GPL
#if COMPILE_TEMPLATE_MMX2 #if COMPILE_TEMPLATE_MMX2
int i; int i;
#if defined(PIC) #if defined(PIC)
DECLARE_ALIGNED(8, uint64_t, ebxsave); DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif #endif
if (canMMX2BeUsed) if (canMMX2BeUsed) {
{
__asm__ volatile( __asm__ volatile(
#if defined(PIC) #if defined(PIC)
"mov %%"REG_b", %6 \n\t" "mov %%"REG_b", %6 \n\t"
...@@ -2500,15 +2462,12 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, ...@@ -2500,15 +2462,12 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
,"%"REG_b ,"%"REG_b
#endif #endif
); );
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
{
//printf("%d %d %d\n", dstWidth, i, srcW); //printf("%d %d %d\n", dstWidth, i, srcW);
dst[i] = src1[srcW-1]*128; dst[i] = src1[srcW-1]*128;
dst[i+VOFW] = src2[srcW-1]*128; dst[i+VOFW] = src2[srcW-1]*128;
} }
} } else {
else
{
#endif /* COMPILE_TEMPLATE_MMX2 */ #endif /* COMPILE_TEMPLATE_MMX2 */
x86_reg xInc_shr16 = (x86_reg) (xInc >> 16); x86_reg xInc_shr16 = (x86_reg) (xInc >> 16);
uint16_t xInc_mask = xInc & 0xffff; uint16_t xInc_mask = xInc & 0xffff;
...@@ -2552,17 +2511,17 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, ...@@ -2552,17 +2511,17 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc); c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
#endif /* ARCH_X86 */ #endif /* ARCH_X86 */
} }
if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){ if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))) {
int i; int i;
//FIXME all pal and rgb srcFormats could do this convertion as well //FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform //FIXME all scalers more complex than bilinear could do half of this transform
if(c->srcRange){ if(c->srcRange) {
for (i=0; i<dstWidth; i++){ for (i=0; i<dstWidth; i++) {
dst[i ]= (dst[i ]*1799 + 4081085)>>11; //1469 dst[i ]= (dst[i ]*1799 + 4081085)>>11; //1469
dst[i+VOFW]= (dst[i+VOFW]*1799 + 4081085)>>11; //1469 dst[i+VOFW]= (dst[i+VOFW]*1799 + 4081085)>>11; //1469
} }
}else{ } else {
for (i=0; i<dstWidth; i++){ for (i=0; i<dstWidth; i++) {
dst[i ]= (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264 dst[i ]= (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264
dst[i+VOFW]= (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264 dst[i+VOFW]= (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
} }
...@@ -2571,8 +2530,8 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, ...@@ -2571,8 +2530,8 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
} }
static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
/* load a few things into local vars to make the code more readable? and faster */ /* load a few things into local vars to make the code more readable? and faster */
const int srcW= c->srcW; const int srcW= c->srcW;
const int dstW= c->dstW; const int dstW= c->dstW;
...@@ -2617,7 +2576,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -2617,7 +2576,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
int lastInLumBuf= c->lastInLumBuf; int lastInLumBuf= c->lastInLumBuf;
int lastInChrBuf= c->lastInChrBuf; int lastInChrBuf= c->lastInChrBuf;
if (isPacked(c->srcFormat)){ if (isPacked(c->srcFormat)) {
src[0]= src[0]=
src[1]= src[1]=
src[2]= src[2]=
...@@ -2636,11 +2595,9 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -2636,11 +2595,9 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
//printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2], //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
//dstStride[0],dstStride[1],dstStride[2]); //dstStride[0],dstStride[1],dstStride[2]);
if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
{
static int warnedAlready=0; //FIXME move this into the context perhaps static int warnedAlready=0; //FIXME move this into the context perhaps
if (flags & SWS_PRINT_INFO && !warnedAlready) if (flags & SWS_PRINT_INFO && !warnedAlready) {
{
av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
" ->cannot do aligned memory accesses anymore\n"); " ->cannot do aligned memory accesses anymore\n");
warnedAlready=1; warnedAlready=1;
...@@ -2650,7 +2607,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -2650,7 +2607,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
/* Note the user might start scaling the picture in the middle so this /* Note the user might start scaling the picture in the middle so this
will not get executed. This is not really intended but works will not get executed. This is not really intended but works
currently, so people might do it. */ currently, so people might do it. */
if (srcSliceY ==0){ if (srcSliceY ==0) {
lumBufIndex=0; lumBufIndex=0;
chrBufIndex=0; chrBufIndex=0;
dstY=0; dstY=0;
...@@ -2660,7 +2617,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -2660,7 +2617,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
lastDstY= dstY; lastDstY= dstY;
for (;dstY < dstH; dstY++){ for (;dstY < dstH; dstY++) {
unsigned char *dest =dst[0]+dstStride[0]*dstY; unsigned char *dest =dst[0]+dstStride[0]*dstY;
const int chrDstY= dstY>>c->chrDstVSubSample; const int chrDstY= dstY>>c->chrDstVSubSample;
unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
...@@ -2695,8 +2652,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -2695,8 +2652,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
vChrBufSize, vLumBufSize);*/ vChrBufSize, vLumBufSize);*/
//Do horizontal scaling //Do horizontal scaling
while(lastInLumBuf < lastLumSrcY) while(lastInLumBuf < lastLumSrcY) {
{
uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3]; uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
lumBufIndex++; lumBufIndex++;
...@@ -2716,8 +2672,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -2716,8 +2672,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
pal, 1); pal, 1);
lastInLumBuf++; lastInLumBuf++;
} }
while(lastInChrBuf < lastChrSrcY) while(lastInChrBuf < lastChrSrcY) {
{
uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
chrBufIndex++; chrBufIndex++;
...@@ -2747,52 +2702,49 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -2747,52 +2702,49 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
c->greenDither= ff_dither4[dstY&1]; c->greenDither= ff_dither4[dstY&1];
c->redDither= ff_dither8[(dstY+1)&1]; c->redDither= ff_dither8[(dstY+1)&1];
#endif #endif
if (dstY < dstH-2) if (dstY < dstH-2) {
{
const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
int i; int i;
if (flags & SWS_ACCURATE_RND){ if (flags & SWS_ACCURATE_RND) {
int s= APCK_SIZE / 8; int s= APCK_SIZE / 8;
for (i=0; i<vLumFilterSize; i+=2){ for (i=0; i<vLumFilterSize; i+=2) {
*(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ]; *(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
*(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)]; *(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
lumMmxFilter[s*i+APCK_COEF/4 ]= lumMmxFilter[s*i+APCK_COEF/4 ]=
lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
*(void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ]; *(void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
*(void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)]; *(void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
alpMmxFilter[s*i+APCK_COEF/4 ]= alpMmxFilter[s*i+APCK_COEF/4 ]=
alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ]; alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
} }
} }
for (i=0; i<vChrFilterSize; i+=2){ for (i=0; i<vChrFilterSize; i+=2) {
*(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ]; *(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
*(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)]; *(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
chrMmxFilter[s*i+APCK_COEF/4 ]= chrMmxFilter[s*i+APCK_COEF/4 ]=
chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ] chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
} }
}else{ } else {
for (i=0; i<vLumFilterSize; i++) for (i=0; i<vLumFilterSize; i++) {
{
lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i]; lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32; lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
lumMmxFilter[4*i+2]= lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]= lumMmxFilter[4*i+3]=
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
if (CONFIG_SWSCALE_ALPHA && alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i]; alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32; alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
alpMmxFilter[4*i+2]= alpMmxFilter[4*i+2]=
alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2]; alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
} }
} }
for (i=0; i<vChrFilterSize; i++) for (i=0; i<vChrFilterSize; i++) {
{
chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i]; chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32; chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
chrMmxFilter[4*i+2]= chrMmxFilter[4*i+2]=
...@@ -2801,87 +2753,72 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -2801,87 +2753,72 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
} }
} }
#endif #endif
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){ if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
const int chrSkipMask= (1<<c->chrDstVSubSample)-1; const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
c->yuv2nv12X(c, c->yuv2nv12X(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, dstW, chrDstW, dstFormat); dest, uDest, dstW, chrDstW, dstFormat);
} } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12 like
{
const int chrSkipMask= (1<<c->chrDstVSubSample)-1; const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
if (is16BPS(dstFormat)) if (is16BPS(dstFormat)) {
{
yuv2yuvX16inC( yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
dstFormat); dstFormat);
} } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
else
if (vLumFilterSize == 1 && vChrFilterSize == 1) // unscaled YV12
{
int16_t *lumBuf = lumPixBuf[0]; int16_t *lumBuf = lumPixBuf[0];
int16_t *chrBuf= chrPixBuf[0]; int16_t *chrBuf= chrPixBuf[0];
int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf[0] : NULL; int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf[0] : NULL;
c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW); c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
} } else { //General YV12
else //General YV12
{
c->yuv2yuvX(c, c->yuv2yuvX(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
} }
} } else {
else
{
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
if (vLumFilterSize == 1 && vChrFilterSize == 2) //unscaled RGB if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
{
int chrAlpha= vChrFilter[2*dstY+1]; int chrAlpha= vChrFilter[2*dstY+1];
if(flags & SWS_FULL_CHR_H_INT){ if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c, //FIXME write a packed1_full function yuv2rgbXinC_full(c, //FIXME write a packed1_full function
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY); alpSrcPtr, dest, dstW, dstY);
}else{ } else {
c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *alpSrcPtr : NULL,
dest, dstW, chrAlpha, dstFormat, flags, dstY); dest, dstW, chrAlpha, dstFormat, flags, dstY);
} }
} } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
else if (vLumFilterSize == 2 && vChrFilterSize == 2) //bilinear upscale RGB
{
int lumAlpha= vLumFilter[2*dstY+1]; int lumAlpha= vLumFilter[2*dstY+1];
int chrAlpha= vChrFilter[2*dstY+1]; int chrAlpha= vChrFilter[2*dstY+1];
lumMmxFilter[2]= lumMmxFilter[2]=
lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001; lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
chrMmxFilter[2]= chrMmxFilter[2]=
chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001; chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
if(flags & SWS_FULL_CHR_H_INT){ if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c, //FIXME write a packed2_full function yuv2rgbXinC_full(c, //FIXME write a packed2_full function
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY); alpSrcPtr, dest, dstW, dstY);
}else{ } else {
c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL, alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
dest, dstW, lumAlpha, chrAlpha, dstY); dest, dstW, lumAlpha, chrAlpha, dstY);
} }
} } else { //general RGB
else //general RGB if(flags & SWS_FULL_CHR_H_INT) {
{
if(flags & SWS_FULL_CHR_H_INT){
yuv2rgbXinC_full(c, yuv2rgbXinC_full(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY); alpSrcPtr, dest, dstW, dstY);
}else{ } else {
c->yuv2packedX(c, c->yuv2packedX(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
...@@ -2889,50 +2826,41 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -2889,50 +2826,41 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
} }
} }
} }
} } else { // hmm looks like we can't use MMX here without overwriting this array's tail
else // hmm looks like we can't use MMX here without overwriting this array's tail
{
const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){ if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
const int chrSkipMask= (1<<c->chrDstVSubSample)-1; const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
yuv2nv12XinC( yuv2nv12XinC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, dstW, chrDstW, dstFormat); dest, uDest, dstW, chrDstW, dstFormat);
} } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12
{
const int chrSkipMask= (1<<c->chrDstVSubSample)-1; const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
if (is16BPS(dstFormat)) if (is16BPS(dstFormat)) {
{
yuv2yuvX16inC( yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
dstFormat); dstFormat);
} } else {
else
{
yuv2yuvXinC( yuv2yuvXinC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
} }
} } else {
else
{
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
if(flags & SWS_FULL_CHR_H_INT){ if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c, yuv2rgbXinC_full(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY); alpSrcPtr, dest, dstW, dstY);
}else{ } else {
yuv2packedXinC(c, yuv2packedXinC(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
......
...@@ -63,15 +63,15 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) ...@@ -63,15 +63,15 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
if (c->flags & SWS_CPU_CAPS_MMX2) { if (c->flags & SWS_CPU_CAPS_MMX2) {
switch (c->dstFormat) { switch (c->dstFormat) {
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P){ if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_rgb32_MMX2; if (HAVE_7REGS) return yuva420_rgb32_MMX2;
break; break;
}else return yuv420_rgb32_MMX2; } else return yuv420_rgb32_MMX2;
case PIX_FMT_BGR32: case PIX_FMT_BGR32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P){ if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_bgr32_MMX2; if (HAVE_7REGS) return yuva420_bgr32_MMX2;
break; break;
}else return yuv420_bgr32_MMX2; } else return yuv420_bgr32_MMX2;
case PIX_FMT_BGR24: return yuv420_rgb24_MMX2; case PIX_FMT_BGR24: return yuv420_rgb24_MMX2;
case PIX_FMT_RGB565: return yuv420_rgb16_MMX2; case PIX_FMT_RGB565: return yuv420_rgb16_MMX2;
case PIX_FMT_RGB555: return yuv420_rgb15_MMX2; case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
...@@ -80,15 +80,15 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) ...@@ -80,15 +80,15 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
if (c->flags & SWS_CPU_CAPS_MMX) { if (c->flags & SWS_CPU_CAPS_MMX) {
switch (c->dstFormat) { switch (c->dstFormat) {
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P){ if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_rgb32_MMX; if (HAVE_7REGS) return yuva420_rgb32_MMX;
break; break;
}else return yuv420_rgb32_MMX; } else return yuv420_rgb32_MMX;
case PIX_FMT_BGR32: case PIX_FMT_BGR32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P){ if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_bgr32_MMX; if (HAVE_7REGS) return yuva420_bgr32_MMX;
break; break;
}else return yuv420_bgr32_MMX; } else return yuv420_bgr32_MMX;
case PIX_FMT_BGR24: return yuv420_rgb24_MMX; case PIX_FMT_BGR24: return yuv420_rgb24_MMX;
case PIX_FMT_RGB565: return yuv420_rgb16_MMX; case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
case PIX_FMT_RGB555: return yuv420_rgb15_MMX; case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
......
...@@ -122,7 +122,7 @@ ...@@ -122,7 +122,7 @@
#define YUV422_UNSHIFT \ #define YUV422_UNSHIFT \
if(c->srcFormat == PIX_FMT_YUV422P){ \ if(c->srcFormat == PIX_FMT_YUV422P) {\
srcStride[1] *= 2; \ srcStride[1] *= 2; \
srcStride[2] *= 2; \ srcStride[2] *= 2; \
} \ } \
...@@ -180,7 +180,8 @@ ...@@ -180,7 +180,8 @@
return srcSliceH; \ return srcSliceH; \
static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, h_size; int y, h_size;
YUV422_UNSHIFT YUV422_UNSHIFT
...@@ -236,7 +237,8 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStr ...@@ -236,7 +237,8 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStr
} }
static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, h_size; int y, h_size;
YUV422_UNSHIFT YUV422_UNSHIFT
...@@ -294,7 +296,8 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStr ...@@ -294,7 +296,8 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStr
} }
static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, h_size; int y, h_size;
YUV422_UNSHIFT YUV422_UNSHIFT
...@@ -470,7 +473,8 @@ etc. ...@@ -470,7 +473,8 @@ etc.
"movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \ "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \
static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, h_size; int y, h_size;
YUV422_UNSHIFT YUV422_UNSHIFT
...@@ -486,7 +490,8 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStr ...@@ -486,7 +490,8 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStr
} }
static inline int RENAME(yuva420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuva420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
#if HAVE_7REGS #if HAVE_7REGS
int y, h_size; int y, h_size;
...@@ -504,7 +509,8 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, uint8_t* src[], int srcSt ...@@ -504,7 +509,8 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, uint8_t* src[], int srcSt
} }
static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, h_size; int y, h_size;
YUV422_UNSHIFT YUV422_UNSHIFT
...@@ -520,7 +526,8 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t* src[], int ...@@ -520,7 +526,8 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t* src[], int
} }
static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
#if HAVE_7REGS #if HAVE_7REGS
int y, h_size; int y, h_size;
......
...@@ -92,7 +92,8 @@ const int32_t ff_yuv2rgb_coeffs[8][4] = { ...@@ -92,7 +92,8 @@ const int32_t ff_yuv2rgb_coeffs[8][4] = {
#define YUV2RGBFUNC(func_name, dst_type, alpha) \ #define YUV2RGBFUNC(func_name, dst_type, alpha) \
static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \
int srcSliceH, uint8_t* dst[], int dstStride[]){\ int srcSliceH, uint8_t* dst[], int dstStride[]) \
{\
int y;\ int y;\
\ \
if (!alpha && c->srcFormat == PIX_FMT_YUV422P) {\ if (!alpha && c->srcFormat == PIX_FMT_YUV422P) {\
...@@ -110,7 +111,7 @@ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSlic ...@@ -110,7 +111,7 @@ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSlic
uint8_t *pv = src[2] + (y>>1)*srcStride[2];\ uint8_t *pv = src[2] + (y>>1)*srcStride[2];\
uint8_t av_unused *pa_1, *pa_2;\ uint8_t av_unused *pa_1, *pa_2;\
unsigned int h_size = c->dstW>>3;\ unsigned int h_size = c->dstW>>3;\
if (alpha){\ if (alpha) {\
pa_1 = src[3] + y*srcStride[3];\ pa_1 = src[3] + y*srcStride[3];\
pa_2 = pa_1 + srcStride[3];\ pa_2 = pa_1 + srcStride[3];\
}\ }\
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment