Commit 77a49659 authored by Michael Niedermayer

cleanup

Originally committed as revision 9414 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
parent d271b84b
...@@ -2019,6 +2019,15 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -2019,6 +2019,15 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
c->dstFormat= dstFormat; c->dstFormat= dstFormat;
c->srcFormat= srcFormat; c->srcFormat= srcFormat;
c->yCoeff= 0x2568256825682568LL;
c->vrCoeff= 0x3343334333433343LL;
c->ubCoeff= 0x40cf40cf40cf40cfLL;
c->vgCoeff= 0xE5E2E5E2E5E2E5E2LL;
c->ugCoeff= 0xF36EF36EF36EF36ELL;
c->yOffset= 0x0080008000800080LL;
c->uOffset= 0x0400040004000400LL;
c->vOffset= 0x0400040004000400LL;
usesFilter=0; usesFilter=0;
if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesFilter=1; if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesFilter=1;
if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesFilter=1; if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesFilter=1;
...@@ -2261,19 +2270,6 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -2261,19 +2270,6 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
ASSERT(c->chrDstH <= dstH) ASSERT(c->chrDstH <= dstH)
// pack filter data for mmx code
if(cpuCaps.hasMMX)
{
c->lumMmxFilter= (int16_t*)memalign(8, c->vLumFilterSize* dstH*4*sizeof(int16_t));
c->chrMmxFilter= (int16_t*)memalign(8, c->vChrFilterSize*c->chrDstH*4*sizeof(int16_t));
for(i=0; i<c->vLumFilterSize*dstH; i++)
c->lumMmxFilter[4*i]=c->lumMmxFilter[4*i+1]=c->lumMmxFilter[4*i+2]=c->lumMmxFilter[4*i+3]=
c->vLumFilter[i];
for(i=0; i<c->vChrFilterSize*c->chrDstH; i++)
c->chrMmxFilter[4*i]=c->chrMmxFilter[4*i+1]=c->chrMmxFilter[4*i+2]=c->chrMmxFilter[4*i+3]=
c->vChrFilter[i];
}
if(flags&SWS_PRINT_INFO) if(flags&SWS_PRINT_INFO)
{ {
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
...@@ -2668,11 +2664,6 @@ void freeSwsContext(SwsContext *c){ ...@@ -2668,11 +2664,6 @@ void freeSwsContext(SwsContext *c){
if(c->hChrFilterPos) free(c->hChrFilterPos); if(c->hChrFilterPos) free(c->hChrFilterPos);
c->hChrFilterPos = NULL; c->hChrFilterPos = NULL;
if(c->lumMmxFilter) free(c->lumMmxFilter);
c->lumMmxFilter = NULL;
if(c->chrMmxFilter) free(c->chrMmxFilter);
c->chrMmxFilter = NULL;
if(c->lumMmx2Filter) free(c->lumMmx2Filter); if(c->lumMmx2Filter) free(c->lumMmx2Filter);
c->lumMmx2Filter=NULL; c->lumMmx2Filter=NULL;
if(c->chrMmx2Filter) free(c->chrMmx2Filter); if(c->chrMmx2Filter) free(c->chrMmx2Filter);
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#define SWS_FULL_CHR_H_INP 0x4000 #define SWS_FULL_CHR_H_INP 0x4000
#define SWS_DIRECT_BGR 0x8000 #define SWS_DIRECT_BGR 0x8000
#define MAX_FILTER_SIZE 256
#define SWS_MAX_REDUCE_CUTOFF 0.002 #define SWS_MAX_REDUCE_CUTOFF 0.002
...@@ -70,9 +71,6 @@ typedef struct SwsContext{ ...@@ -70,9 +71,6 @@ typedef struct SwsContext{
int16_t *vChrFilter; int16_t *vChrFilter;
int16_t *vChrFilterPos; int16_t *vChrFilterPos;
// Contain simply the values from v(Lum|Chr)Filter just nicely packed for mmx
int16_t *lumMmxFilter;
int16_t *chrMmxFilter;
uint8_t formatConvBuffer[4000]; //FIXME dynamic alloc, but we have to change alot of code for this to be usefull uint8_t formatConvBuffer[4000]; //FIXME dynamic alloc, but we have to change alot of code for this to be usefull
int hLumFilterSize; int hLumFilterSize;
...@@ -105,8 +103,40 @@ typedef struct SwsContext{ ...@@ -105,8 +103,40 @@ typedef struct SwsContext{
void (*swScale)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY, void (*swScale)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]); int srcSliceH, uint8_t* dst[], int dstStride[]);
#define RED_DITHER "0*8"
#define GREEN_DITHER "1*8"
#define BLUE_DITHER "2*8"
#define Y_COEFF "3*8"
#define VR_COEFF "4*8"
#define UB_COEFF "5*8"
#define VG_COEFF "6*8"
#define UG_COEFF "7*8"
#define Y_OFFSET "8*8"
#define U_OFFSET "9*8"
#define V_OFFSET "10*8"
#define LUM_MMX_FILTER_OFFSET "11*8"
#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
uint64_t redDither __attribute__((aligned(8)));
uint64_t greenDither __attribute__((aligned(8)));
uint64_t blueDither __attribute__((aligned(8)));
uint64_t yCoeff __attribute__((aligned(8)));
uint64_t vrCoeff __attribute__((aligned(8)));
uint64_t ubCoeff __attribute__((aligned(8)));
uint64_t vgCoeff __attribute__((aligned(8)));
uint64_t ugCoeff __attribute__((aligned(8)));
uint64_t yOffset __attribute__((aligned(8)));
uint64_t uOffset __attribute__((aligned(8)));
uint64_t vOffset __attribute__((aligned(8)));
int32_t lumMmxFilter[4*MAX_FILTER_SIZE];
int32_t chrMmxFilter[4*MAX_FILTER_SIZE];
} SwsContext; } SwsContext;
//FIXME check init (where 0) //FIXME check init (where 0)
//FIXME split private & public
// when used for filters they must have an odd number of elements // when used for filters they must have an odd number of elements
// coeffs cannot be shared between vectors // coeffs cannot be shared between vectors
......
...@@ -59,32 +59,35 @@ ...@@ -59,32 +59,35 @@
#define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
#endif #endif
#define YSCALEYUV2YV12X(x) \ #define YSCALEYUV2YV12X(x, offset) \
"xorl %%eax, %%eax \n\t"\ "xorl %%eax, %%eax \n\t"\
"pxor %%mm3, %%mm3 \n\t"\ "pxor %%mm3, %%mm3 \n\t"\
"pxor %%mm4, %%mm4 \n\t"\ "pxor %%mm4, %%mm4 \n\t"\
"movl %0, %%edx \n\t"\ "leal " offset "(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
".balign 16 \n\t" /* FIXME Unroll? */\ ".balign 16 \n\t" /* FIXME Unroll? */\
"1: \n\t"\ "1: \n\t"\
"movl (%1, %%edx, 4), %%esi \n\t"\ "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
"movq (%2, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\
"movq " #x "(%%esi, %%eax, 2), %%mm2 \n\t" /* srcData */\ "movq " #x "(%%esi, %%eax, 2), %%mm2 \n\t" /* srcData */\
"movq 8+" #x "(%%esi, %%eax, 2), %%mm5 \n\t" /* srcData */\ "movq 8+" #x "(%%esi, %%eax, 2), %%mm5 \n\t" /* srcData */\
"addl $16, %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
"testl %%esi, %%esi \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\ "pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\ "pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm3 \n\t"\ "paddw %%mm2, %%mm3 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\ "paddw %%mm5, %%mm4 \n\t"\
"addl $1, %%edx \n\t"\
" jnz 1b \n\t"\ " jnz 1b \n\t"\
"psraw $3, %%mm3 \n\t"\ "psraw $3, %%mm3 \n\t"\
"psraw $3, %%mm4 \n\t"\ "psraw $3, %%mm4 \n\t"\
"packuswb %%mm4, %%mm3 \n\t"\ "packuswb %%mm4, %%mm3 \n\t"\
MOVNTQ(%%mm3, (%3, %%eax))\ MOVNTQ(%%mm3, (%1, %%eax))\
"addl $8, %%eax \n\t"\ "addl $8, %%eax \n\t"\
"cmpl %4, %%eax \n\t"\ "cmpl %2, %%eax \n\t"\
"pxor %%mm3, %%mm3 \n\t"\ "pxor %%mm3, %%mm3 \n\t"\
"pxor %%mm4, %%mm4 \n\t"\ "pxor %%mm4, %%mm4 \n\t"\
"movl %0, %%edx \n\t"\ "leal " offset "(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
"jb 1b \n\t" "jb 1b \n\t"
#define YSCALEYUV2YV121 \ #define YSCALEYUV2YV121 \
...@@ -110,57 +113,60 @@ ...@@ -110,57 +113,60 @@
#define YSCALEYUV2PACKEDX \ #define YSCALEYUV2PACKEDX \
"xorl %%eax, %%eax \n\t"\ "xorl %%eax, %%eax \n\t"\
".balign 16 \n\t"\ ".balign 16 \n\t"\
"nop \n\t"\
"1: \n\t"\ "1: \n\t"\
"movl %1, %%edx \n\t" /* -chrFilterSize */\ "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
"movl %3, %%ebx \n\t" /* chrMmxFilter+chrFilterSize */\ "movl (%%edx), %%esi \n\t"\
"movl %7, %%ecx \n\t" /* chrSrc+chrFilterSize */\
"pxor %%mm3, %%mm3 \n\t"\ "pxor %%mm3, %%mm3 \n\t"\
"pxor %%mm4, %%mm4 \n\t"\ "pxor %%mm4, %%mm4 \n\t"\
".balign 16 \n\t"\
"2: \n\t"\ "2: \n\t"\
"movl (%%ecx, %%edx, 4), %%esi \n\t"\ "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
"movq (%%ebx, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\
"movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\ "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\
"movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\ "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\
"addl $16, %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\ "pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\ "pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm3 \n\t"\ "paddw %%mm2, %%mm3 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\ "paddw %%mm5, %%mm4 \n\t"\
"addl $1, %%edx \n\t"\ "testl %%esi, %%esi \n\t"\
" jnz 2b \n\t"\ " jnz 2b \n\t"\
\ \
"movl %0, %%edx \n\t" /* -lumFilterSize */\ "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
"movl %2, %%ebx \n\t" /* lumMmxFilter+lumFilterSize */\ "movl (%%edx), %%esi \n\t"\
"movl %6, %%ecx \n\t" /* lumSrc+lumFilterSize */\
"pxor %%mm1, %%mm1 \n\t"\ "pxor %%mm1, %%mm1 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm7, %%mm7 \n\t"\
".balign 16 \n\t"\
"2: \n\t"\ "2: \n\t"\
"movl (%%ecx, %%edx, 4), %%esi \n\t"\ "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
"movq (%%ebx, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\
"movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\ "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\
"movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\ "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\
"addl $16, %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\ "pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\ "pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm1 \n\t"\ "paddw %%mm2, %%mm1 \n\t"\
"paddw %%mm5, %%mm7 \n\t"\ "paddw %%mm5, %%mm7 \n\t"\
"addl $1, %%edx \n\t"\ "testl %%esi, %%esi \n\t"\
" jnz 2b \n\t"\ " jnz 2b \n\t"\
#define YSCALEYUV2RGBX \ #define YSCALEYUV2RGBX \
YSCALEYUV2PACKEDX\ YSCALEYUV2PACKEDX\
"psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
"psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
"pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
"pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
"pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
"psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
"pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
"paddw %%mm3, %%mm4 \n\t"\ "paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\ "movq %%mm2, %%mm0 \n\t"\
...@@ -183,7 +189,7 @@ ...@@ -183,7 +189,7 @@
"packuswb %%mm6, %%mm5 \n\t"\ "packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\ "packuswb %%mm3, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
#if 0
#define FULL_YSCALEYUV2RGB \ #define FULL_YSCALEYUV2RGB \
"pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm7, %%mm7 \n\t"\
"movd %6, %%mm6 \n\t" /*yalpha1*/\ "movd %6, %%mm6 \n\t" /*yalpha1*/\
...@@ -236,6 +242,7 @@ ...@@ -236,6 +242,7 @@
"paddw %%mm2, %%mm1 \n\t" /* G*/\ "paddw %%mm2, %%mm1 \n\t" /* G*/\
\ \
"packuswb %%mm1, %%mm1 \n\t" "packuswb %%mm1, %%mm1 \n\t"
#endif
#define YSCALEYUV2PACKED \ #define YSCALEYUV2PACKED \
"movd %6, %%mm6 \n\t" /*yalpha1*/\ "movd %6, %%mm6 \n\t" /*yalpha1*/\
...@@ -742,33 +749,34 @@ ...@@ -742,33 +749,34 @@
" jb 1b \n\t" " jb 1b \n\t"
static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW,
int16_t * lumMmxFilter, int16_t * chrMmxFilter) int32_t * lumMmxFilter, int32_t * chrMmxFilter)
{ {
int dummy=0;
#ifdef HAVE_MMX #ifdef HAVE_MMX
if(uDest != NULL) if(uDest != NULL)
{ {
asm volatile( asm volatile(
YSCALEYUV2YV12X(0) YSCALEYUV2YV12X(0, CHR_MMX_FILTER_OFFSET)
:: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), :: "r" (&c->redDither),
"r" (chrMmxFilter+chrFilterSize*4), "r" (uDest), "m" (chrDstW) "r" (uDest), "m" (chrDstW)
: "%eax", "%edx", "%esi" : "%eax", "%edx", "%esi"
); );
asm volatile( asm volatile(
YSCALEYUV2YV12X(4096) YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET)
:: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), :: "r" (&c->redDither),
"r" (chrMmxFilter+chrFilterSize*4), "r" (vDest), "m" (chrDstW) "r" (vDest), "m" (chrDstW)
: "%eax", "%edx", "%esi" : "%eax", "%edx", "%esi"
); );
} }
asm volatile( asm volatile(
YSCALEYUV2YV12X(0) YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET)
:: "m" (-lumFilterSize), "r" (lumSrc+lumFilterSize), :: "r" (&c->redDither),
"r" (lumMmxFilter+lumFilterSize*4), "r" (dest), "m" (dstW) "r" (dest), "m" (dstW)
: "%eax", "%edx", "%esi" : "%eax", "%edx", "%esi"
); );
#else #else
...@@ -844,8 +852,9 @@ static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, ...@@ -844,8 +852,9 @@ static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
*/ */
static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, int dstW, int16_t * lumMmxFilter, int16_t * chrMmxFilter, int dstY) uint8_t *dest, int dstW, int dstY)
{ {
int dummy=0;
switch(c->dstFormat) switch(c->dstFormat)
{ {
#ifdef HAVE_MMX #ifdef HAVE_MMX
...@@ -855,11 +864,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ ...@@ -855,11 +864,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_
YSCALEYUV2RGBX YSCALEYUV2RGBX
WRITEBGR32 WRITEBGR32
:: "m" (-lumFilterSize), "m" (-chrFilterSize), :: "r" (&c->redDither),
"m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), "m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW), "r" (dest), "m" (dstW)
"m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) : "%eax", "%edx", "%esi"
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
); );
} }
break; break;
...@@ -871,11 +879,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ ...@@ -871,11 +879,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_
"addl %4, %%ebx \n\t" "addl %4, %%ebx \n\t"
WRITEBGR24 WRITEBGR24
:: "m" (-lumFilterSize), "m" (-chrFilterSize), :: "r" (&c->redDither),
"m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), "m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW), "r" (dest), "m" (dstW)
"m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) : "%eax", "%ebx", "%edx", "%esi" //FIXME ebx
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
); );
} }
break; break;
...@@ -892,11 +899,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ ...@@ -892,11 +899,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_
WRITEBGR15 WRITEBGR15
:: "m" (-lumFilterSize), "m" (-chrFilterSize), :: "r" (&c->redDither),
"m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), "m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW), "r" (dest), "m" (dstW)
"m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) : "%eax", "%edx", "%esi"
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
); );
} }
break; break;
...@@ -913,11 +919,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ ...@@ -913,11 +919,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_
WRITEBGR16 WRITEBGR16
:: "m" (-lumFilterSize), "m" (-chrFilterSize), :: "r" (&c->redDither),
"m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), "m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW), "r" (dest), "m" (dstW)
"m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) : "%eax", "%edx", "%esi"
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
); );
} }
break; break;
...@@ -933,11 +938,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ ...@@ -933,11 +938,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_
"psraw $3, %%mm7 \n\t" "psraw $3, %%mm7 \n\t"
WRITEYUY2 WRITEYUY2
:: "m" (-lumFilterSize), "m" (-chrFilterSize), :: "r" (&c->redDither),
"m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), "m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW), "r" (dest), "m" (dstW)
"m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) : "%eax", "%edx", "%esi"
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
); );
} }
break; break;
...@@ -2528,8 +2532,8 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar ...@@ -2528,8 +2532,8 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
int16_t *vChrFilter= c->vChrFilter; int16_t *vChrFilter= c->vChrFilter;
int16_t *hLumFilter= c->hLumFilter; int16_t *hLumFilter= c->hLumFilter;
int16_t *hChrFilter= c->hChrFilter; int16_t *hChrFilter= c->hChrFilter;
int16_t *lumMmxFilter= c->lumMmxFilter; int32_t *lumMmxFilter= c->lumMmxFilter;
int16_t *chrMmxFilter= c->chrMmxFilter; int32_t *chrMmxFilter= c->chrMmxFilter;
const int vLumFilterSize= c->vLumFilterSize; const int vLumFilterSize= c->vLumFilterSize;
const int vChrFilterSize= c->vChrFilterSize; const int vChrFilterSize= c->vChrFilterSize;
const int hLumFilterSize= c->hLumFilterSize; const int hLumFilterSize= c->hLumFilterSize;
...@@ -2729,11 +2733,28 @@ i--; ...@@ -2729,11 +2733,28 @@ i--;
{ {
int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
RENAME(yuv2yuvX)( int i;
#ifdef HAVE_MMX
for(i=0; i<vLumFilterSize; i++)
{
lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]=
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
}
for(i=0; i<vChrFilterSize; i++)
{
chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
chrMmxFilter[4*i+2]=
chrMmxFilter[4*i+3]=
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
}
#endif
RENAME(yuv2yuvX)(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, vDest, dstW, chrDstW, dest, uDest, vDest, dstW, chrDstW,
lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+chrDstY*vChrFilterSize*4); lumMmxFilter, chrMmxFilter);
} }
} }
else else
...@@ -2760,11 +2781,27 @@ i--; ...@@ -2760,11 +2781,27 @@ i--;
} }
else //General RGB else //General RGB
{ {
int i;
#ifdef HAVE_MMX
for(i=0; i<vLumFilterSize; i++)
{
lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]=
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
}
for(i=0; i<vChrFilterSize; i++)
{
chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
chrMmxFilter[4*i+2]=
chrMmxFilter[4*i+3]=
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
}
#endif
RENAME(yuv2packedX)(c, RENAME(yuv2packedX)(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, dstW, dest, dstW, dstY);
lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4, dstY);
} }
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment