Commit 379a2036 authored by Michael Niedermayer

rounding fixes

Originally committed as revision 11123 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
parent 93cb9d7f
...@@ -222,7 +222,7 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt ...@@ -222,7 +222,7 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
int i; int i;
for(i=0; i<dstW; i++) for(i=0; i<dstW; i++)
{ {
int val=0; int val=1<<18;
int j; int j;
for(j=0; j<lumFilterSize; j++) for(j=0; j<lumFilterSize; j++)
val += lumSrc[j][i] * lumFilter[j]; val += lumSrc[j][i] * lumFilter[j];
...@@ -233,8 +233,8 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt ...@@ -233,8 +233,8 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
if(uDest != NULL) if(uDest != NULL)
for(i=0; i<chrDstW; i++) for(i=0; i<chrDstW; i++)
{ {
int u=0; int u=1<<18;
int v=0; int v=1<<18;
int j; int j;
for(j=0; j<chrFilterSize; j++) for(j=0; j<chrFilterSize; j++)
{ {
...@@ -251,10 +251,10 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt ...@@ -251,10 +251,10 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
#define YSCALE_YUV_2_PACKEDX_C(type) \ #define YSCALE_YUV_2_PACKEDX_C(type) \
for(i=0; i<(dstW>>1); i++){\ for(i=0; i<(dstW>>1); i++){\
int j;\ int j;\
int Y1=0;\ int Y1=1<<18;\
int Y2=0;\ int Y2=1<<18;\
int U=0;\ int U=1<<18;\
int V=0;\ int V=1<<18;\
type *r, *b, *g;\ type *r, *b, *g;\
const int i2= 2*i;\ const int i2= 2*i;\
\ \
...@@ -621,8 +621,8 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l ...@@ -621,8 +621,8 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l
int acc=0; int acc=0;
for(i=0; i<dstW-1; i+=2){ for(i=0; i<dstW-1; i+=2){
int j; int j;
int Y1=0; int Y1=1<<18;
int Y2=0; int Y2=1<<18;
for(j=0; j<lumFilterSize; j++) for(j=0; j<lumFilterSize; j++)
{ {
...@@ -1093,7 +1093,7 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out ...@@ -1093,7 +1093,7 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
scale/= sum; scale/= sum;
for(j=0; j<*outFilterSize; j++) for(j=0; j<*outFilterSize; j++)
{ {
(*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale); (*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale + 0.5);
} }
} }
...@@ -1772,6 +1772,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int ...@@ -1772,6 +1772,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
c->srcFormat= srcFormat; c->srcFormat= srcFormat;
c->origDstFormat= origDstFormat; c->origDstFormat= origDstFormat;
c->origSrcFormat= origSrcFormat; c->origSrcFormat= origSrcFormat;
c->vRounder= 4* 0x0001000100010001ULL;
usesFilter=0; usesFilter=0;
if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesFilter=1; if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesFilter=1;
......
...@@ -109,10 +109,11 @@ typedef struct SwsContext{ ...@@ -109,10 +109,11 @@ typedef struct SwsContext{
#define Y_OFFSET "8*8" #define Y_OFFSET "8*8"
#define U_OFFSET "9*8" #define U_OFFSET "9*8"
#define V_OFFSET "10*8" #define V_OFFSET "10*8"
#define LUM_MMX_FILTER_OFFSET "11*8" #define VROUNDER_OFFSET "11*8"
#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256" #define LUM_MMX_FILTER_OFFSET "12*8"
#define DSTW_OFFSET "11*8+4*4*256*2" #define CHR_MMX_FILTER_OFFSET "12*8+4*4*256"
#define ESP_OFFSET "11*8+4*4*256*2+4" #define DSTW_OFFSET "12*8+4*4*256*2"
#define ESP_OFFSET "12*8+4*4*256*2+4"
uint64_t redDither __attribute__((aligned(8))); uint64_t redDither __attribute__((aligned(8)));
uint64_t greenDither __attribute__((aligned(8))); uint64_t greenDither __attribute__((aligned(8)));
...@@ -126,6 +127,7 @@ typedef struct SwsContext{ ...@@ -126,6 +127,7 @@ typedef struct SwsContext{
uint64_t yOffset __attribute__((aligned(8))); uint64_t yOffset __attribute__((aligned(8)));
uint64_t uOffset __attribute__((aligned(8))); uint64_t uOffset __attribute__((aligned(8)));
uint64_t vOffset __attribute__((aligned(8))); uint64_t vOffset __attribute__((aligned(8)));
uint64_t vRounder __attribute__((aligned(8)));
int32_t lumMmxFilter[4*MAX_FILTER_SIZE]; int32_t lumMmxFilter[4*MAX_FILTER_SIZE];
int32_t chrMmxFilter[4*MAX_FILTER_SIZE]; int32_t chrMmxFilter[4*MAX_FILTER_SIZE];
int dstW; int dstW;
......
...@@ -61,8 +61,8 @@ ...@@ -61,8 +61,8 @@
#define YSCALEYUV2YV12X(x, offset) \ #define YSCALEYUV2YV12X(x, offset) \
"xorl %%eax, %%eax \n\t"\ "xorl %%eax, %%eax \n\t"\
"pxor %%mm3, %%mm3 \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
"pxor %%mm4, %%mm4 \n\t"\ "movq %%mm3, %%mm4 \n\t"\
"leal " offset "(%0), %%edx \n\t"\ "leal " offset "(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\ "movl (%%edx), %%esi \n\t"\
".balign 16 \n\t" /* FIXME Unroll? */\ ".balign 16 \n\t" /* FIXME Unroll? */\
...@@ -84,8 +84,8 @@ ...@@ -84,8 +84,8 @@
MOVNTQ(%%mm3, (%1, %%eax))\ MOVNTQ(%%mm3, (%1, %%eax))\
"addl $8, %%eax \n\t"\ "addl $8, %%eax \n\t"\
"cmpl %2, %%eax \n\t"\ "cmpl %2, %%eax \n\t"\
"pxor %%mm3, %%mm3 \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
"pxor %%mm4, %%mm4 \n\t"\ "movq %%mm3, %%mm4 \n\t"\
"leal " offset "(%0), %%edx \n\t"\ "leal " offset "(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\ "movl (%%edx), %%esi \n\t"\
"jb 1b \n\t" "jb 1b \n\t"
...@@ -117,8 +117,8 @@ ...@@ -117,8 +117,8 @@
"1: \n\t"\ "1: \n\t"\
"leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\ "movl (%%edx), %%esi \n\t"\
"pxor %%mm3, %%mm3 \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
"pxor %%mm4, %%mm4 \n\t"\ "movq %%mm3, %%mm4 \n\t"\
".balign 16 \n\t"\ ".balign 16 \n\t"\
"2: \n\t"\ "2: \n\t"\
"movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
...@@ -135,8 +135,8 @@ ...@@ -135,8 +135,8 @@
\ \
"leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\ "movl (%%edx), %%esi \n\t"\
"pxor %%mm1, %%mm1 \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
"pxor %%mm7, %%mm7 \n\t"\ "movq %%mm1, %%mm7 \n\t"\
".balign 16 \n\t"\ ".balign 16 \n\t"\
"2: \n\t"\ "2: \n\t"\
"movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
...@@ -2611,6 +2611,8 @@ i--; ...@@ -2611,6 +2611,8 @@ i--;
const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
//printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
// dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
//handle holes (FAST_BILINEAR & weird filters) //handle holes (FAST_BILINEAR & weird filters)
if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment