Commit ec487e5d authored by Michael Niedermayer

better deblocking filter

Originally committed as revision 7961 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
parent 02c7e177
...@@ -59,7 +59,6 @@ compare the quality & speed of all filters ...@@ -59,7 +59,6 @@ compare the quality & speed of all filters
split this huge file split this huge file
optimize c versions optimize c versions
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
put fastmemcpy back
... ...
*/ */
...@@ -149,13 +148,14 @@ typedef struct PPContext{ ...@@ -149,13 +148,14 @@ typedef struct PPContext{
uint64_t __attribute__((aligned(8))) pQPb; uint64_t __attribute__((aligned(8))) pQPb;
uint64_t __attribute__((aligned(8))) pQPb2; uint64_t __attribute__((aligned(8))) pQPb2;
uint64_t __attribute__((aligned(8))) mmxDcOffset;
uint64_t __attribute__((aligned(8))) mmxDcThreshold;
uint64_t __attribute__((aligned(8))) mmxDcOffset[32];
uint64_t __attribute__((aligned(8))) mmxDcThreshold[32];
QP_STORE_T *nonBQPTable;
int QP; int QP;
int dcOffset; int nonBQP;
int dcThreshold;
int frameNum; int frameNum;
...@@ -247,8 +247,8 @@ static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) ...@@ -247,8 +247,8 @@ static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
{ {
int numEq= 0; int numEq= 0;
int y; int y;
const int dcOffset= c->dcOffset; const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
const int dcThreshold= c->dcThreshold; const int dcThreshold= dcOffset*2 + 1;
for(y=0; y<BLOCK_SIZE; y++) for(y=0; y<BLOCK_SIZE; y++)
{ {
if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
...@@ -269,8 +269,8 @@ static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) ...@@ -269,8 +269,8 @@ static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
int numEq= 0; int numEq= 0;
int y; int y;
const int dcOffset= c->dcOffset; const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
const int dcThreshold= c->dcThreshold; const int dcThreshold= dcOffset*2 + 1;
src+= stride*4; // src points to begin of the 8x8 Block src+= stride*4; // src points to begin of the 8x8 Block
for(y=0; y<BLOCK_SIZE-1; y++) for(y=0; y<BLOCK_SIZE-1; y++)
{ {
...@@ -725,7 +725,7 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality) ...@@ -725,7 +725,7 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality)
else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK) else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
{ {
int o; int o;
ppMode.maxDcDiff=1; ppMode.baseDcDiff=256/4;
// hFlatnessThreshold= 40; // hFlatnessThreshold= 40;
// vFlatnessThreshold= 40; // vFlatnessThreshold= 40;
...@@ -736,7 +736,7 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality) ...@@ -736,7 +736,7 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality)
if(tail==options[o]) break; if(tail==options[o]) break;
numOfUnknownOptions--; numOfUnknownOptions--;
if(o==0) ppMode.maxDcDiff= val; if(o==0) ppMode.baseDcDiff= val;
else ppMode.flatnessThreshold= val; else ppMode.flatnessThreshold= val;
} }
} }
...@@ -768,6 +768,8 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality) ...@@ -768,6 +768,8 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality)
void *getPPContext(int width, int height){ void *getPPContext(int width, int height){
PPContext *c= memalign(32, sizeof(PPContext)); PPContext *c= memalign(32, sizeof(PPContext));
int i; int i;
int mbWidth = (width+15)>>4;
int mbHeight= (height+15)>>4;
c->tempBlocks= (uint8_t*)memalign(8, 2*16*8); c->tempBlocks= (uint8_t*)memalign(8, 2*16*8);
c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t)); c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t));
...@@ -789,6 +791,8 @@ void *getPPContext(int width, int height){ ...@@ -789,6 +791,8 @@ void *getPPContext(int width, int height){
c->tempDstBlock= (uint8_t*)memalign(8, 1024*24); c->tempDstBlock= (uint8_t*)memalign(8, 1024*24);
c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24); c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
c->deintTemp= (uint8_t*)memalign(8, width+16); c->deintTemp= (uint8_t*)memalign(8, width+16);
c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T));
memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T));
c->frameNum=-1; c->frameNum=-1;
...@@ -809,6 +813,7 @@ void freePPContext(void *vc){ ...@@ -809,6 +813,7 @@ void freePPContext(void *vc){
free(c->tempDstBlock); free(c->tempDstBlock);
free(c->tempSrcBlock); free(c->tempSrcBlock);
free(c->deintTemp); free(c->deintTemp);
free(c->nonBQPTable);
free(c); free(c);
} }
...@@ -841,12 +846,14 @@ void revertPPOpt(void *conf, char* opt) ...@@ -841,12 +846,14 @@ void revertPPOpt(void *conf, char* opt)
void postprocess(uint8_t * src[3], int srcStride[3], void postprocess(uint8_t * src[3], int srcStride[3],
uint8_t * dst[3], int dstStride[3], uint8_t * dst[3], int dstStride[3],
int horizontalSize, int verticalSize, int width, int height,
QP_STORE_T *QP_store, int QPStride, QP_STORE_T *QP_store, int QPStride,
PPMode *mode, void *c) PPMode *mode, void *vc, int pict_type)
{ {
int mbWidth = (width+15)>>4;
int mbHeight= (height+15)>>4;
QP_STORE_T quantArray[2048/8]; QP_STORE_T quantArray[2048/8];
PPContext *c = (PPContext*)vc;
if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
{ {
...@@ -858,6 +865,29 @@ void postprocess(uint8_t * src[3], int srcStride[3], ...@@ -858,6 +865,29 @@ void postprocess(uint8_t * src[3], int srcStride[3],
else else
for(i=0; i<2048/8; i++) quantArray[i]= 1; for(i=0; i<2048/8; i++) quantArray[i]= 1;
} }
if(0){
int x,y;
for(y=0; y<mbHeight; y++){
for(x=0; x<mbWidth; x++){
printf("%2d ", QP_store[x + y*QPStride]);
}
printf("\n");
}
printf("\n");
}
//printf("pict_type:%d\n", pict_type);
if(pict_type!=3)
{
int x,y;
for(y=0; y<mbHeight; y++){
for(x=0; x<mbWidth; x++){
int qscale= QP_store[x + y*QPStride];
if(qscale&~31)
qscale=31;
c->nonBQPTable[y*mbWidth + x]= qscale;
}
}
}
if(firstTime2 && verbose) if(firstTime2 && verbose)
{ {
...@@ -866,30 +896,30 @@ void postprocess(uint8_t * src[3], int srcStride[3], ...@@ -866,30 +896,30 @@ void postprocess(uint8_t * src[3], int srcStride[3],
} }
postProcess(src[0], srcStride[0], dst[0], dstStride[0], postProcess(src[0], srcStride[0], dst[0], dstStride[0],
horizontalSize, verticalSize, QP_store, QPStride, 0, mode, c); width, height, QP_store, QPStride, 0, mode, c);
horizontalSize = (horizontalSize+1)>> 1; width = (width +1)>>1;
verticalSize = (verticalSize+1)>>1; height = (height+1)>>1;
if(mode->chromMode) if(mode->chromMode)
{ {
postProcess(src[1], srcStride[1], dst[1], dstStride[1], postProcess(src[1], srcStride[1], dst[1], dstStride[1],
horizontalSize, verticalSize, QP_store, QPStride, 1, mode, c); width, height, QP_store, QPStride, 1, mode, c);
postProcess(src[2], srcStride[2], dst[2], dstStride[2], postProcess(src[2], srcStride[2], dst[2], dstStride[2],
horizontalSize, verticalSize, QP_store, QPStride, 2, mode, c); width, height, QP_store, QPStride, 2, mode, c);
} }
else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
{ {
memcpy(dst[1], src[1], srcStride[1]*verticalSize); memcpy(dst[1], src[1], srcStride[1]*height);
memcpy(dst[2], src[2], srcStride[2]*verticalSize); memcpy(dst[2], src[2], srcStride[2]*height);
} }
else else
{ {
int y; int y;
for(y=0; y<verticalSize; y++) for(y=0; y<height; y++)
{ {
memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), horizontalSize); memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), horizontalSize); memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
} }
} }
} }
......
...@@ -54,7 +54,6 @@ ...@@ -54,7 +54,6 @@
#define TEMP_NOISE_FILTER 0x100000 #define TEMP_NOISE_FILTER 0x100000
#define FORCE_QUANT 0x200000 #define FORCE_QUANT 0x200000
#define GET_PP_QUALITY_MAX 6 #define GET_PP_QUALITY_MAX 6
//use if u want a faster postprocessing code //use if u want a faster postprocessing code
...@@ -76,8 +75,8 @@ typedef struct PPMode{ ...@@ -76,8 +75,8 @@ typedef struct PPMode{
int maxAllowedY; // for brihtness correction int maxAllowedY; // for brihtness correction
int maxTmpNoise[3]; // for Temporal Noise Reducing filter (Maximal sum of abs differences) int maxTmpNoise[3]; // for Temporal Noise Reducing filter (Maximal sum of abs differences)
int maxDcDiff; // max abs diff between pixels to be considered flat int baseDcDiff;
int flatnessThreshold; int flatnessThreshold;
int forcedQuant; // quantizer if FORCE_QUANT is used int forcedQuant; // quantizer if FORCE_QUANT is used
...@@ -87,7 +86,7 @@ void postprocess(uint8_t * src[3], int srcStride[3], ...@@ -87,7 +86,7 @@ void postprocess(uint8_t * src[3], int srcStride[3],
uint8_t * dst[3], int dstStride[3], uint8_t * dst[3], int dstStride[3],
int horizontalSize, int verticalSize, int horizontalSize, int verticalSize,
QP_STORE_T *QP_store, int QP_stride, QP_STORE_T *QP_store, int QP_stride,
PPMode *mode, void *ppContext); PPMode *mode, void *ppContext, int pict_type);
// name is the stuff after "-pp" on the command line // name is the stuff after "-pp" on the command line
PPMode getPPModeByNameAndQuality(char *name, int quality); PPMode getPPModeByNameAndQuality(char *name, int quality);
......
...@@ -56,8 +56,9 @@ asm volatile( ...@@ -56,8 +56,9 @@ asm volatile(
"leal (%1, %2), %%eax \n\t" "leal (%1, %2), %%eax \n\t"
// 0 1 2 3 4 5 6 7 8 9 // 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
"movq %3, %%mm7 \n\t" // mm7 = 0x7F "movq %3, %%mm7 \n\t"
"movq %4, %%mm6 \n\t" // mm6 = 0x7D "movq %4, %%mm6 \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq (%%eax), %%mm1 \n\t" "movq (%%eax), %%mm1 \n\t"
"psubb %%mm1, %%mm0 \n\t" // mm0 = differnece "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece
...@@ -119,7 +120,7 @@ asm volatile( ...@@ -119,7 +120,7 @@ asm volatile(
#endif #endif
"movd %%mm0, %0 \n\t" "movd %%mm0, %0 \n\t"
: "=r" (numEq) : "=r" (numEq)
: "r" (src), "r" (stride), "m" (c->mmxDcOffset), "m" (c->mmxDcThreshold) : "r" (src), "r" (stride), "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
: "%eax" : "%eax"
); );
numEq= (-numEq) &0xFF; numEq= (-numEq) &0xFF;
...@@ -150,6 +151,7 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c ...@@ -150,6 +151,7 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c
); );
return isOk==0; return isOk==0;
#else #else
#if 1
int x; int x;
const int QP= c->QP; const int QP= c->QP;
src+= stride*3; src+= stride*3;
...@@ -159,6 +161,24 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c ...@@ -159,6 +161,24 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c
} }
return 1; return 1;
#else
int x;
const int QP= c->QP;
src+= stride*4;
for(x=0; x<BLOCK_SIZE; x++)
{
int min=255;
int max=0;
int y;
for(y=0; y<8; y++){
int v= src[x + y*stride];
if(v>max) max=v;
if(v<min) min=v;
}
if(max-min > 2*QP) return 0;
}
return 1;
#endif
#endif #endif
} }
...@@ -2639,22 +2659,23 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int ...@@ -2639,22 +2659,23 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
int black=0, white=255; // blackest black and whitest white in the picture int black=0, white=255; // blackest black and whitest white in the picture
int QPCorrecture= 256*256; int QPCorrecture= 256*256;
int copyAhead; int copyAhead, i;
//FIXME remove //FIXME remove
uint64_t * const yHistogram= c.yHistogram; uint64_t * const yHistogram= c.yHistogram;
uint8_t * const tempSrc= c.tempSrc; uint8_t * const tempSrc= c.tempSrc;
uint8_t * const tempDst= c.tempDst; uint8_t * const tempDst= c.tempDst;
const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
c.dcOffset= c.ppMode.maxDcDiff;
c.dcThreshold= c.ppMode.maxDcDiff*2 + 1;
#ifdef HAVE_MMX #ifdef HAVE_MMX
c.mmxDcOffset= 0x7F - c.dcOffset; for(i=0; i<32; i++){
c.mmxDcThreshold= 0x7F - c.dcThreshold; int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
int threshold= offset*2 + 1;
c.mmxDcOffset*= 0x0101010101010101LL; c.mmxDcOffset[i]= 0x7F - offset;
c.mmxDcThreshold*= 0x0101010101010101LL; c.mmxDcThreshold[i]= 0x7F - threshold;
c.mmxDcOffset[i]*= 0x0101010101010101LL;
c.mmxDcThreshold[i]*= 0x0101010101010101LL;
}
#endif #endif
if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16; if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
...@@ -2814,11 +2835,8 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int ...@@ -2814,11 +2835,8 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
uint8_t *tempBlock1= c.tempBlocks; uint8_t *tempBlock1= c.tempBlocks;
uint8_t *tempBlock2= c.tempBlocks + 8; uint8_t *tempBlock2= c.tempBlocks + 8;
#endif #endif
#ifdef ARCH_X86
int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride]; int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride];
int QPDelta= isColor ? (-1) : 1<<31; int *nonBQPptr= isColor ? &c.nonBQPTable[(y>>3)*mbWidth] :&c.nonBQPTable[(y>>4)*mbWidth];
int QPFrac= 1<<30;
#endif
int QP=0; int QP=0;
/* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
if not than use a temporary buffer */ if not than use a temporary buffer */
...@@ -2855,28 +2873,19 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int ...@@ -2855,28 +2873,19 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
#ifdef HAVE_MMX #ifdef HAVE_MMX
uint8_t *tmpXchg; uint8_t *tmpXchg;
#endif #endif
#ifdef ARCH_X86 if(isColor)
QP= *QPptr; {
asm volatile( QP= QPptr[x>>3];
"addl %2, %1 \n\t" c.nonBQP= nonBQPptr[x>>3];
"sbbl %%eax, %%eax \n\t" }
"shll $2, %%eax \n\t" else
"subl %%eax, %0 \n\t"
: "+r" (QPptr), "+m" (QPFrac)
: "r" (QPDelta)
: "%eax"
);
#else
QP= isColor ?
QPs[(y>>3)*QPStride + (x>>3)]:
QPs[(y>>4)*QPStride + (x>>4)];
#endif
if(!isColor)
{ {
QP= QPptr[x>>4];
QP= (QP* QPCorrecture + 256*128)>>16; QP= (QP* QPCorrecture + 256*128)>>16;
c.nonBQP= nonBQPptr[x>>4];
c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
yHistogram[ srcBlock[srcStride*12 + 4] ]++; yHistogram[ srcBlock[srcStride*12 + 4] ]++;
} }
//printf("%d ", QP);
c.QP= QP; c.QP= QP;
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( asm volatile(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment