Commit d33d485e authored by Alan Curry, committed by Diego Biurrun

Move the v{Y,C}CoeffsBank vectors into the SwsContext, filling them in just
once when the scaler is initialized, instead of building them and freeing
them over and over. This gives massive performance improvements.
patch by Alan Curry, pacman*at*TheWorld*dot*com

Originally committed as revision 17589 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
parent c9fa86df
......@@ -2110,6 +2110,25 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
srcFilter->chrV, dstFilter->chrV, c->param);
#ifdef HAVE_ALTIVEC
c->vYCoeffsBank = memalign (16, sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
c->vCCoeffsBank = memalign (16, sizeof (vector signed short)*c->vChrFilterSize*c->dstH);
for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
int j;
short *p = (short *)&c->vYCoeffsBank[i];
for (j=0;j<8;j++)
p[j] = c->vLumFilter[i];
}
for (i=0;i<c->vChrFilterSize*c->dstH;i++) {
int j;
short *p = (short *)&c->vCCoeffsBank[i];
for (j=0;j<8;j++)
p[j] = c->vChrFilter[i];
}
#endif
}
// Calculate Buffer Sizes so that they won't run out while handling these damn slices
......@@ -2644,6 +2663,12 @@ void sws_freeContext(SwsContext *c){
c->hLumFilter = NULL;
if(c->hChrFilter) free(c->hChrFilter);
c->hChrFilter = NULL;
#ifdef HAVE_ALTIVEC
if(c->vYCoeffsBank) free(c->vYCoeffsBank);
c->vYCoeffsBank = NULL;
if(c->vCCoeffsBank) free(c->vCCoeffsBank);
c->vCCoeffsBank = NULL;
#endif
if(c->vLumFilterPos) free(c->vLumFilterPos);
c->vLumFilterPos = NULL;
......
......@@ -154,6 +154,7 @@ typedef struct SwsContext{
vector signed short CGV;
vector signed short OY;
vector unsigned short CSHIFT;
vector signed short *vYCoeffsBank, *vCCoeffsBank;
#endif
......
......@@ -774,8 +774,6 @@ altivec_yuv2packedX (SwsContext *c,
uint8_t *dest, int dstW, int dstY)
{
int i,j;
short tmp __attribute__((aligned (16)));
int16_t *p;
short *f;
vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
vector signed short R0,G0,B0,R1,G1,B1;
......@@ -787,29 +785,10 @@ altivec_yuv2packedX (SwsContext *c,
vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0);
unsigned long scratch[16] __attribute__ ((aligned (16)));
vector signed short *vYCoeffsBank, *vCCoeffsBank;
vector signed short *YCoeffs, *CCoeffs;
vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH);
vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH);
for (i=0;i<lumFilterSize*c->dstH;i++) {
tmp = c->vLumFilter[i];
p = &vYCoeffsBank[i];
for (j=0;j<8;j++)
p[j] = tmp;
}
for (i=0;i<chrFilterSize*c->dstH;i++) {
tmp = c->vChrFilter[i];
p = &vCCoeffsBank[i];
for (j=0;j<8;j++)
p[j] = tmp;
}
YCoeffs = vYCoeffsBank+dstY*lumFilterSize;
CCoeffs = vCCoeffsBank+dstY*chrFilterSize;
YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
out = (vector unsigned char *)dest;
......@@ -962,7 +941,4 @@ altivec_yuv2packedX (SwsContext *c,
memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
}
if (vYCoeffsBank) free (vYCoeffsBank);
if (vCCoeffsBank) free (vCCoeffsBank);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment