Commit b0ac780a authored by Michael Niedermayer's avatar Michael Niedermayer

altivec optimizations and horizontal filter fix by (Romain Dolbeau <dolbeau at irisa dot fr>)

Originally committed as revision 3162 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 12013f67
/* /*
Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at) Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
...@@ -22,16 +24,16 @@ ...@@ -22,16 +24,16 @@
*/ */
/* /*
C MMX MMX2 3DNow C MMX MMX2 3DNow AltiVec
isVertDC Ec Ec isVertDC Ec Ec Ec
isVertMinMaxOk Ec Ec isVertMinMaxOk Ec Ec Ec
doVertLowPass E e e doVertLowPass E e e Ec
doVertDefFilter Ec Ec e e doVertDefFilter Ec Ec e e Ec
isHorizDC Ec Ec isHorizDC Ec Ec
isHorizMinMaxOk a E isHorizMinMaxOk a E
doHorizLowPass E e e doHorizLowPass E e e
doHorizDefFilter Ec Ec e e doHorizDefFilter Ec Ec e e
deRing E e e* deRing E e e* Ecp
Vertical RKAlgo1 E a a Vertical RKAlgo1 E a a
Horizontal RKAlgo1 a a Horizontal RKAlgo1 a a
Vertical X1# a E E Vertical X1# a E E
...@@ -48,6 +50,7 @@ E = Exact implementation ...@@ -48,6 +50,7 @@ E = Exact implementation
e = almost exact implementation (slightly different rounding,...) e = almost exact implementation (slightly different rounding,...)
a = alternative / approximate impl a = alternative / approximate impl
c = checked against the other implementations (-vo md5) c = checked against the other implementations (-vo md5)
p = partially optimized, still some work to do
*/ */
/* /*
...@@ -194,7 +197,7 @@ static inline void prefetcht2(void *p) ...@@ -194,7 +197,7 @@ static inline void prefetcht2(void *p)
/** /**
* Check if the given 8x8 Block is mostly "flat" * Check if the given 8x8 Block is mostly "flat"
*/ */
static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
{ {
int numEq= 0; int numEq= 0;
int y; int y;
...@@ -240,7 +243,7 @@ static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ ...@@ -240,7 +243,7 @@ static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
return numEq > c->ppMode.flatnessThreshold; return numEq > c->ppMode.flatnessThreshold;
} }
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP) static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{ {
int i; int i;
#if 1 #if 1
...@@ -304,6 +307,17 @@ static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) ...@@ -304,6 +307,17 @@ static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
#endif #endif
} }
/**
 * Classify a block for the horizontal deblocking step.
 * Returns 2 when isHorizDC_C() rejects the block, otherwise 1 if
 * isHorizMinMaxOk_C() accepts it for c->QP and 0 if it does not.
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
    /* Not "flat" according to isHorizDC_C: report class 2 straight away. */
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    /* Flat block: the min/max check decides between class 1 and class 0. */
    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
if( isVertDC_C(src, stride, c) ){ if( isVertDC_C(src, stride, c) ){
if( isVertMinMaxOk_C(src, stride, c->QP) ) if( isVertMinMaxOk_C(src, stride, c->QP) )
...@@ -315,14 +329,14 @@ static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ ...@@ -315,14 +329,14 @@ static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
} }
} }
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP) static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{ {
int y; int y;
for(y=0; y<BLOCK_SIZE; y++) for(y=0; y<BLOCK_SIZE; y++)
{ {
const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]); const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
if(ABS(middleEnergy) < 8*QP) if(ABS(middleEnergy) < 8*c->QP)
{ {
const int q=(dst[3] - dst[4])/2; const int q=(dst[3] - dst[4])/2;
const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
...@@ -356,14 +370,14 @@ static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP) ...@@ -356,14 +370,14 @@ static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
* Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
*/ */
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP) static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
{ {
int y; int y;
for(y=0; y<BLOCK_SIZE; y++) for(y=0; y<BLOCK_SIZE; y++)
{ {
const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0]; const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7]; const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
int sums[9]; int sums[9];
sums[0] = first + dst[0]; sums[0] = first + dst[0];
...@@ -462,6 +476,17 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) ...@@ -462,6 +476,17 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP)
#define COMPILE_C #define COMPILE_C
#endif #endif
// Select the AltiVec code path: COMPILE_ALTIVEC is defined only when both
// ARCH_POWERPC and HAVE_ALTIVEC are set.
#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
// When CONFIG_DARWIN is not defined, emit compile-time warnings about known
// gcc problems with this code (wrong results or failure to compile).
#ifndef CONFIG_DARWIN
#warning "################################################################################"
#warning "WARNING: No gcc available as of today (2004-05-25) seems to be able to compile properly some of the code under non-Darwin PPC OSes. Some functions result in wrong results, while others simply won't compile (gcc explodes after allocating 1GiB+)."
#warning "################################################################################"
#endif //CONFIG_DARWIN
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC
#ifdef ARCH_X86 #ifdef ARCH_X86
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
...@@ -480,6 +505,7 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) ...@@ -480,6 +505,7 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP)
#undef HAVE_MMX #undef HAVE_MMX
#undef HAVE_MMX2 #undef HAVE_MMX2
#undef HAVE_3DNOW #undef HAVE_3DNOW
#undef HAVE_ALTIVEC
#undef ARCH_X86 #undef ARCH_X86
#ifdef COMPILE_C #ifdef COMPILE_C
...@@ -491,6 +517,16 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) ...@@ -491,6 +517,16 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP)
#include "postprocess_template.c" #include "postprocess_template.c"
#endif #endif
// Instantiate the AltiVec variant of the filters: RENAME() appends the
// _altivec suffix to every templated name, and HAVE_ALTIVEC is defined again
// (it was #undef'd earlier in this file) before the templates are included.
#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
// postprocess_template.c wraps its own doVertLowPass, doVertDefFilter and
// dering in #ifndef HAVE_ALTIVEC, so those are presumably supplied by the
// AltiVec template included first -- NOTE(review): confirm against that file.
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif
#endif //ARCH_POWERPC
//MMX versions //MMX versions
#ifdef COMPILE_MMX #ifdef COMPILE_MMX
#undef RENAME #undef RENAME
...@@ -548,6 +584,13 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int ...@@ -548,6 +584,13 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int
else else
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else #else
#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
else if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
else
#endif
#endif
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif #endif
#else //RUNTIME_CPUDETECT #else //RUNTIME_CPUDETECT
...@@ -557,6 +600,8 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int ...@@ -557,6 +600,8 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_MMX) #elif defined (HAVE_MMX)
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_ALTIVEC)
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else #else
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif #endif
......
...@@ -59,6 +59,7 @@ void pp_free_context(pp_context_t *ppContext); ...@@ -59,6 +59,7 @@ void pp_free_context(pp_context_t *ppContext);
#define PP_CPU_CAPS_MMX 0x80000000 #define PP_CPU_CAPS_MMX 0x80000000
#define PP_CPU_CAPS_MMX2 0x20000000 #define PP_CPU_CAPS_MMX2 0x20000000
#define PP_CPU_CAPS_3DNOW 0x40000000 #define PP_CPU_CAPS_3DNOW 0x40000000
#define PP_CPU_CAPS_ALTIVEC 0x10000000
#define PP_FORMAT 0x00000008 #define PP_FORMAT 0x00000008
#define PP_FORMAT_420 (0x00000011|PP_FORMAT) #define PP_FORMAT_420 (0x00000011|PP_FORMAT)
......
This diff is collapsed.
...@@ -170,6 +170,7 @@ asm volatile( ...@@ -170,6 +170,7 @@ asm volatile(
* Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
*/ */
#ifndef HAVE_ALTIVEC
static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
{ {
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
...@@ -340,6 +341,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) ...@@ -340,6 +341,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
} }
#endif #endif
} }
#endif //HAVE_ALTIVEC
#if 0 #if 0
/** /**
...@@ -582,6 +584,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) ...@@ -582,6 +584,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
#endif #endif
} }
#ifndef HAVE_ALTIVEC
static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
{ {
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
...@@ -1149,7 +1152,9 @@ src-=8; ...@@ -1149,7 +1152,9 @@ src-=8;
} }
#endif #endif
} }
#endif //HAVE_ALTIVEC
#ifndef HAVE_ALTIVEC
static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
{ {
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
...@@ -1505,6 +1510,7 @@ DERING_CORE((%0, %1, 8),(%%edx, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm ...@@ -1505,6 +1510,7 @@ DERING_CORE((%0, %1, 8),(%%edx, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm
#endif #endif
#endif #endif
} }
#endif //HAVE_ALTIVEC
/** /**
* Deinterlaces the given block by linearly interpolating every second line. * Deinterlaces the given block by linearly interpolating every second line.
...@@ -3134,13 +3140,12 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int ...@@ -3134,13 +3140,12 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
horizX1Filter(dstBlock-4, stride, QP); horizX1Filter(dstBlock-4, stride, QP);
else if(mode & H_DEBLOCK) else if(mode & H_DEBLOCK)
{ {
if( isHorizDC(dstBlock-4, stride, &c)) const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
{
if(isHorizMinMaxOk(dstBlock-4, stride, QP)) if(t==1)
doHorizLowPass(dstBlock-4, stride, QP); RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
} else if(t==2)
else RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
doHorizDefFilter(dstBlock-4, stride, QP);
} }
#endif #endif
if(mode & DERING) if(mode & DERING)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment