Commit 9007f514 authored by Romain Dolbeau's avatar Romain Dolbeau Committed by Michael Niedermayer

better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)

Originally committed as revision 3038 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 2750b827
......@@ -516,6 +516,7 @@ fi
# Add processor-specific flags
TUNECPU="generic"
POWERPCMODE="32bits"
if test $tune != "generic"; then
case $tune in
601|ppc601|PowerPC601)
......@@ -561,11 +562,12 @@ if test $tune != "generic"; then
TUNECPU=ppc7400
;;
G5|g5|970|ppc970|PowerPC970|power4*|Power4*)
CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc64 -force_cpusubtype_ALL "
CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc-gfxopt -mpowerpc64"
if test $altivec = "no"; then
echo "WARNING: tuning for PPC970 but altivec disabled !";
fi
TUNECPU=ppc970
POWERPCMODE="64bits"
;;
*)
echo "WARNING: unknown CPU "$tune", ignored"
......@@ -1027,6 +1029,11 @@ elif test "$cpu" = "sparc64" ; then
elif test "$cpu" = "powerpc" ; then
echo "TARGET_ARCH_POWERPC=yes" >> config.mak
echo "#define ARCH_POWERPC 1" >> $TMPH
if test $POWERPCMODE = "32bits"; then
echo "#define POWERPC_MODE_32BITS 1" >> $TMPH
else
echo "#define POWERPC_MODE_64BITS 1" >> $TMPH
fi
if test "$powerpc_perf" = "yes"; then
echo "#define POWERPC_PERFORMANCE_REPORT 1" >> $TMPH
fi
......
......@@ -17,7 +17,7 @@ The firsts are always available, always active, but they're not very accurate :
The PMC are much more useful : not only can they report cycle-accurate timing, but they can also be used to monitor many other parameters, such as the number of AltiVec stalls for every kind of instruction, or instruction cache misses. The downside is that not all processors support the PMC (all G3, all G4 and the 970 do support them), and they're inactive by default - you need to activate them with a dedicated tool. Also, the number of available PMC depends on the processor : the various 604 have 2, the various 75x (aka. G3) have 4, and the various 74xx (aka G4) have 6.
*WARNING*: The powerpc 970 is not very well documented, and it seems its PMC registers are 64 bits wide. The current implementation in FFmpeg assumes the registers are 32 bits wide, and will *not* work on a powerpc 970 (aka G5).
*WARNING*: The powerpc 970 is not very well documented, and its PMC registers are 64 bits wide. For the code to take this into account, you *must* tune for the 970 (using --tune=970); otherwise the code will assume 32-bit registers.
II - Enabling FFmpeg PowerPC performance support
......
This diff is collapsed.
......@@ -47,6 +47,7 @@ extern void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels
extern void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
extern int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
extern int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder);
......
......@@ -61,6 +61,7 @@ static unsigned char* perfname[] = {
"put_pixels16_xy2_altivec",
"put_no_rnd_pixels16_xy2_altivec",
"hadamard8_diff8x8_altivec",
"hadamard8_diff16_altivec",
"clear_blocks_dcbz32_ppc",
"clear_blocks_dcbz128_ppc"
};
......@@ -226,12 +227,6 @@ long check_dcbzl_effect(void)
}
#endif
#ifdef HAVE_ALTIVEC
// can't put that in dsputil_altivec.c,
// has WARPER8_16_SQ declare the function "static" ...
WARPER8_16_SQ(hadamard8_diff8x8_altivec, hadamard8_diff16_altivec)
#endif
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
// Common optimizations whether Altivec is available or not
......
......@@ -51,6 +51,7 @@ enum powerpc_perf_index {
altivec_put_pixels16_xy2_num,
altivec_put_no_rnd_pixels16_xy2_num,
altivec_hadamard8_diff8x8_num,
altivec_hadamard8_diff16_num,
powerpc_clear_blocks_dcbz32,
powerpc_clear_blocks_dcbz128,
powerpc_perf_total
......@@ -64,6 +65,8 @@ enum powerpc_data_index {
};
extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
#ifndef POWERPC_MODE_64BITS
#define POWERP_PMC_DATATYPE unsigned long
#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a))
#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
#if (POWERPC_NUM_PMC_ENABLED > 2)
......@@ -80,7 +83,30 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
#define POWERPC_GET_PMC5(a) do {} while (0)
#define POWERPC_GET_PMC6(a) do {} while (0)
#endif
#define POWERPC_PERF_DECLARE(a, cond) unsigned long pmc_start[POWERPC_NUM_PMC_ENABLED], pmc_stop[POWERPC_NUM_PMC_ENABLED], pmc_loop_index;
#else /* POWERPC_MODE_64BITS */
/* POWERPC_MODE_64BITS branch: counter values are stored as unsigned long long. */
#define POWERP_PMC_DATATYPE unsigned long long
/* POWERPC_GET_PMCn(a): execute "mfspr" with the SPR number given in the
 * asm string (771..776 in this branch) and store the result in a. */
#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 771" : "=r" (a))
#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 772" : "=r" (a))
/* PMC3/PMC4 are only read when more than 2 PMC are enabled;
 * otherwise the accessors expand to an empty statement. */
#if (POWERPC_NUM_PMC_ENABLED > 2)
#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 773" : "=r" (a))
#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 774" : "=r" (a))
#else
#define POWERPC_GET_PMC3(a) do {} while (0)
#define POWERPC_GET_PMC4(a) do {} while (0)
#endif
/* PMC5/PMC6 are only read when more than 4 PMC are enabled;
 * otherwise the accessors expand to an empty statement. */
#if (POWERPC_NUM_PMC_ENABLED > 4)
#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 775" : "=r" (a))
#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 776" : "=r" (a))
#else
#define POWERPC_GET_PMC5(a) do {} while (0)
#define POWERPC_GET_PMC6(a) do {} while (0)
#endif
#endif /* POWERPC_MODE_64BITS */
/* POWERPC_PERF_DECLARE(a, cond): declare the locals used by the perf macros:
 * pmc_start[] and pmc_stop[] (one slot per enabled PMC) and pmc_loop_index,
 * all of type POWERP_PMC_DATATYPE. The arguments a and cond are not used
 * in this expansion. */
#define POWERPC_PERF_DECLARE(a, cond) \
POWERP_PMC_DATATYPE \
pmc_start[POWERPC_NUM_PMC_ENABLED], \
pmc_stop[POWERPC_NUM_PMC_ENABLED], \
pmc_loop_index;
#define POWERPC_PERF_START_COUNT(a, cond) do { \
POWERPC_GET_PMC6(pmc_start[5]); \
POWERPC_GET_PMC5(pmc_start[4]); \
......@@ -104,7 +130,7 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
{ \
if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
{ \
unsigned long diff = \
POWERP_PMC_DATATYPE diff = \
pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment