Commit a01e08ee authored by Luca Barbato

hadamard8_diff* enabled on linux/ppc

Originally committed as revision 5272 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 265a1ac7
...@@ -1308,7 +1308,6 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); ...@@ -1308,7 +1308,6 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
} }
#ifdef CONFIG_DARWIN
int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
int sum; int sum;
...@@ -1442,39 +1441,39 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1); ...@@ -1442,39 +1441,39 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1);
static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) {
int sum; int sum;
register vector signed short register vector signed short
temp0 asm ("v0"), temp0 REG_v(v0),
temp1 asm ("v1"), temp1 REG_v(v1),
temp2 asm ("v2"), temp2 REG_v(v2),
temp3 asm ("v3"), temp3 REG_v(v3),
temp4 asm ("v4"), temp4 REG_v(v4),
temp5 asm ("v5"), temp5 REG_v(v5),
temp6 asm ("v6"), temp6 REG_v(v6),
temp7 asm ("v7"); temp7 REG_v(v7);
register vector signed short register vector signed short
temp0S asm ("v8"), temp0S REG_v(v8),
temp1S asm ("v9"), temp1S REG_v(v9),
temp2S asm ("v10"), temp2S REG_v(v10),
temp3S asm ("v11"), temp3S REG_v(v11),
temp4S asm ("v12"), temp4S REG_v(v12),
temp5S asm ("v13"), temp5S REG_v(v13),
temp6S asm ("v14"), temp6S REG_v(v14),
temp7S asm ("v15"); temp7S REG_v(v15);
register const_vector unsigned char vzero asm ("v31")= (const_vector unsigned char)vec_splat_u8(0); register const_vector unsigned char vzero REG_v(v31)= (const_vector unsigned char)vec_splat_u8(0);
{ {
register const_vector signed short vprod1 asm ("v16")= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); register const_vector signed short vprod1 REG_v(v16)= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
register const_vector signed short vprod2 asm ("v17")= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); register const_vector signed short vprod2 REG_v(v17)= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
register const_vector signed short vprod3 asm ("v18")= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); register const_vector signed short vprod3 REG_v(v18)= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
register const_vector unsigned char perm1 asm ("v19")= (const_vector unsigned char) register const_vector unsigned char perm1 REG_v(v19)= (const_vector unsigned char)
AVV(0x02, 0x03, 0x00, 0x01, AVV(0x02, 0x03, 0x00, 0x01,
0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05,
0x0A, 0x0B, 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09,
0x0E, 0x0F, 0x0C, 0x0D); 0x0E, 0x0F, 0x0C, 0x0D);
register const_vector unsigned char perm2 asm ("v20")= (const_vector unsigned char) register const_vector unsigned char perm2 REG_v(v20)= (const_vector unsigned char)
AVV(0x04, 0x05, 0x06, 0x07, AVV(0x04, 0x05, 0x06, 0x07,
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03,
0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F,
0x08, 0x09, 0x0A, 0x0B); 0x08, 0x09, 0x0A, 0x0B);
register const_vector unsigned char perm3 asm ("v21")= (const_vector unsigned char) register const_vector unsigned char perm3 REG_v(v21)= (const_vector unsigned char)
AVV(0x08, 0x09, 0x0A, 0x0B, AVV(0x08, 0x09, 0x0A, 0x0B,
0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F,
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03,
...@@ -1482,37 +1481,37 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ...@@ -1482,37 +1481,37 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
#define ONEITERBUTTERFLY(i, res1, res2) \ #define ONEITERBUTTERFLY(i, res1, res2) \
{ \ { \
register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \ register vector unsigned char src1 REG_v(v22), src2 REG_v(v23); \
register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \ register vector unsigned char dst1 REG_v(v24), dst2 REG_v(v25); \
src1 = vec_ld(stride * i, src); \ src1 = vec_ld(stride * i, src); \
src2 = vec_ld((stride * i) + 16, src); \ src2 = vec_ld((stride * i) + 16, src); \
register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ register vector unsigned char srcO REG_v(v22) = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
dst1 = vec_ld(stride * i, dst); \ dst1 = vec_ld(stride * i, dst); \
dst2 = vec_ld((stride * i) + 16, dst); \ dst2 = vec_ld((stride * i) + 16, dst); \
register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ register vector unsigned char dstO REG_v(v23) = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
/* promote the unsigned chars to signed shorts */ \ /* promote the unsigned chars to signed shorts */ \
register vector signed short srcV asm ("v24") = \ register vector signed short srcV REG_v(v24) = \
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
register vector signed short dstV asm ("v25") = \ register vector signed short dstV REG_v(v25) = \
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
register vector signed short srcW asm ("v26") = \ register vector signed short srcW REG_v(v26) = \
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \ (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \
register vector signed short dstW asm ("v27") = \ register vector signed short dstW REG_v(v27) = \
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \ (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \
/* substractions inside the first butterfly */ \ /* substractions inside the first butterfly */ \
register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \ register vector signed short but0 REG_v(v28) = vec_sub(srcV, dstV); \
register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \ register vector signed short but0S REG_v(v29) = vec_sub(srcW, dstW); \
register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \ register vector signed short op1 REG_v(v30) = vec_perm(but0, but0, perm1); \
register vector signed short but1 asm ("v22") = vec_mladd(but0, vprod1, op1); \ register vector signed short but1 REG_v(v22) = vec_mladd(but0, vprod1, op1); \
register vector signed short op1S asm ("v23") = vec_perm(but0S, but0S, perm1); \ register vector signed short op1S REG_v(v23) = vec_perm(but0S, but0S, perm1); \
register vector signed short but1S asm ("v24") = vec_mladd(but0S, vprod1, op1S); \ register vector signed short but1S REG_v(v24) = vec_mladd(but0S, vprod1, op1S); \
register vector signed short op2 asm ("v25") = vec_perm(but1, but1, perm2); \ register vector signed short op2 REG_v(v25) = vec_perm(but1, but1, perm2); \
register vector signed short but2 asm ("v26") = vec_mladd(but1, vprod2, op2); \ register vector signed short but2 REG_v(v26) = vec_mladd(but1, vprod2, op2); \
register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \ register vector signed short op2S REG_v(v27) = vec_perm(but1S, but1S, perm2); \
register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \ register vector signed short but2S REG_v(v28) = vec_mladd(but1S, vprod2, op2S); \
register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \ register vector signed short op3 REG_v(v29) = vec_perm(but2, but2, perm3); \
res1 = vec_mladd(but2, vprod3, op3); \ res1 = vec_mladd(but2, vprod3, op3); \
register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \ register vector signed short op3S REG_v(v30) = vec_perm(but2S, but2S, perm3); \
res2 = vec_mladd(but2S, vprod3, op3S); \ res2 = vec_mladd(but2S, vprod3, op3S); \
} }
ONEITERBUTTERFLY(0, temp0, temp0S); ONEITERBUTTERFLY(0, temp0, temp0S);
...@@ -1618,7 +1617,6 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1); ...@@ -1618,7 +1617,6 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1);
POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
return score; return score;
} }
#endif //CONFIG_DARWIN
int has_altivec(void) int has_altivec(void)
{ {
......
...@@ -303,10 +303,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) ...@@ -303,10 +303,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
c->gmc1 = gmc1_altivec; c->gmc1 = gmc1_altivec;
#ifdef CONFIG_DARWIN // ATM gcc-3.3 and gcc-3.4 fail to compile these in linux...
c->hadamard8_diff[0] = hadamard8_diff16_altivec; c->hadamard8_diff[0] = hadamard8_diff16_altivec;
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
#endif
c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec; c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec;
......
...@@ -17,8 +17,17 @@ ...@@ -17,8 +17,17 @@
# else # else
# define AVV # define AVV
# endif # endif
#define REG_v(a) asm ( #a )
#else #else
#define AVV(x...) {x} #define AVV(x...) {x}
#if (__GNUC__ < 4)
# define REG_v(a) asm ( #a )
#else
# define REG_v(a)
#endif
#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303) #if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
/* This code was provided to me by Bartosch Pixa /* This code was provided to me by Bartosch Pixa
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment