Commit eb4b3dd3 authored by Zdenek Kabelac

* Use DSPContext so that each codec can use its own local (sub)set of CPU extensions

Originally committed as revision 1194 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent fb602cd1
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
*/ */
#include "avcodec.h" #include "avcodec.h"
#include "dsputil.h" #include "dsputil.h"
/*
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
...@@ -41,7 +41,7 @@ op_pixels_abs_func pix_abs8x8; ...@@ -41,7 +41,7 @@ op_pixels_abs_func pix_abs8x8;
op_pixels_abs_func pix_abs8x8_x2; op_pixels_abs_func pix_abs8x8_x2;
op_pixels_abs_func pix_abs8x8_y2; op_pixels_abs_func pix_abs8x8_y2;
op_pixels_abs_func pix_abs8x8_xy2; op_pixels_abs_func pix_abs8x8_xy2;
*/
int ff_bit_exact=0; int ff_bit_exact=0;
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
...@@ -84,7 +84,7 @@ const UINT8 ff_alternate_vertical_scan[64] = { ...@@ -84,7 +84,7 @@ const UINT8 ff_alternate_vertical_scan[64] = {
}; };
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
UINT32 inverse[256]={ const UINT32 inverse[256]={
0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
...@@ -119,7 +119,7 @@ UINT32 inverse[256]={ ...@@ -119,7 +119,7 @@ UINT32 inverse[256]={
17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
}; };
int pix_sum_c(UINT8 * pix, int line_size) static int pix_sum_c(UINT8 * pix, int line_size)
{ {
int s, i, j; int s, i, j;
...@@ -141,7 +141,7 @@ int pix_sum_c(UINT8 * pix, int line_size) ...@@ -141,7 +141,7 @@ int pix_sum_c(UINT8 * pix, int line_size)
return s; return s;
} }
int pix_norm1_c(UINT8 * pix, int line_size) static int pix_norm1_c(UINT8 * pix, int line_size)
{ {
int s, i, j; int s, i, j;
UINT32 *sq = squareTbl + 256; UINT32 *sq = squareTbl + 256;
...@@ -165,7 +165,7 @@ int pix_norm1_c(UINT8 * pix, int line_size) ...@@ -165,7 +165,7 @@ int pix_norm1_c(UINT8 * pix, int line_size)
} }
void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
{ {
int i; int i;
...@@ -184,8 +184,8 @@ void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) ...@@ -184,8 +184,8 @@ void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
} }
} }
void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2, static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1,
int stride){ const UINT8 *s2, int stride){
int i; int i;
/* read the pixels */ /* read the pixels */
...@@ -205,8 +205,8 @@ void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2, ...@@ -205,8 +205,8 @@ void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2,
} }
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
int line_size) int line_size)
{ {
int i; int i;
UINT8 *cm = cropTbl + MAX_NEG_CROP; UINT8 *cm = cropTbl + MAX_NEG_CROP;
...@@ -227,7 +227,7 @@ void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, ...@@ -227,7 +227,7 @@ void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
} }
} }
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
int line_size) int line_size)
{ {
int i; int i;
...@@ -1353,7 +1353,7 @@ QPEL_MC(0, avg_ , _ , op_avg) ...@@ -1353,7 +1353,7 @@ QPEL_MC(0, avg_ , _ , op_avg)
#undef op_put #undef op_put
#undef op_put_no_rnd #undef op_put_no_rnd
int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{ {
int s, i; int s, i;
...@@ -1381,7 +1381,7 @@ int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) ...@@ -1381,7 +1381,7 @@ int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s; return s;
} }
int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{ {
int s, i; int s, i;
...@@ -1409,7 +1409,7 @@ int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) ...@@ -1409,7 +1409,7 @@ int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s; return s;
} }
int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{ {
int s, i; int s, i;
UINT8 *pix3 = pix2 + line_size; UINT8 *pix3 = pix2 + line_size;
...@@ -1439,7 +1439,7 @@ int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) ...@@ -1439,7 +1439,7 @@ int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s; return s;
} }
int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{ {
int s, i; int s, i;
UINT8 *pix3 = pix2 + line_size; UINT8 *pix3 = pix2 + line_size;
...@@ -1469,7 +1469,7 @@ int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) ...@@ -1469,7 +1469,7 @@ int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s; return s;
} }
int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{ {
int s, i; int s, i;
...@@ -1489,7 +1489,7 @@ int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) ...@@ -1489,7 +1489,7 @@ int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s; return s;
} }
int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{ {
int s, i; int s, i;
...@@ -1509,7 +1509,7 @@ int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) ...@@ -1509,7 +1509,7 @@ int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s; return s;
} }
int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{ {
int s, i; int s, i;
UINT8 *pix3 = pix2 + line_size; UINT8 *pix3 = pix2 + line_size;
...@@ -1531,7 +1531,7 @@ int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) ...@@ -1531,7 +1531,7 @@ int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s; return s;
} }
int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{ {
int s, i; int s, i;
UINT8 *pix3 = pix2 + line_size; UINT8 *pix3 = pix2 + line_size;
...@@ -1574,12 +1574,12 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, ...@@ -1574,12 +1574,12 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable,
} }
} }
void clear_blocks_c(DCTELEM *blocks) static void clear_blocks_c(DCTELEM *blocks)
{ {
memset(blocks, 0, sizeof(DCTELEM)*6*64); memset(blocks, 0, sizeof(DCTELEM)*6*64);
} }
void dsputil_init(void) void dsputil_init(DSPContext* c, unsigned mask)
{ {
int i; int i;
...@@ -1593,42 +1593,82 @@ void dsputil_init(void) ...@@ -1593,42 +1593,82 @@ void dsputil_init(void)
squareTbl[i] = (i - 256) * (i - 256); squareTbl[i] = (i - 256) * (i - 256);
} }
get_pixels = get_pixels_c; c->get_pixels = get_pixels_c;
diff_pixels = diff_pixels_c; c->diff_pixels = diff_pixels_c;
put_pixels_clamped = put_pixels_clamped_c; c->put_pixels_clamped = put_pixels_clamped_c;
add_pixels_clamped = add_pixels_clamped_c; c->add_pixels_clamped = add_pixels_clamped_c;
ff_gmc1= gmc1_c; c->gmc1 = gmc1_c;
ff_gmc= gmc_c; c->gmc = gmc_c;
clear_blocks= clear_blocks_c; c->clear_blocks = clear_blocks_c;
pix_sum= pix_sum_c; c->pix_sum = pix_sum_c;
pix_norm1= pix_norm1_c; c->pix_norm1 = pix_norm1_c;
pix_abs16x16 = pix_abs16x16_c; c->pix_abs16x16 = pix_abs16x16_c;
pix_abs16x16_x2 = pix_abs16x16_x2_c; c->pix_abs16x16_x2 = pix_abs16x16_x2_c;
pix_abs16x16_y2 = pix_abs16x16_y2_c; c->pix_abs16x16_y2 = pix_abs16x16_y2_c;
pix_abs16x16_xy2 = pix_abs16x16_xy2_c; c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
pix_abs8x8 = pix_abs8x8_c; c->pix_abs8x8 = pix_abs8x8_c;
pix_abs8x8_x2 = pix_abs8x8_x2_c; c->pix_abs8x8_x2 = pix_abs8x8_x2_c;
pix_abs8x8_y2 = pix_abs8x8_y2_c; c->pix_abs8x8_y2 = pix_abs8x8_y2_c;
pix_abs8x8_xy2 = pix_abs8x8_xy2_c; c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
c->put_pixels_tab[0][0] = put_pixels16;
c->put_pixels_tab[0][1] = put_pixels16_x2;
c->put_pixels_tab[0][2] = put_pixels16_y2;
c->put_pixels_tab[0][3] = put_pixels16_xy2;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16;
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2;
c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2;
c->avg_pixels_tab[0][0] = avg_pixels16;
c->avg_pixels_tab[0][1] = avg_pixels16_x2;
c->avg_pixels_tab[0][2] = avg_pixels16_y2;
c->avg_pixels_tab[0][3] = avg_pixels16_xy2;
c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16;
c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2;
c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2;
c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2;
c->put_pixels_tab[1][0] = put_pixels8;
c->put_pixels_tab[1][1] = put_pixels8_x2;
c->put_pixels_tab[1][2] = put_pixels8_y2;
c->put_pixels_tab[1][3] = put_pixels8_xy2;
c->put_no_rnd_pixels_tab[1][0] = put_pixels8;
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2;
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2;
c->avg_pixels_tab[1][0] = avg_pixels8;
c->avg_pixels_tab[1][1] = avg_pixels8_x2;
c->avg_pixels_tab[1][2] = avg_pixels8_y2;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2;
c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8;
c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2;
c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2;
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2;
#ifdef HAVE_MMX #ifdef HAVE_MMX
dsputil_init_mmx(); dsputil_init_mmx(c, mask);
#endif #endif
#ifdef ARCH_ARMV4L #ifdef ARCH_ARMV4L
dsputil_init_armv4l(); dsputil_init_armv4l(c, mask);
#endif #endif
#ifdef HAVE_MLIB #ifdef HAVE_MLIB
dsputil_init_mlib(); dsputil_init_mlib(c, mask);
#endif #endif
#ifdef ARCH_ALPHA #ifdef ARCH_ALPHA
dsputil_init_alpha(); dsputil_init_alpha(c, mask);
#endif #endif
#ifdef ARCH_POWERPC #ifdef ARCH_POWERPC
dsputil_init_ppc(); dsputil_init_ppc(c, mask);
#endif #endif
#ifdef HAVE_MMI #ifdef HAVE_MMI
dsputil_init_mmi(); dsputil_init_mmi(c, mask);
#endif #endif
for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
...@@ -1639,7 +1679,8 @@ void avcodec_set_bit_exact(void) ...@@ -1639,7 +1679,8 @@ void avcodec_set_bit_exact(void)
{ {
ff_bit_exact=1; ff_bit_exact=1;
#ifdef HAVE_MMX #ifdef HAVE_MMX
dsputil_set_bit_exact_mmx(); #warning FIXME - set_bit_exact
// dsputil_set_bit_exact_mmx();
#endif #endif
} }
......
...@@ -45,10 +45,9 @@ extern const UINT8 ff_zigzag_direct[64]; ...@@ -45,10 +45,9 @@ extern const UINT8 ff_zigzag_direct[64];
extern UINT32 squareTbl[512]; extern UINT32 squareTbl[512];
extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
void dsputil_init(void);
/* minimum alignment rules ;) /* minimum alignment rules ;)
if u notice errors in the align stuff, need more alignment for some asm code for some cpu if u notice errors in the align stuff, need more alignment for some asm code for some cpu
or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ... or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ...
!warning these alignments might not match reality, (missing attribute((align)) stuff somewhere possible) !warning these alignments might not match reality, (missing attribute((align)) stuff somewhere possible)
...@@ -57,39 +56,20 @@ i (michael) didnt check them, these are just the alignents which i think could b ...@@ -57,39 +56,20 @@ i (michael) didnt check them, these are just the alignents which i think could b
!future video codecs might need functions with less strict alignment !future video codecs might need functions with less strict alignment
*/ */
/* pixel ops : interface with DCT */ /*
extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
extern void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
extern void (*ff_gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
extern void (*ff_gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
extern void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
extern int (*pix_sum)(UINT8 * pix, int line_size);
extern int (*pix_norm1)(UINT8 * pix, int line_size);
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
void clear_blocks_c(DCTELEM *blocks); void clear_blocks_c(DCTELEM *blocks);
*/
/* add and put pixel (decoding) */ /* add and put pixel (decoding) */
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h); typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h);
typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride); typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);
extern op_pixels_func put_pixels_tab[2][4];
extern op_pixels_func avg_pixels_tab[2][4];
extern op_pixels_func put_no_rnd_pixels_tab[2][4];
extern op_pixels_func avg_no_rnd_pixels_tab[2][4];
extern qpel_mc_func put_qpel_pixels_tab[2][16];
extern qpel_mc_func avg_qpel_pixels_tab[2][16];
extern qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
extern qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
#define CALL_2X_PIXELS(a, b, n)\ #define CALL_2X_PIXELS(a, b, n)\
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
...@@ -100,20 +80,46 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ ...@@ -100,20 +80,46 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
/* motion estimation */ /* motion estimation */
typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size); typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size);
/*
extern op_pixels_abs_func pix_abs16x16;
extern op_pixels_abs_func pix_abs16x16_x2;
extern op_pixels_abs_func pix_abs16x16_y2;
extern op_pixels_abs_func pix_abs16x16_xy2;
extern op_pixels_abs_func pix_abs8x8;
extern op_pixels_abs_func pix_abs8x8_x2;
extern op_pixels_abs_func pix_abs8x8_y2;
extern op_pixels_abs_func pix_abs8x8_xy2;
int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
*/
/*
 * Table of DSP function pointers carried by each codec context.
 *
 * dsputil_init(c, mask) fills the members with the C implementations
 * (get_pixels_c, pix_abs16x16_c, put_pixels16, ...) and then passes the
 * same context and mask to whichever architecture-specific init routines
 * are compiled in (dsputil_init_mmx, dsputil_init_ppc, ...).
 * Callers go through their own copy, e.g. s->dsp.clear_blocks(s->block[0]).
 * NOTE(review): the arch-specific routines presumably overwrite some of
 * these pointers with accelerated versions, subject to mask - confirm.
 */
typedef struct DSPContext {
/* pixel ops : interface with DCT */
void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
void (*gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
void (*gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
/* the C implementation zeroes 6*64 DCTELEMs starting at blocks */
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
int (*pix_sum)(UINT8 * pix, int line_size);
int (*pix_norm1)(UINT8 * pix, int line_size);
/* maybe create an array for 16/8 functions */
/*
 * op_pixels_func tables, as filled by dsputil_init:
 *   first index : 0 = the *16 functions, 1 = the *8 functions
 *   second index: 0 = plain, 1 = _x2, 2 = _y2, 3 = _xy2 variant
 * svq1 selects the second index as ((mv.y & 1) << 1) | (mv.x & 1).
 * put_no_rnd_pixels_tab[n][0] is set to the same function as
 * put_pixels_tab[n][0].
 */
op_pixels_func put_pixels_tab[2][4];
op_pixels_func avg_pixels_tab[2][4];
op_pixels_func put_no_rnd_pixels_tab[2][4];
op_pixels_func avg_no_rnd_pixels_tab[2][4];
/* NOTE(review): presumably the quarter-pel counterparts of the tables
   above; their initialisation and indexing are not visible here - confirm */
qpel_mc_func put_qpel_pixels_tab[2][16];
qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
/* motion estimation: block comparison functions returning an int score,
   for 16x16 and 8x8 blocks, each with plain/_x2/_y2/_xy2 variants */
op_pixels_abs_func pix_abs16x16;
op_pixels_abs_func pix_abs16x16_x2;
op_pixels_abs_func pix_abs16x16_y2;
op_pixels_abs_func pix_abs16x16_xy2;
op_pixels_abs_func pix_abs8x8;
op_pixels_abs_func pix_abs8x8_x2;
op_pixels_abs_func pix_abs8x8_y2;
op_pixels_abs_func pix_abs8x8_xy2;
} DSPContext;
void dsputil_init(DSPContext* p, unsigned mask);
/** /**
* permute block according to permutation. * permute block according to permutation.
...@@ -121,8 +127,12 @@ int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); ...@@ -121,8 +127,12 @@ int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
*/ */
void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last); void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last);
#define emms_c()
#if defined(HAVE_MMX) #if defined(HAVE_MMX)
/* Remove the empty default emms_c() defined above; presumably the MMX
   section supplies its own definition further down - confirm.
   #undef takes a bare identifier: a trailing "()" is extra tokens and
   is diagnosed by the preprocessor. */
#undef emms_c
#define MM_MMX 0x0001 /* standard MMX */ #define MM_MMX 0x0001 /* standard MMX */
#define MM_3DNOW 0x0004 /* AMD 3DNOW */ #define MM_3DNOW 0x0004 /* AMD 3DNOW */
#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
...@@ -132,6 +142,8 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, ...@@ -132,6 +142,8 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable,
extern int mm_flags; extern int mm_flags;
int mm_support(void); int mm_support(void);
void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
static inline void emms(void) static inline void emms(void)
{ {
...@@ -146,54 +158,44 @@ static inline void emms(void) ...@@ -146,54 +158,44 @@ static inline void emms(void)
#define __align8 __attribute__ ((aligned (8))) #define __align8 __attribute__ ((aligned (8)))
void dsputil_init_mmx(void); void dsputil_init_mmx(DSPContext* c, unsigned mask);
void dsputil_set_bit_exact_mmx(void); void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask);
#elif defined(ARCH_ARMV4L) #elif defined(ARCH_ARMV4L)
#define emms_c()
/* This is to use 4 bytes read to the IDCT pointers for some 'zero' /* This is to use 4 bytes read to the IDCT pointers for some 'zero'
line optimizations */ line optimizations */
#define __align8 __attribute__ ((aligned (4))) #define __align8 __attribute__ ((aligned (4)))
void dsputil_init_armv4l(void); void dsputil_init_armv4l(DSPContext* c, unsigned mask);
#elif defined(HAVE_MLIB) #elif defined(HAVE_MLIB)
#define emms_c()
/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
#define __align8 __attribute__ ((aligned (8))) #define __align8 __attribute__ ((aligned (8)))
void dsputil_init_mlib(void); void dsputil_init_mlib(DSPContext* c, unsigned mask);
#elif defined(ARCH_ALPHA) #elif defined(ARCH_ALPHA)
#define emms_c()
#define __align8 __attribute__ ((aligned (8))) #define __align8 __attribute__ ((aligned (8)))
void dsputil_init_alpha(void); void dsputil_init_alpha(DSPContext* c, unsigned mask);
#elif defined(ARCH_POWERPC) #elif defined(ARCH_POWERPC)
#define emms_c()
#define __align8 __attribute__ ((aligned (16))) #define __align8 __attribute__ ((aligned (16)))
void dsputil_init_ppc(void); void dsputil_init_ppc(DSPContext* c, unsigned mask);
#elif defined(HAVE_MMI) #elif defined(HAVE_MMI)
#define emms_c()
#define __align8 __attribute__ ((aligned (16))) #define __align8 __attribute__ ((aligned (16)))
void dsputil_init_mmi(void); void dsputil_init_mmi(DSPContext* c, unsigned mask);
#else #else
#define emms_c()
#define __align8 #define __align8
#endif #endif
...@@ -263,9 +265,9 @@ typedef struct MDCTContext { ...@@ -263,9 +265,9 @@ typedef struct MDCTContext {
} MDCTContext; } MDCTContext;
int ff_mdct_init(MDCTContext *s, int nbits, int inverse); int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
void ff_imdct_calc(MDCTContext *s, FFTSample *output, void ff_imdct_calc(MDCTContext *s, FFTSample *output,
const FFTSample *input, FFTSample *tmp); const FFTSample *input, FFTSample *tmp);
void ff_mdct_calc(MDCTContext *s, FFTSample *out, void ff_mdct_calc(MDCTContext *s, FFTSample *out,
const FFTSample *input, FFTSample *tmp); const FFTSample *input, FFTSample *tmp);
void ff_mdct_end(MDCTContext *s); void ff_mdct_end(MDCTContext *s);
......
...@@ -114,6 +114,7 @@ static int dvvideo_decode_init(AVCodecContext *avctx) ...@@ -114,6 +114,7 @@ static int dvvideo_decode_init(AVCodecContext *avctx)
/* XXX: fix it */ /* XXX: fix it */
memset(&s2, 0, sizeof(MpegEncContext)); memset(&s2, 0, sizeof(MpegEncContext));
s2.avctx = avctx; s2.avctx = avctx;
dsputil_init(&s2.dsp, avctx->dsp_mask);
if (DCT_common_init(&s2) < 0) if (DCT_common_init(&s2) < 0)
return -1; return -1;
......
...@@ -331,7 +331,7 @@ static void guess_mv(MpegEncContext *s){ ...@@ -331,7 +331,7 @@ static void guess_mv(MpegEncContext *s){
s->mv_type = MV_TYPE_16X16; s->mv_type = MV_TYPE_16X16;
s->mb_skiped=0; s->mb_skiped=0;
clear_blocks(s->block[0]); s->dsp.clear_blocks(s->block[0]);
s->mb_x= mb_x; s->mb_x= mb_x;
s->mb_y= mb_y; s->mb_y= mb_y;
...@@ -458,7 +458,7 @@ int score_sum=0; ...@@ -458,7 +458,7 @@ int score_sum=0;
s->mv_type = MV_TYPE_16X16; s->mv_type = MV_TYPE_16X16;
s->mb_skiped=0; s->mb_skiped=0;
clear_blocks(s->block[0]); s->dsp.clear_blocks(s->block[0]);
s->mb_x= mb_x; s->mb_x= mb_x;
s->mb_y= mb_y; s->mb_y= mb_y;
...@@ -559,8 +559,8 @@ static int is_intra_more_likely(MpegEncContext *s){ ...@@ -559,8 +559,8 @@ static int is_intra_more_likely(MpegEncContext *s){
UINT8 *mb_ptr = s->current_picture[0] + mb_x*16 + mb_y*16*s->linesize; UINT8 *mb_ptr = s->current_picture[0] + mb_x*16 + mb_y*16*s->linesize;
UINT8 *last_mb_ptr= s->last_picture [0] + mb_x*16 + mb_y*16*s->linesize; UINT8 *last_mb_ptr= s->last_picture [0] + mb_x*16 + mb_y*16*s->linesize;
is_intra_likely += pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize); is_intra_likely += s->dsp.pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize);
is_intra_likely -= pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize); is_intra_likely -= s->dsp.pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize);
}else{ }else{
if(s->mbintra_table[i]) //HACK (this is allways inited but we should use mb_type[]) if(s->mbintra_table[i]) //HACK (this is allways inited but we should use mb_type[])
is_intra_likely++; is_intra_likely++;
...@@ -738,7 +738,7 @@ void ff_error_resilience(MpegEncContext *s){ ...@@ -738,7 +738,7 @@ void ff_error_resilience(MpegEncContext *s){
s->mv[0][0][1] = s->motion_val[ mb_x*2+1 + (mb_y*2+1)*s->block_wrap[0] ][1]; s->mv[0][0][1] = s->motion_val[ mb_x*2+1 + (mb_y*2+1)*s->block_wrap[0] ][1];
} }
clear_blocks(s->block[0]); s->dsp.clear_blocks(s->block[0]);
s->mb_x= mb_x; s->mb_x= mb_x;
s->mb_y= mb_y; s->mb_y= mb_y;
...@@ -778,8 +778,8 @@ void ff_error_resilience(MpegEncContext *s){ ...@@ -778,8 +778,8 @@ void ff_error_resilience(MpegEncContext *s){
s->mv[1][0][0]= 0; s->mv[1][0][0]= 0;
s->mv[1][0][1]= 0; s->mv[1][0][1]= 0;
} }
clear_blocks(s->block[0]); s->dsp.clear_blocks(s->block[0]);
s->mb_x= mb_x; s->mb_x= mb_x;
s->mb_y= mb_y; s->mb_y= mb_y;
MPV_decode_mb(s, s->block); MPV_decode_mb(s, s->block);
......
...@@ -538,7 +538,7 @@ void mpeg4_encode_mb(MpegEncContext * s, ...@@ -538,7 +538,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
if(s->coded_order[i+1].pict_type!=B_TYPE) break; if(s->coded_order[i+1].pict_type!=B_TYPE) break;
b_pic= s->coded_order[i+1].picture[0] + offset; b_pic= s->coded_order[i+1].picture[0] + offset;
diff= pix_abs16x16(p_pic, b_pic, s->linesize); diff= s->dsp.pix_abs16x16(p_pic, b_pic, s->linesize);
if(diff>s->qscale*70){ //FIXME check that 70 is optimal if(diff>s->qscale*70){ //FIXME check that 70 is optimal
s->mb_skiped=0; s->mb_skiped=0;
break; break;
......
...@@ -195,7 +195,7 @@ static int decode_slice(MpegEncContext *s){ ...@@ -195,7 +195,7 @@ static int decode_slice(MpegEncContext *s){
} }
/* DCT & quantize */ /* DCT & quantize */
clear_blocks(s->block[0]); s->dsp.clear_blocks(s->block[0]);
s->mv_dir = MV_DIR_FORWARD; s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_16X16; s->mv_type = MV_TYPE_16X16;
......
This diff is collapsed.
This diff is collapsed.
...@@ -1623,7 +1623,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx, ...@@ -1623,7 +1623,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
s->mb_incr= 1; s->mb_incr= 1;
for(;;) { for(;;) {
clear_blocks(s->block[0]); s->dsp.clear_blocks(s->block[0]);
ret = mpeg_decode_mb(s, s->block); ret = mpeg_decode_mb(s, s->block);
dprintf("ret=%d\n", ret); dprintf("ret=%d\n", ret);
......
This diff is collapsed.
...@@ -221,6 +221,7 @@ typedef struct MpegEncContext { ...@@ -221,6 +221,7 @@ typedef struct MpegEncContext {
int unrestricted_mv; int unrestricted_mv;
int h263_long_vectors; /* use horrible h263v1 long vector mode */ int h263_long_vectors; /* use horrible h263v1 long vector mode */
DSPContext dsp; /* pointers for accelerated dsp functions */
int f_code; /* forward MV resolution */ int f_code; /* forward MV resolution */
int b_code; /* backward MV resolution for B Frames (mpeg4) */ int b_code; /* backward MV resolution for B Frames (mpeg4) */
INT16 (*motion_val)[2]; /* used for MV prediction (4MV per MB) */ INT16 (*motion_val)[2]; /* used for MV prediction (4MV per MB) */
......
...@@ -447,7 +447,7 @@ static int rv10_decode_packet(AVCodecContext *avctx, ...@@ -447,7 +447,7 @@ static int rv10_decode_packet(AVCodecContext *avctx,
printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
#endif #endif
clear_blocks(s->block[0]); s->dsp.clear_blocks(s->block[0]);
s->mv_dir = MV_DIR_FORWARD; s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_16X16; s->mv_type = MV_TYPE_16X16;
if (ff_h263_decode_mb(s, s->block) == SLICE_ERROR) { if (ff_h263_decode_mb(s, s->block) == SLICE_ERROR) {
......
...@@ -804,7 +804,7 @@ static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int ...@@ -804,7 +804,7 @@ static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int
} }
} }
static int svq1_motion_inter_block (bit_buffer_t *bitbuf, static int svq1_motion_inter_block (MpegEncContext *s, bit_buffer_t *bitbuf,
uint8_t *current, uint8_t *previous, int pitch, uint8_t *current, uint8_t *previous, int pitch,
svq1_pmv_t *motion, int x, int y) { svq1_pmv_t *motion, int x, int y) {
uint8_t *src; uint8_t *src;
...@@ -839,12 +839,12 @@ static int svq1_motion_inter_block (bit_buffer_t *bitbuf, ...@@ -839,12 +839,12 @@ static int svq1_motion_inter_block (bit_buffer_t *bitbuf,
src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch]; src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
dst = current; dst = current;
put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16); s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
return 0; return 0;
} }
static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf, static int svq1_motion_inter_4v_block (MpegEncContext *s, bit_buffer_t *bitbuf,
uint8_t *current, uint8_t *previous, int pitch, uint8_t *current, uint8_t *previous, int pitch,
svq1_pmv_t *motion,int x, int y) { svq1_pmv_t *motion,int x, int y) {
uint8_t *src; uint8_t *src;
...@@ -906,7 +906,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf, ...@@ -906,7 +906,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf,
src = &previous[(x + (pmv[i]->x >> 1)) + (y + (pmv[i]->y >> 1))*pitch]; src = &previous[(x + (pmv[i]->x >> 1)) + (y + (pmv[i]->y >> 1))*pitch];
dst = current; dst = current;
put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8); s->dsp.put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8);
/* select next block */ /* select next block */
if (i & 1) { if (i & 1) {
...@@ -921,7 +921,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf, ...@@ -921,7 +921,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf,
return 0; return 0;
} }
static int svq1_decode_delta_block (bit_buffer_t *bitbuf, static int svq1_decode_delta_block (MpegEncContext *s, bit_buffer_t *bitbuf,
uint8_t *current, uint8_t *previous, int pitch, uint8_t *current, uint8_t *previous, int pitch,
svq1_pmv_t *motion, int x, int y) { svq1_pmv_t *motion, int x, int y) {
uint32_t bit_cache; uint32_t bit_cache;
...@@ -951,7 +951,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf, ...@@ -951,7 +951,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf,
break; break;
case SVQ1_BLOCK_INTER: case SVQ1_BLOCK_INTER:
result = svq1_motion_inter_block (bitbuf, current, previous, pitch, motion, x, y); result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y);
if (result != 0) if (result != 0)
{ {
...@@ -964,7 +964,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf, ...@@ -964,7 +964,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf,
break; break;
case SVQ1_BLOCK_INTER_4V: case SVQ1_BLOCK_INTER_4V:
result = svq1_motion_inter_4v_block (bitbuf, current, previous, pitch, motion, x, y); result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y);
if (result != 0) if (result != 0)
{ {
...@@ -1142,8 +1142,8 @@ static int svq1_decode_frame(AVCodecContext *avctx, ...@@ -1142,8 +1142,8 @@ static int svq1_decode_frame(AVCodecContext *avctx,
for (y=0; y < height; y+=16) { for (y=0; y < height; y+=16) {
for (x=0; x < width; x+=16) { for (x=0; x < width; x+=16) {
result = svq1_decode_delta_block (&s->gb, &current[x], previous, result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
linesize, pmv, x, y); linesize, pmv, x, y);
if (result != 0) if (result != 0)
{ {
#ifdef DEBUG_SVQ1 #ifdef DEBUG_SVQ1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment