Commit ede45c4e authored by Michael Niedermayer

Merge commit '25841dfe'

* commit '25841dfe':
  Use ptrdiff_t instead of int for {avg, put}_pixels line_size parameter.

Conflicts:
	libavcodec/alpha/dsputil_alpha.c
	libavcodec/dsputil_template.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 127ff886 25841dfe
...@@ -32,7 +32,7 @@ void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, ...@@ -32,7 +32,7 @@ void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
/* These functions were the base for the optimized assembler routines, /* These functions were the base for the optimized assembler routines,
and remain here for documentation purposes. */ and remain here for documentation purposes. */
static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels, static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
int line_size) ptrdiff_t line_size)
{ {
int i = 8; int i = 8;
uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
...@@ -56,7 +56,7 @@ static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels, ...@@ -56,7 +56,7 @@ static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
} }
void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels, void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
int line_size) ptrdiff_t line_size)
{ {
int h = 8; int h = 8;
/* Keep this function a leaf function by generating the constants /* Keep this function a leaf function by generating the constants
...@@ -211,8 +211,8 @@ static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) ...@@ -211,8 +211,8 @@ static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \ #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
static void OPNAME ## _pixels ## SUFF ## _axp \ static void OPNAME ## _pixels ## SUFF ## _axp \
(uint8_t *av_restrict block, const uint8_t *av_restrict pixels, \ (uint8_t *restrict block, const uint8_t *restrict pixels, \
int line_size, int h) \ ptrdiff_t line_size, int h) \
{ \ { \
if ((size_t) pixels & 0x7) { \ if ((size_t) pixels & 0x7) { \
OPKIND(uldq, STORE); \ OPKIND(uldq, STORE); \
...@@ -222,8 +222,8 @@ static void OPNAME ## _pixels ## SUFF ## _axp \ ...@@ -222,8 +222,8 @@ static void OPNAME ## _pixels ## SUFF ## _axp \
} \ } \
\ \
static void OPNAME ## _pixels16 ## SUFF ## _axp \ static void OPNAME ## _pixels16 ## SUFF ## _axp \
(uint8_t *av_restrict block, const uint8_t *av_restrict pixels, \ (uint8_t *restrict block, const uint8_t *restrict pixels, \
int line_size, int h) \ ptrdiff_t line_size, int h) \
{ \ { \
OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \ OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \ OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
...@@ -262,7 +262,7 @@ PIXOP(put_no_rnd, STORE); ...@@ -262,7 +262,7 @@ PIXOP(put_no_rnd, STORE);
PIXOP(avg_no_rnd, STORE); PIXOP(avg_no_rnd, STORE);
static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
int line_size, int h) ptrdiff_t line_size, int h)
{ {
put_pixels_axp_asm(block, pixels, line_size, h); put_pixels_axp_asm(block, pixels, line_size, h);
put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
......
...@@ -26,7 +26,7 @@ void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block); ...@@ -26,7 +26,7 @@ void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block);
void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block); void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block);
void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
int line_size); int line_size);
void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
......
...@@ -30,16 +30,16 @@ void ff_simple_idct_arm(int16_t *data); ...@@ -30,16 +30,16 @@ void ff_simple_idct_arm(int16_t *data);
static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); void ff_put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); void ff_put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); void ff_put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); void ff_put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); void ff_put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); void ff_put_pixels16_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
CALL_2X_PIXELS(ff_put_pixels16_x2_arm, ff_put_pixels8_x2_arm, 8) CALL_2X_PIXELS(ff_put_pixels16_x2_arm, ff_put_pixels8_x2_arm, 8)
CALL_2X_PIXELS(ff_put_pixels16_y2_arm, ff_put_pixels8_y2_arm, 8) CALL_2X_PIXELS(ff_put_pixels16_y2_arm, ff_put_pixels8_y2_arm, 8)
......
...@@ -28,23 +28,23 @@ void ff_simple_idct_armv6(int16_t *data); ...@@ -28,23 +28,23 @@ void ff_simple_idct_armv6(int16_t *data);
void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data); void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data);
void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data); void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data);
void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_add_pixels_clamped_armv6(const int16_t *block, void ff_add_pixels_clamped_armv6(const int16_t *block,
uint8_t *restrict pixels, uint8_t *restrict pixels,
......
...@@ -32,32 +32,32 @@ void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data); ...@@ -32,32 +32,32 @@ void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);
void ff_clear_block_neon(int16_t *block); void ff_clear_block_neon(int16_t *block);
void ff_clear_blocks_neon(int16_t *blocks); void ff_clear_blocks_neon(int16_t *blocks);
void ff_put_pixels16_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
......
...@@ -55,73 +55,73 @@ static void bfin_clear_blocks (int16_t *blocks) ...@@ -55,73 +55,73 @@ static void bfin_clear_blocks (int16_t *blocks)
static void bfin_put_pixels8 (uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels8 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels8uc (block, pixels, pixels, line_size, line_size, h); ff_bfin_put_pixels8uc (block, pixels, pixels, line_size, line_size, h);
} }
static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels8uc (block, pixels, pixels+1, line_size, line_size, h); ff_bfin_put_pixels8uc (block, pixels, pixels+1, line_size, line_size, h);
} }
static void bfin_put_pixels8_y2 (uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels8_y2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels8uc (block, pixels, pixels+line_size, line_size, line_size, h); ff_bfin_put_pixels8uc (block, pixels, pixels+line_size, line_size, line_size, h);
} }
static void bfin_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int line_size, int h) static void bfin_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, ptrdiff_t line_size, int h)
{ {
ff_bfin_z_put_pixels8_xy2 (block,s0,line_size, line_size, h); ff_bfin_z_put_pixels8_xy2 (block,s0,line_size, line_size, h);
} }
static void bfin_put_pixels16 (uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels16 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels16uc (block, pixels, pixels, line_size, line_size, h); ff_bfin_put_pixels16uc (block, pixels, pixels, line_size, line_size, h);
} }
static void bfin_put_pixels16_x2 (uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels16_x2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels16uc (block, pixels, pixels+1, line_size, line_size, h); ff_bfin_put_pixels16uc (block, pixels, pixels+1, line_size, line_size, h);
} }
static void bfin_put_pixels16_y2 (uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels16_y2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels16uc (block, pixels, pixels+line_size, line_size, line_size, h); ff_bfin_put_pixels16uc (block, pixels, pixels+line_size, line_size, line_size, h);
} }
static void bfin_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int line_size, int h) static void bfin_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, ptrdiff_t line_size, int h)
{ {
ff_bfin_z_put_pixels16_xy2 (block,s0,line_size, line_size, h); ff_bfin_z_put_pixels16_xy2 (block,s0,line_size, line_size, h);
} }
static void bfin_put_pixels8_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels8_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels8uc_nornd (block, pixels, pixels, line_size, h); ff_bfin_put_pixels8uc_nornd (block, pixels, pixels, line_size, h);
} }
static void bfin_put_pixels8_x2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels8_x2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+1, line_size, h); ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+1, line_size, h);
} }
static void bfin_put_pixels8_y2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels8_y2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+line_size, line_size, h); ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+line_size, line_size, h);
} }
static void bfin_put_pixels16_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels16_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels16uc_nornd (block, pixels, pixels, line_size, h); ff_bfin_put_pixels16uc_nornd (block, pixels, pixels, line_size, h);
} }
static void bfin_put_pixels16_x2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels16_x2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+1, line_size, h); ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+1, line_size, h);
} }
static void bfin_put_pixels16_y2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) static void bfin_put_pixels16_y2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+line_size, line_size, h); ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+line_size, line_size, h);
} }
......
...@@ -144,7 +144,7 @@ void clear_blocks_c(int16_t *blocks); ...@@ -144,7 +144,7 @@ void clear_blocks_c(int16_t *blocks);
/* add and put pixel (decoding) */ /* add and put pixel (decoding) */
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4 //h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h); typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, ptrdiff_t line_size, int h);
typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
......
...@@ -172,19 +172,19 @@ static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8 ...@@ -172,19 +172,19 @@ static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8
FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\ FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\ }\
\ \
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\ }\
\ \
static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\ }\
\ \
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\ }\
\ \
static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\ }\
\ \
...@@ -225,19 +225,19 @@ static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1 ...@@ -225,19 +225,19 @@ static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1
}\ }\
}\ }\
\ \
static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\ }\
\ \
static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\ }\
\ \
static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\ }\
\ \
static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\ }\
\ \
...@@ -288,11 +288,11 @@ static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8 ...@@ -288,11 +288,11 @@ static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8
FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\ }\
\ \
static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *p_block, const uint8_t *p_pixels, int line_size, int h)\ static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, ptrdiff_t line_size, int h)\
{\ {\
int i, a0, b0, a1, b1;\ int i, a0, b0, a1, b1;\
pixel *block = (pixel*)p_block;\ pixel *block = (pixel*)_block;\
const pixel *pixels = (const pixel*)p_pixels;\ const pixel *pixels = (const pixel*)_pixels;\
line_size >>= sizeof(pixel)-1;\ line_size >>= sizeof(pixel)-1;\
a0= pixels[0];\ a0= pixels[0];\
b0= pixels[1] + 2;\ b0= pixels[1] + 2;\
...@@ -324,7 +324,7 @@ static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *p_block, const uint8_t ...@@ -324,7 +324,7 @@ static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *p_block, const uint8_t
}\ }\
}\ }\
\ \
static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\
{\ {\
/* FIXME HIGH BIT DEPTH */\ /* FIXME HIGH BIT DEPTH */\
int i;\ int i;\
...@@ -403,7 +403,7 @@ static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t * ...@@ -403,7 +403,7 @@ static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *
}\ }\
}\ }\
\ \
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\
{\ {\
/* FIXME HIGH BIT DEPTH */\ /* FIXME HIGH BIT DEPTH */\
int j;\ int j;\
......
...@@ -290,7 +290,7 @@ static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ ...@@ -290,7 +290,7 @@ static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
return 0; return 0;
} }
static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){ static void zero_hpel(uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h){
} }
int ff_init_me(MpegEncContext *s){ int ff_init_me(MpegEncContext *s){
......
...@@ -607,7 +607,7 @@ static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { ...@@ -607,7 +607,7 @@ static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
} }
/* next one assumes that ((line_size % 16) == 0) */ /* next one assumes that ((line_size % 16) == 0) */
void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
register vector unsigned char pixelsv1, pixelsv2; register vector unsigned char pixelsv1, pixelsv2;
register vector unsigned char pixelsv1B, pixelsv2B; register vector unsigned char pixelsv1B, pixelsv2B;
...@@ -616,9 +616,9 @@ void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_siz ...@@ -616,9 +616,9 @@ void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_siz
register vector unsigned char perm = vec_lvsl(0, pixels); register vector unsigned char perm = vec_lvsl(0, pixels);
int i; int i;
register int line_size_2 = line_size << 1; register ptrdiff_t line_size_2 = line_size << 1;
register int line_size_3 = line_size + line_size_2; register ptrdiff_t line_size_3 = line_size + line_size_2;
register int line_size_4 = line_size << 2; register ptrdiff_t line_size_4 = line_size << 2;
// hand-unrolling the loop by 4 gains about 15% // hand-unrolling the loop by 4 gains about 15%
// mininum execution time goes from 74 to 60 cycles // mininum execution time goes from 74 to 60 cycles
...@@ -649,7 +649,7 @@ void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_siz ...@@ -649,7 +649,7 @@ void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_siz
/* next one assumes that ((line_size % 16) == 0) */ /* next one assumes that ((line_size % 16) == 0) */
#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
register vector unsigned char perm = vec_lvsl(0, pixels); register vector unsigned char perm = vec_lvsl(0, pixels);
...@@ -668,7 +668,7 @@ void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_siz ...@@ -668,7 +668,7 @@ void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_siz
} }
/* next one assumes that ((line_size % 8) == 0) */ /* next one assumes that ((line_size % 8) == 0) */
static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{ {
register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
int i; int i;
...@@ -699,7 +699,7 @@ static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int lin ...@@ -699,7 +699,7 @@ static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int lin
} }
/* next one assumes that ((line_size % 8) == 0) */ /* next one assumes that ((line_size % 8) == 0) */
static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
register int i; register int i;
register vector unsigned char pixelsv1, pixelsv2, pixelsavg; register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
...@@ -758,7 +758,7 @@ static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l ...@@ -758,7 +758,7 @@ static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
} }
/* next one assumes that ((line_size % 8) == 0) */ /* next one assumes that ((line_size % 8) == 0) */
static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
register int i; register int i;
register vector unsigned char pixelsv1, pixelsv2, pixelsavg; register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
...@@ -818,7 +818,7 @@ static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels ...@@ -818,7 +818,7 @@ static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels
} }
/* next one assumes that ((line_size % 16) == 0) */ /* next one assumes that ((line_size % 16) == 0) */
static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{ {
register int i; register int i;
register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
...@@ -886,7 +886,7 @@ static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, in ...@@ -886,7 +886,7 @@ static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, in
} }
/* next one assumes that ((line_size % 16) == 0) */ /* next one assumes that ((line_size % 16) == 0) */
static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{ {
register int i; register int i;
register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
...@@ -1284,7 +1284,7 @@ static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ui ...@@ -1284,7 +1284,7 @@ static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ui
} }
/* next one assumes that ((line_size % 8) == 0) */ /* next one assumes that ((line_size % 8) == 0) */
static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
register int i; register int i;
register vector unsigned char pixelsv1, pixelsv2, pixelsavg; register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
......
...@@ -26,9 +26,9 @@ ...@@ -26,9 +26,9 @@
#include <stdint.h> #include <stdint.h>
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_fdct_altivec(int16_t *block); void ff_fdct_altivec(int16_t *block);
void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
......
...@@ -262,7 +262,7 @@ if (sz==16) { \ ...@@ -262,7 +262,7 @@ if (sz==16) { \
#define DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \ #define DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \
static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \ static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \
const int stride, int height) \ const ptrdiff_t stride, int height) \
{ \ { \
switch((int)ref&3) { \ switch((int)ref&3) { \
case 0:OP_N##0(sz,rnd##_##avgfunc); return; \ case 0:OP_N##0(sz,rnd##_##avgfunc); return; \
......
This diff is collapsed.
...@@ -27,14 +27,14 @@ ...@@ -27,14 +27,14 @@
//FIXME the following could be optimized too ... //FIXME the following could be optimized too ...
static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block, static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block,
const uint8_t *pixels, const uint8_t *pixels,
int line_size, int h) ptrdiff_t line_size, int h)
{ {
DEF(ff_put_no_rnd_pixels8_x2)(block, pixels, line_size, h); DEF(ff_put_no_rnd_pixels8_x2)(block, pixels, line_size, h);
DEF(ff_put_no_rnd_pixels8_x2)(block + 8, pixels + 8, line_size, h); DEF(ff_put_no_rnd_pixels8_x2)(block + 8, pixels + 8, line_size, h);
} }
static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
int line_size, int h) ptrdiff_t line_size, int h)
{ {
DEF(ff_put_pixels8_y2)(block, pixels, line_size, h); DEF(ff_put_pixels8_y2)(block, pixels, line_size, h);
DEF(ff_put_pixels8_y2)(block + 8, pixels + 8, line_size, h); DEF(ff_put_pixels8_y2)(block + 8, pixels + 8, line_size, h);
...@@ -42,35 +42,35 @@ static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, ...@@ -42,35 +42,35 @@ static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block, static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block,
const uint8_t *pixels, const uint8_t *pixels,
int line_size, int h) ptrdiff_t line_size, int h)
{ {
DEF(ff_put_no_rnd_pixels8_y2)(block, pixels, line_size, h); DEF(ff_put_no_rnd_pixels8_y2)(block, pixels, line_size, h);
DEF(ff_put_no_rnd_pixels8_y2)(block + 8, pixels + 8, line_size, h); DEF(ff_put_no_rnd_pixels8_y2)(block + 8, pixels + 8, line_size, h);
} }
static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels, static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels,
int line_size, int h) ptrdiff_t line_size, int h)
{ {
DEF(ff_avg_pixels8)(block, pixels, line_size, h); DEF(ff_avg_pixels8)(block, pixels, line_size, h);
DEF(ff_avg_pixels8)(block + 8, pixels + 8, line_size, h); DEF(ff_avg_pixels8)(block + 8, pixels + 8, line_size, h);
} }
static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels,
int line_size, int h) ptrdiff_t line_size, int h)
{ {
DEF(ff_avg_pixels8_x2)(block, pixels, line_size, h); DEF(ff_avg_pixels8_x2)(block, pixels, line_size, h);
DEF(ff_avg_pixels8_x2)(block + 8, pixels + 8, line_size, h); DEF(ff_avg_pixels8_x2)(block + 8, pixels + 8, line_size, h);
} }
static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
int line_size, int h) ptrdiff_t line_size, int h)
{ {
DEF(ff_avg_pixels8_y2)(block, pixels, line_size, h); DEF(ff_avg_pixels8_y2)(block, pixels, line_size, h);
DEF(ff_avg_pixels8_y2)(block + 8, pixels + 8, line_size, h); DEF(ff_avg_pixels8_y2)(block + 8, pixels + 8, line_size, h);
} }
static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels,
int line_size, int h) ptrdiff_t line_size, int h)
{ {
DEF(ff_avg_pixels8_xy2)(block, pixels, line_size, h); DEF(ff_avg_pixels8_xy2)(block, pixels, line_size, h);
DEF(ff_avg_pixels8_xy2)(block + 8, pixels + 8, line_size, h); DEF(ff_avg_pixels8_xy2)(block + 8, pixels + 8, line_size, h);
......
...@@ -84,9 +84,9 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; ...@@ -84,9 +84,9 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
#if HAVE_YASM #if HAVE_YASM
void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h); int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1,
...@@ -95,9 +95,9 @@ void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, ...@@ -95,9 +95,9 @@ void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1,
void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h); int dstStride, int src1Stride, int h);
void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h); int dstStride, int src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
...@@ -105,47 +105,47 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, ...@@ -105,47 +105,47 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h); int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
const uint8_t *pixels, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
const uint8_t *pixels, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
const uint8_t *pixels, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
const uint8_t *pixels, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, int line_size, int h); void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
static void ff_put_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, static void ff_put_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
int line_size, int h) int line_size, int h)
{ {
...@@ -1522,9 +1522,9 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, ...@@ -1522,9 +1522,9 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src,
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
int line_size, int h); ptrdiff_t line_size, int h);
void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src, void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y); int stride, int h, int x, int y);
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
*/ */
// put_pixels // put_pixels
static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
__asm__ volatile( __asm__ volatile(
...@@ -107,7 +107,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -107,7 +107,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t
:"memory"); :"memory");
} }
static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
__asm__ volatile( __asm__ volatile(
...@@ -202,7 +202,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -202,7 +202,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
:"memory"); :"memory");
} }
static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
__asm__ volatile( __asm__ volatile(
...@@ -231,7 +231,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -231,7 +231,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
:REG_a, "memory"); :REG_a, "memory");
} }
static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_ZERO(mm7); MOVQ_ZERO(mm7);
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
...@@ -298,7 +298,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -298,7 +298,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
} }
// avg_pixels // avg_pixels
static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
...@@ -319,7 +319,7 @@ static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, i ...@@ -319,7 +319,7 @@ static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, i
#ifndef NO_RND #ifndef NO_RND
// in case more speed is needed - unroling would certainly help // in case more speed is needed - unroling would certainly help
static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
...@@ -339,7 +339,7 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si ...@@ -339,7 +339,7 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si
} }
#endif // NO_RND #endif // NO_RND
static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
...@@ -363,7 +363,7 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s ...@@ -363,7 +363,7 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s
} }
#ifndef NO_RND #ifndef NO_RND
static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
...@@ -405,7 +405,7 @@ static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -405,7 +405,7 @@ static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t
} while (--h); } while (--h);
} }
static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
...@@ -458,7 +458,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -458,7 +458,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
} while (--h); } while (--h);
} }
static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
__asm__ volatile( __asm__ volatile(
...@@ -498,7 +498,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -498,7 +498,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
} }
// this routine is 'slightly' suboptimal but mostly unused // this routine is 'slightly' suboptimal but mostly unused
static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_ZERO(mm7); MOVQ_ZERO(mm7);
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
...@@ -573,22 +573,22 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -573,22 +573,22 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
} }
//FIXME optimize //FIXME optimize
static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){
DEF(put, pixels8_y2)(block , pixels , line_size, h); DEF(put, pixels8_y2)(block , pixels , line_size, h);
DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h); DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h);
} }
static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){
DEF(put, pixels8_xy2)(block , pixels , line_size, h); DEF(put, pixels8_xy2)(block , pixels , line_size, h);
DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h); DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h);
} }
static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){
DEF(avg, pixels8_y2)(block , pixels , line_size, h); DEF(avg, pixels8_y2)(block , pixels , line_size, h);
DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h); DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h);
} }
static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){
DEF(avg, pixels8_xy2)(block , pixels , line_size, h); DEF(avg, pixels8_xy2)(block , pixels , line_size, h);
DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h); DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h);
} }
...@@ -32,10 +32,9 @@ cextern pb_1 ...@@ -32,10 +32,9 @@ cextern pb_1
SECTION_TEXT SECTION_TEXT
; put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_PIXELS8_X2 0 %macro PUT_PIXELS8_X2 0
cglobal put_pixels8_x2, 4,5 cglobal put_pixels8_x2, 4,5
movsxdifnidn r2, r2d
lea r4, [r2*2] lea r4, [r2*2]
.loop: .loop:
mova m0, [r1] mova m0, [r1]
...@@ -65,10 +64,9 @@ INIT_MMX 3dnow ...@@ -65,10 +64,9 @@ INIT_MMX 3dnow
PUT_PIXELS8_X2 PUT_PIXELS8_X2
; put_pixels16_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; put_pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_PIXELS_16 0 %macro PUT_PIXELS_16 0
cglobal put_pixels16_x2, 4,5 cglobal put_pixels16_x2, 4,5
movsxdifnidn r2, r2d
lea r4, [r2*2] lea r4, [r2*2]
.loop: .loop:
mova m0, [r1] mova m0, [r1]
...@@ -110,11 +108,10 @@ INIT_MMX 3dnow ...@@ -110,11 +108,10 @@ INIT_MMX 3dnow
PUT_PIXELS_16 PUT_PIXELS_16
; put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_NO_RND_PIXELS8_X2 0 %macro PUT_NO_RND_PIXELS8_X2 0
cglobal put_no_rnd_pixels8_x2, 4,5 cglobal put_no_rnd_pixels8_x2, 4,5
mova m6, [pb_1] mova m6, [pb_1]
movsxdifnidn r2, r2d
lea r4, [r2*2] lea r4, [r2*2]
.loop: .loop:
mova m0, [r1] mova m0, [r1]
...@@ -152,10 +149,9 @@ INIT_MMX 3dnow ...@@ -152,10 +149,9 @@ INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_X2 PUT_NO_RND_PIXELS8_X2
; put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_NO_RND_PIXELS8_X2_EXACT 0 %macro PUT_NO_RND_PIXELS8_X2_EXACT 0
cglobal put_no_rnd_pixels8_x2_exact, 4,5 cglobal put_no_rnd_pixels8_x2_exact, 4,5
movsxdifnidn r2, r2d
lea r4, [r2*3] lea r4, [r2*3]
pcmpeqb m6, m6 pcmpeqb m6, m6
.loop: .loop:
...@@ -200,10 +196,9 @@ INIT_MMX 3dnow ...@@ -200,10 +196,9 @@ INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_X2_EXACT PUT_NO_RND_PIXELS8_X2_EXACT
; put_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_PIXELS8_Y2 0 %macro PUT_PIXELS8_Y2 0
cglobal put_pixels8_y2, 4,5 cglobal put_pixels8_y2, 4,5
movsxdifnidn r2, r2d
lea r4, [r2*2] lea r4, [r2*2]
mova m0, [r1] mova m0, [r1]
sub r0, r2 sub r0, r2
...@@ -235,11 +230,10 @@ INIT_MMX 3dnow ...@@ -235,11 +230,10 @@ INIT_MMX 3dnow
PUT_PIXELS8_Y2 PUT_PIXELS8_Y2
; put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_NO_RND_PIXELS8_Y2 0 %macro PUT_NO_RND_PIXELS8_Y2 0
cglobal put_no_rnd_pixels8_y2, 4,5 cglobal put_no_rnd_pixels8_y2, 4,5
mova m6, [pb_1] mova m6, [pb_1]
movsxdifnidn r2, r2d
lea r4, [r2+r2] lea r4, [r2+r2]
mova m0, [r1] mova m0, [r1]
sub r0, r2 sub r0, r2
...@@ -273,10 +267,9 @@ INIT_MMX 3dnow ...@@ -273,10 +267,9 @@ INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_Y2 PUT_NO_RND_PIXELS8_Y2
; put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 %macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
cglobal put_no_rnd_pixels8_y2_exact, 4,5 cglobal put_no_rnd_pixels8_y2_exact, 4,5
movsxdifnidn r2, r2d
lea r4, [r2*3] lea r4, [r2*3]
mova m0, [r1] mova m0, [r1]
pcmpeqb m6, m6 pcmpeqb m6, m6
...@@ -316,10 +309,9 @@ INIT_MMX 3dnow ...@@ -316,10 +309,9 @@ INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_Y2_EXACT PUT_NO_RND_PIXELS8_Y2_EXACT
; avg_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro AVG_PIXELS8 0 %macro AVG_PIXELS8 0
cglobal avg_pixels8, 4,5 cglobal avg_pixels8, 4,5
movsxdifnidn r2, r2d
lea r4, [r2*2] lea r4, [r2*2]
.loop: .loop:
mova m0, [r0] mova m0, [r0]
...@@ -347,10 +339,9 @@ INIT_MMX 3dnow ...@@ -347,10 +339,9 @@ INIT_MMX 3dnow
AVG_PIXELS8 AVG_PIXELS8
; avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro AVG_PIXELS8_X2 0 %macro AVG_PIXELS8_X2 0
cglobal avg_pixels8_x2, 4,5 cglobal avg_pixels8_x2, 4,5
movsxdifnidn r2, r2d
lea r4, [r2*2] lea r4, [r2*2]
.loop: .loop:
mova m0, [r1] mova m0, [r1]
...@@ -384,10 +375,9 @@ INIT_MMX 3dnow ...@@ -384,10 +375,9 @@ INIT_MMX 3dnow
AVG_PIXELS8_X2 AVG_PIXELS8_X2
; avg_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; avg_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro AVG_PIXELS8_Y2 0 %macro AVG_PIXELS8_Y2 0
cglobal avg_pixels8_y2, 4,5 cglobal avg_pixels8_y2, 4,5
movsxdifnidn r2, r2d
lea r4, [r2*2] lea r4, [r2*2]
mova m0, [r1] mova m0, [r1]
sub r0, r2 sub r0, r2
...@@ -427,11 +417,10 @@ INIT_MMX 3dnow ...@@ -427,11 +417,10 @@ INIT_MMX 3dnow
AVG_PIXELS8_Y2 AVG_PIXELS8_Y2
; avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro AVG_PIXELS8_XY2 0 %macro AVG_PIXELS8_XY2 0
cglobal avg_pixels8_xy2, 4,5 cglobal avg_pixels8_xy2, 4,5
mova m6, [pb_1] mova m6, [pb_1]
movsxdifnidn r2, r2d
lea r4, [r2*2] lea r4, [r2*2]
mova m0, [r1] mova m0, [r1]
pavgb m0, [r1+1] pavgb m0, [r1+1]
...@@ -472,9 +461,8 @@ INIT_MMX 3dnow ...@@ -472,9 +461,8 @@ INIT_MMX 3dnow
AVG_PIXELS8_XY2 AVG_PIXELS8_XY2
INIT_XMM sse2 INIT_XMM sse2
; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
cglobal put_pixels16, 4,5,4 cglobal put_pixels16, 4,5,4
movsxdifnidn r2, r2d
lea r4, [r2*3] lea r4, [r2*3]
.loop: .loop:
movu m0, [r1] movu m0, [r1]
...@@ -491,9 +479,8 @@ cglobal put_pixels16, 4,5,4 ...@@ -491,9 +479,8 @@ cglobal put_pixels16, 4,5,4
jnz .loop jnz .loop
REP_RET REP_RET
; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) ; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
cglobal avg_pixels16, 4,5,4 cglobal avg_pixels16, 4,5,4
movsxdifnidn r2, r2d
lea r4, [r2*3] lea r4, [r2*3]
.loop: .loop:
movu m0, [r1] movu m0, [r1]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment