Commit 649c00c9 authored by Michael Niedermayer's avatar Michael Niedermayer

sizeof(s->block) isn't 64*6*2 anymore bugfix

mpeg12 decoding optimization

Originally committed as revision 364 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent cf8039b2
...@@ -30,6 +30,7 @@ void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); ...@@ -30,6 +30,7 @@ void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
void (*clear_blocks)(DCTELEM *blocks);
op_pixels_abs_func pix_abs16x16; op_pixels_abs_func pix_abs16x16;
op_pixels_abs_func pix_abs16x16_x2; op_pixels_abs_func pix_abs16x16_x2;
...@@ -866,6 +867,11 @@ void block_permute(INT16 *block) ...@@ -866,6 +867,11 @@ void block_permute(INT16 *block)
} }
#endif #endif
/*
 * Portable C implementation of clear_blocks: zero-fills 6*64 DCTELEM
 * entries starting at blocks. dsputil_init() installs this as the default
 * clear_blocks handler.
 */
void clear_blocks_c(DCTELEM *blocks)
{
    const size_t total_bytes = 64 * 6 * sizeof(*blocks);

    memset(blocks, 0, total_bytes);
}
void dsputil_init(void) void dsputil_init(void)
{ {
int i, j; int i, j;
...@@ -890,6 +896,7 @@ void dsputil_init(void) ...@@ -890,6 +896,7 @@ void dsputil_init(void)
put_pixels_clamped = put_pixels_clamped_c; put_pixels_clamped = put_pixels_clamped_c;
add_pixels_clamped = add_pixels_clamped_c; add_pixels_clamped = add_pixels_clamped_c;
gmc1= gmc1_c; gmc1= gmc1_c;
clear_blocks= clear_blocks_c;
pix_abs16x16 = pix_abs16x16_c; pix_abs16x16 = pix_abs16x16_c;
pix_abs16x16_x2 = pix_abs16x16_x2_c; pix_abs16x16_x2 = pix_abs16x16_x2_c;
......
...@@ -40,11 +40,13 @@ extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); ...@@ -40,11 +40,13 @@ extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
extern void (*clear_blocks)(DCTELEM *blocks);
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
void clear_blocks_c(DCTELEM *blocks);
/* add and put pixel (decoding) */ /* add and put pixel (decoding) */
typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h); typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h);
......
...@@ -156,6 +156,7 @@ static int h263_decode_frame(AVCodecContext *avctx, ...@@ -156,6 +156,7 @@ static int h263_decode_frame(AVCodecContext *avctx,
if (s->mb_y && !s->h263_pred) { if (s->mb_y && !s->h263_pred) {
s->first_gob_line = h263_decode_gob_header(s); s->first_gob_line = h263_decode_gob_header(s);
} }
s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1; s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1;
s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1); s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1);
s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1; s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1;
...@@ -183,28 +184,8 @@ static int h263_decode_frame(AVCodecContext *avctx, ...@@ -183,28 +184,8 @@ static int h263_decode_frame(AVCodecContext *avctx,
s->y_dc_scale = 8; s->y_dc_scale = 8;
s->c_dc_scale = 8; s->c_dc_scale = 8;
} }
clear_blocks(s->block[0]);
#ifdef HAVE_MMX
if (mm_flags & MM_MMX) {
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"movl $-128*6, %%eax \n\t"
"1: \n\t"
"movq %%mm7, (%0, %%eax) \n\t"
"movq %%mm7, 8(%0, %%eax) \n\t"
"movq %%mm7, 16(%0, %%eax) \n\t"
"movq %%mm7, 24(%0, %%eax) \n\t"
"addl $32, %%eax \n\t"
" js 1b \n\t"
: : "r" (((int)s->block)+128*6)
: "%eax"
);
}else{
memset(s->block, 0, sizeof(s->block));
}
#else
memset(s->block, 0, sizeof(s->block));
#endif
s->mv_dir = MV_DIR_FORWARD; s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_16X16; s->mv_type = MV_TYPE_16X16;
if (s->h263_msmpeg4) { if (s->h263_msmpeg4) {
......
...@@ -1025,6 +1025,23 @@ static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line ...@@ -1025,6 +1025,23 @@ static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line
} while(--h); } while(--h);
} }
/*
 * MMX implementation of clear_blocks: writes zeros over the 128*6 (768)
 * bytes starting at blocks.
 *
 * The asm receives the address of the END of the region and walks a
 * negative byte offset in %eax from -768 up towards 0. The "addl" that
 * advances the offset also sets the sign flag tested by "js", so no
 * separate compare is needed. Each pass stores four 8-byte quadwords
 * (32 bytes).
 *
 * NOTE(review): 768 bytes equals 6*64 entries only if DCTELEM is 2 bytes
 * wide -- confirm against the DCTELEM typedef.
 * NOTE(review): the (int) cast of the pointer assumes pointers fit in an
 * int (32-bit x86) -- confirm for the targets this file is built for.
 */
static void clear_blocks_mmx(DCTELEM *blocks)
{
    asm volatile(
        "pxor %%mm7, %%mm7 \n\t"            /* mm7 = 0, the value being stored */
        "movl $-128*6, %%eax \n\t"          /* eax = -768, offset from region end */
        "1: \n\t"
        "movq %%mm7, (%0, %%eax) \n\t"
        "movq %%mm7, 8(%0, %%eax) \n\t"
        "movq %%mm7, 16(%0, %%eax) \n\t"
        "movq %%mm7, 24(%0, %%eax) \n\t"
        "addl $32, %%eax \n\t"              /* advance 32 bytes; sets SF for js */
        " js 1b \n\t"                       /* loop while the offset is negative */
        : : "r" (((int)blocks)+128*6)
        /* "memory": the stores go through an address passed as a plain
         * integer input, so the compiler must be told that memory changes;
         * otherwise it may cache or reorder accesses to the blocks. */
        : "%eax", "memory"
    );
}
static void just_return() { return; } static void just_return() { return; }
void dsputil_init_mmx(void) void dsputil_init_mmx(void)
...@@ -1049,7 +1066,8 @@ void dsputil_init_mmx(void) ...@@ -1049,7 +1066,8 @@ void dsputil_init_mmx(void)
get_pixels = get_pixels_mmx; get_pixels = get_pixels_mmx;
put_pixels_clamped = put_pixels_clamped_mmx; put_pixels_clamped = put_pixels_clamped_mmx;
add_pixels_clamped = add_pixels_clamped_mmx; add_pixels_clamped = add_pixels_clamped_mmx;
clear_blocks= clear_blocks_mmx;
pix_abs16x16 = pix_abs16x16_mmx; pix_abs16x16 = pix_abs16x16_mmx;
pix_abs16x16_x2 = pix_abs16x16_x2_mmx; pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
pix_abs16x16_y2 = pix_abs16x16_y2_mmx; pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
......
...@@ -1402,7 +1402,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx, ...@@ -1402,7 +1402,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
} }
for(;;) { for(;;) {
memset(s->block, 0, sizeof(s->block)); clear_blocks(s->block[0]);
ret = mpeg_decode_mb(s, s->block); ret = mpeg_decode_mb(s, s->block);
dprintf("ret=%d\n", ret); dprintf("ret=%d\n", ret);
if (ret < 0) if (ret < 0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment