Commit d43c1922 authored by Michael Niedermayer

Keep mvd_table values for only 2 macroblock (mb) rows.

Originally committed as revision 22047 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent b6a41853
...@@ -663,6 +663,7 @@ static void free_tables(H264Context *h){ ...@@ -663,6 +663,7 @@ static void free_tables(H264Context *h){
av_freep(&h->list_counts); av_freep(&h->list_counts);
av_freep(&h->mb2b_xy); av_freep(&h->mb2b_xy);
av_freep(&h->mb2br_xy);
av_freep(&h->mb2b8_xy); av_freep(&h->mb2b8_xy);
for(i = 0; i < MAX_THREADS; i++) { for(i = 0; i < MAX_THREADS; i++) {
...@@ -765,6 +766,7 @@ int ff_h264_alloc_tables(H264Context *h){ ...@@ -765,6 +766,7 @@ int ff_h264_alloc_tables(H264Context *h){
h->slice_table= h->slice_table_base + s->mb_stride*2 + 1; h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail); FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail); FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
for(y=0; y<s->mb_height; y++){ for(y=0; y<s->mb_height; y++){
for(x=0; x<s->mb_width; x++){ for(x=0; x<s->mb_width; x++){
...@@ -773,6 +775,7 @@ int ff_h264_alloc_tables(H264Context *h){ ...@@ -773,6 +775,7 @@ int ff_h264_alloc_tables(H264Context *h){
const int b8_xy= 2*x + 2*y*h->b8_stride; const int b8_xy= 2*x + 2*y*h->b8_stride;
h->mb2b_xy [mb_xy]= b_xy; h->mb2b_xy [mb_xy]= b_xy;
h->mb2br_xy[mb_xy]= FMO ? b_xy : (b_xy % (8*h->b_stride));
h->mb2b8_xy[mb_xy]= b8_xy; h->mb2b8_xy[mb_xy]= b8_xy;
} }
} }
...@@ -797,6 +800,7 @@ static void clone_tables(H264Context *dst, H264Context *src){ ...@@ -797,6 +800,7 @@ static void clone_tables(H264Context *dst, H264Context *src){
dst->slice_table = src->slice_table; dst->slice_table = src->slice_table;
dst->cbp_table = src->cbp_table; dst->cbp_table = src->cbp_table;
dst->mb2b_xy = src->mb2b_xy; dst->mb2b_xy = src->mb2b_xy;
dst->mb2br_xy = src->mb2br_xy;
dst->mb2b8_xy = src->mb2b8_xy; dst->mb2b8_xy = src->mb2b8_xy;
dst->chroma_pred_mode_table = src->chroma_pred_mode_table; dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
dst->mvd_table[0] = src->mvd_table[0]; dst->mvd_table[0] = src->mvd_table[0];
......
...@@ -61,6 +61,8 @@ ...@@ -61,6 +61,8 @@
#define ALLOW_NOCHROMA #define ALLOW_NOCHROMA
#define FMO 0
/** /**
* The maximum number of slices supported by the decoder. * The maximum number of slices supported by the decoder.
* must be a power of 2 * must be a power of 2
...@@ -344,6 +346,7 @@ typedef struct H264Context{ ...@@ -344,6 +346,7 @@ typedef struct H264Context{
int block_offset[2*(16+8)]; int block_offset[2*(16+8)];
uint32_t *mb2b_xy; //FIXME are these 4 a good idea? uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
uint32_t *mb2br_xy;
uint32_t *mb2b8_xy; uint32_t *mb2b8_xy;
int b_stride; //FIXME use s->b4_stride int b_stride; //FIXME use s->b4_stride
int b8_stride; int b8_stride;
...@@ -1067,13 +1070,13 @@ static void fill_decode_caches(H264Context *h, int mb_type){ ...@@ -1067,13 +1070,13 @@ static void fill_decode_caches(H264Context *h, int mb_type){
if( CABAC ) { if( CABAC ) {
/* XXX beurk, Load mvd */ /* XXX beurk, Load mvd */
if(USES_LIST(top_type, list)){ if(USES_LIST(top_type, list)){
const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; const int b_xy= h->mb2br_xy[top_xy] + 3*h->b_stride;
AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]); AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]);
}else{ }else{
AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]); AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
} }
if(USES_LIST(left_type[0], list)){ if(USES_LIST(left_type[0], list)){
const int b_xy= h->mb2b_xy[left_xy[0]] + 3; const int b_xy= h->mb2br_xy[left_xy[0]] + 3;
AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]); AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]);
AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]); AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]);
}else{ }else{
...@@ -1081,7 +1084,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){ ...@@ -1081,7 +1084,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]); AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]);
} }
if(USES_LIST(left_type[1], list)){ if(USES_LIST(left_type[1], list)){
const int b_xy= h->mb2b_xy[left_xy[1]] + 3; const int b_xy= h->mb2br_xy[left_xy[1]] + 3;
AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]); AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]);
AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]); AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]);
}else{ }else{
...@@ -1421,7 +1424,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){ ...@@ -1421,7 +1424,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){
AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y); AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y);
} }
if( CABAC ) { if( CABAC ) {
uint8_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy]; uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? b_xy : h->mb2br_xy[h->mb_xy]];
uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
if(IS_SKIP(mb_type)) if(IS_SKIP(mb_type))
fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 2); fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 2);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment