Commit 5b0fb524 authored by Michael Niedermayer

Store intra4x4_pred_mode per row only.

About 5 CPU cycles slower in the local code, but should be faster overall
due to reduced cache use. (My sample, though, has too few intra4x4 blocks
for this to be easily measurable either way.)

Originally committed as revision 22052 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent c2186cbd
...@@ -52,15 +52,15 @@ static const uint8_t div6[52]={ ...@@ -52,15 +52,15 @@ static const uint8_t div6[52]={
}; };
void ff_h264_write_back_intra_pred_mode(H264Context *h){ void ff_h264_write_back_intra_pred_mode(H264Context *h){
const int mb_xy= h->mb_xy; int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1]; mode[0]= h->intra4x4_pred_mode_cache[7+8*1];
h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2]; mode[1]= h->intra4x4_pred_mode_cache[7+8*2];
h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3]; mode[2]= h->intra4x4_pred_mode_cache[7+8*3];
h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4]; mode[3]= h->intra4x4_pred_mode_cache[7+8*4];
h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4]; mode[4]= h->intra4x4_pred_mode_cache[4+8*4];
h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4]; mode[5]= h->intra4x4_pred_mode_cache[5+8*4];
h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4]; mode[6]= h->intra4x4_pred_mode_cache[6+8*4];
} }
/** /**
......
...@@ -298,7 +298,7 @@ typedef struct H264Context{ ...@@ -298,7 +298,7 @@ typedef struct H264Context{
int topleft_partition; int topleft_partition;
int8_t intra4x4_pred_mode_cache[5*8]; int8_t intra4x4_pred_mode_cache[5*8];
int8_t (*intra4x4_pred_mode)[8]; int8_t (*intra4x4_pred_mode);
H264PredContext hpc; H264PredContext hpc;
unsigned int topleft_samples_available; unsigned int topleft_samples_available;
unsigned int top_samples_available; unsigned int top_samples_available;
...@@ -886,10 +886,11 @@ static void fill_decode_caches(H264Context *h, int mb_type){ ...@@ -886,10 +886,11 @@ static void fill_decode_caches(H264Context *h, int mb_type){
if(IS_INTRA4x4(mb_type)){ if(IS_INTRA4x4(mb_type)){
if(IS_INTRA4x4(top_type)){ if(IS_INTRA4x4(top_type)){
h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4]; int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[top_xy];
h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5]; h->intra4x4_pred_mode_cache[4+8*0]= mode[4];
h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6]; h->intra4x4_pred_mode_cache[5+8*0]= mode[5];
h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3]; h->intra4x4_pred_mode_cache[6+8*0]= mode[6];
h->intra4x4_pred_mode_cache[7+8*0]= mode[3];
}else{ }else{
int pred; int pred;
if(!(top_type & type_mask)) if(!(top_type & type_mask))
...@@ -904,8 +905,9 @@ static void fill_decode_caches(H264Context *h, int mb_type){ ...@@ -904,8 +905,9 @@ static void fill_decode_caches(H264Context *h, int mb_type){
} }
for(i=0; i<2; i++){ for(i=0; i<2; i++){
if(IS_INTRA4x4(left_type[i])){ if(IS_INTRA4x4(left_type[i])){
h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]]; int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]];
h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]]; h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[left_block[0+2*i]];
h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[left_block[1+2*i]];
}else{ }else{
int pred; int pred;
if(!(left_type[i] & type_mask)) if(!(left_type[i] & type_mask))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment