Commit f24a5159 authored by Michael Niedermayer's avatar Michael Niedermayer

shift CABACContext.range right, this reduces the number of shifts needed in...

Shift CABACContext.range right; this reduces the number of shifts needed in get_cabac() and is slightly faster on P3. It should be much faster on P4, because the P4 (except its more recent variants) lacks an integer shifter, so shifts have ~10 times longer latency than simple operations like adds.

Originally committed as revision 6702 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent aa770811
......@@ -51,7 +51,7 @@ static const uint8_t lps_range[64][4]= {
};
uint8_t ff_h264_mlps_state[4*64];
uint8_t ff_h264_lps_range[2*65][4];
uint8_t ff_h264_lps_range[4][2*64];
uint8_t ff_h264_lps_state[2*64];
uint8_t ff_h264_mps_state[2*64];
......@@ -76,8 +76,8 @@ static const uint8_t lps_state[64]= {
33,33,34,34,35,35,35,36,
36,36,37,37,37,38,38,63,
};
const uint8_t ff_h264_norm_shift[128]= {
#if 0
const uint8_t ff_h264_norm_shift_old[128]= {
7,6,5,5,4,4,4,4,3,3,3,3,3,3,3,3,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
......@@ -87,6 +87,29 @@ const uint8_t ff_h264_norm_shift[128]= {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
#endif
/**
 * Normalization shift lookup table with 512 entries.
 *
 * Entry 0 holds 9; every entry i >= 1 holds 8 - floor(log2(i)), i.e. the
 * left shift that moves the highest set bit of i up to bit 8 (0 for 256..511).
 * NOTE(review): presumably indexed by the CABAC range value -- confirm
 * against the users of this table.
 *
 * Laid out 32 values per row; the leading comment gives the first index
 * covered by each group of rows.
 */
const uint8_t ff_h264_norm_shift[512]= {
    /*   0.. 31 */
    9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
    /*  32.. 63 */
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
    /*  64..127 */
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 128..255 */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 256..511 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
/**
*
......@@ -121,7 +144,7 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
c->low = (*c->bytestream++)<<10;
#endif
c->low+= ((*c->bytestream++)<<2) + 2;
c->range= 0x1FE<<(CABAC_BITS + 1);
c->range= 0x1FE;
}
void ff_init_cabac_states(CABACContext *c){
......@@ -129,8 +152,8 @@ void ff_init_cabac_states(CABACContext *c){
for(i=0; i<64; i++){
for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
ff_h264_lps_range[2*i+0][j+4]=
ff_h264_lps_range[2*i+1][j+4]= lps_range[i][j];
ff_h264_lps_range[j][2*i+0]=
ff_h264_lps_range[j][2*i+1]= lps_range[i][j];
}
ff_h264_mlps_state[128+2*i+0]=
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment