Commit 556f8a06 authored by Jason Garrett-Glaser

H.264: template left MB handling

Faster H.264 decoding with ALLOW_INTERLACE off.
parent ca80f11e
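The speedup comes from indexing the left-neighbour arrays through macros (LEFT_MBS, LTOP, LBOT, LEFT()) instead of literal 0/1 indices. The h264.h half of the change is not shown below; the following is only a minimal sketch of what the definitions plausibly look like, assuming the usual two-branch ALLOW_INTERLACE block in h264.h — the macro names are taken from the diff, the exact bodies and placement are assumptions:

/* Sketch (assumption): with interlaced support compiled in, the left neighbour
 * may be a macroblock pair, so two entries are kept and indices pass through. */
#ifdef ALLOW_INTERLACE
#define LEFT_MBS 2      /* number of left-neighbour entries (top/bottom of a pair) */
#define LTOP     0
#define LBOT     1
#define LEFT(i)  (i)
#else
/* Without interlacing there is only one left macroblock: both indices collapse
 * to 0, so left_xy[LBOT] aliases left_xy[LTOP] and the compiler can drop the
 * duplicated loads/stores. */
#define LEFT_MBS 1
#define LTOP     0
#define LBOT     0
#define LEFT(i)  0
#endif

Because every left_type[1] / left_xy[1] access in the decoder now goes through LBOT or LEFT(i), building with ALLOW_INTERLACE off shrinks the two-element left arrays to one element and lets the compiler eliminate the redundant second copy, which is where the faster decoding comes from.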
@@ -3046,7 +3046,7 @@ int ff_h264_get_slice_type(const H264Context *h)
 }
 static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy,
-                                                      int left_xy[2], int top_type, int left_type[2], int mb_xy, int list)
+                                                      int left_xy[LEFT_MBS], int top_type, int left_type[LEFT_MBS], int mb_xy, int list)
 {
     int b_stride = h->b_stride;
     int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
@@ -3066,11 +3066,11 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncCon
             AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
         }
-        if(!IS_INTERLACED(mb_type^left_type[0])){
-            if(USES_LIST(left_type[0], list)){
-                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
-                const int b8_xy= 4*left_xy[0] + 1;
-                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+        if(!IS_INTERLACED(mb_type^left_type[LTOP])){
+            if(USES_LIST(left_type[LTOP], list)){
+                const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
+                const int b8_xy= 4*left_xy[LTOP] + 1;
+                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[LTOP]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                 AV_COPY32(mv_dst - 1 + 0, s->current_picture.motion_val[list][b_xy + b_stride*0]);
                 AV_COPY32(mv_dst - 1 + 8, s->current_picture.motion_val[list][b_xy + b_stride*1]);
                 AV_COPY32(mv_dst - 1 +16, s->current_picture.motion_val[list][b_xy + b_stride*2]);
@@ -3128,8 +3128,8 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncCon
 static int fill_filter_caches(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
-    int top_xy, left_xy[2];
-    int top_type, left_type[2];
+    int top_xy, left_xy[LEFT_MBS];
+    int top_type, left_type[LEFT_MBS];
     uint8_t *nnz;
     uint8_t *nnz_cache;
@@ -3138,56 +3138,56 @@ static int fill_filter_caches(H264Context *h, int mb_type){
     /* Wow, what a mess, why didn't they simplify the interlacing & intra
      * stuff, I can't imagine that these complex rules are worth it. */
-    left_xy[1] = left_xy[0] = mb_xy-1;
+    left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
     if(FRAME_MBAFF){
         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
         const int curr_mb_field_flag = IS_INTERLACED(mb_type);
         if(s->mb_y&1){
             if (left_mb_field_flag != curr_mb_field_flag) {
-                left_xy[0] -= s->mb_stride;
+                left_xy[LTOP] -= s->mb_stride;
             }
         }else{
             if(curr_mb_field_flag){
                 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
             }
             if (left_mb_field_flag != curr_mb_field_flag) {
-                left_xy[1] += s->mb_stride;
+                left_xy[LBOT] += s->mb_stride;
             }
         }
     }
     h->top_mb_xy = top_xy;
-    h->left_mb_xy[0] = left_xy[0];
-    h->left_mb_xy[1] = left_xy[1];
+    h->left_mb_xy[LTOP] = left_xy[LTOP];
+    h->left_mb_xy[LBOT] = left_xy[LBOT];
     {
         //for sufficiently low qp, filtering wouldn't do anything
         //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
         int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
         int qp = s->current_picture.qscale_table[mb_xy];
         if(qp <= qp_thresh
-           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
-           && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
+           && (left_xy[LTOP]<0 || ((qp + s->current_picture.qscale_table[left_xy[LTOP]] + 1)>>1) <= qp_thresh)
+           && (top_xy <0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
             if(!FRAME_MBAFF)
                 return 1;
-            if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
+            if( (left_xy[LTOP]< 0 || ((qp + s->current_picture.qscale_table[left_xy[LBOT] ] + 1)>>1) <= qp_thresh)
                && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
                 return 1;
         }
     }
-    top_type = s->current_picture.mb_type[top_xy] ;
-    left_type[0] = s->current_picture.mb_type[left_xy[0]];
-    left_type[1] = s->current_picture.mb_type[left_xy[1]];
+    top_type = s->current_picture.mb_type[top_xy];
+    left_type[LTOP] = s->current_picture.mb_type[left_xy[LTOP]];
+    left_type[LBOT] = s->current_picture.mb_type[left_xy[LBOT]];
     if(h->deblocking_filter == 2){
         if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
-        if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
+        if(h->slice_table[left_xy[LBOT]] != h->slice_num) left_type[LTOP]= left_type[LBOT]= 0;
     }else{
         if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
-        if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
+        if(h->slice_table[left_xy[LBOT]] == 0xFFFF) left_type[LTOP]= left_type[LBOT] =0;
     }
-    h->top_type = top_type ;
-    h->left_type[0]= left_type[0];
-    h->left_type[1]= left_type[1];
+    h->top_type = top_type;
+    h->left_type[LTOP]= left_type[LTOP];
+    h->left_type[LBOT]= left_type[LBOT];
     if(IS_INTRA(mb_type))
         return 0;
@@ -3209,8 +3209,8 @@ static int fill_filter_caches(H264Context *h, int mb_type){
         AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]);
     }
-    if(left_type[0]){
-        nnz = h->non_zero_count[left_xy[0]];
+    if(left_type[LTOP]){
+        nnz = h->non_zero_count[left_xy[LTOP]];
         nnz_cache[3+8*1]= nnz[3+0*4];
         nnz_cache[3+8*2]= nnz[3+1*4];
         nnz_cache[3+8*3]= nnz[3+2*4];
@@ -3225,13 +3225,13 @@ static int fill_filter_caches(H264Context *h, int mb_type){
             nnz_cache[6+8*0]=
             nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
         }
-        if(IS_8x8DCT(left_type[0])){
+        if(IS_8x8DCT(left_type[LTOP])){
             nnz_cache[3+8*1]=
-            nnz_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
+            nnz_cache[3+8*2]= (h->cbp_table[left_xy[LTOP]]&0x2000) >> 12; //FIXME check MBAFF
         }
-        if(IS_8x8DCT(left_type[1])){
+        if(IS_8x8DCT(left_type[LBOT])){
             nnz_cache[3+8*3]=
-            nnz_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
+            nnz_cache[3+8*4]= (h->cbp_table[left_xy[LBOT]]&0x8000) >> 12; //FIXME check MBAFF
         }
         if(IS_8x8DCT(mb_type)){
...
@@ -1296,9 +1296,9 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl
     if(intra_slice){
         int ctx=0;
-        if( h->left_type[0] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
+        if( h->left_type[LTOP] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
             ctx++;
         if( h->top_type & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
             ctx++;
         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
             return 0;   /* I4x4 */
@@ -1376,10 +1376,10 @@ static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
     int ctx = 0;
     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
-    if( h->left_type[0] && h->chroma_pred_mode_table[mba_xy] != 0 )
+    if( h->left_type[LTOP] && h->chroma_pred_mode_table[mba_xy] != 0 )
         ctx++;
     if( h->top_type && h->chroma_pred_mode_table[mbb_xy] != 0 )
         ctx++;
     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
@@ -1880,7 +1880,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
         int ctx = 0;
         assert(h->slice_type_nos == AV_PICTURE_TYPE_B);
-        if( !IS_DIRECT( h->left_type[0]-1 ) )
+        if( !IS_DIRECT( h->left_type[LTOP]-1 ) )
             ctx++;
         if( !IS_DIRECT( h->top_type-1 ) )
             ctx++;
@@ -2250,7 +2250,7 @@ decode_intra_mb:
         int i;
         uint8_t *nnz_cache = h->non_zero_count_cache;
         for (i = 0; i < 2; i++){
-            if (h->left_type[i] && !IS_8x8DCT(h->left_type[i])){
+            if (h->left_type[LEFT(i)] && !IS_8x8DCT(h->left_type[LEFT(i)])){
                 nnz_cache[3+8* 1 + 2*8*i]=
                 nnz_cache[3+8* 2 + 2*8*i]=
                 nnz_cache[3+8* 6 + 2*8*i]=
...
@@ -227,7 +227,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
         return;
     }
     assert(!FRAME_MBAFF);
-    left_type= h->left_type[0];
+    left_type= h->left_type[LTOP];
     top_type= h->top_type;
     mb_type = s->current_picture.mb_type[mb_xy];
@@ -329,7 +329,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
             AV_WN64A(bS[1][2], 0x0002000200020002ULL);
         } else {
             int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0;
-            int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
+            int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[LTOP] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
             int step = 1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
             edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
             h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
@@ -411,7 +411,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
     int edge;
     int chroma_qp_avg[2];
     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
-    const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type;
+    const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type;
     // how often to recheck mv-based bS when iterating between edges
     static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1},
@@ -647,9 +647,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
     if (FRAME_MBAFF
             // and current and left pair do not have the same interlaced type
-            && IS_INTERLACED(mb_type^h->left_type[0])
+            && IS_INTERLACED(mb_type^h->left_type[LTOP])
             // and left mb is in available to us
-            && h->left_type[0]) {
+            && h->left_type[LTOP]) {
         /* First vertical edge is different in MBAFF frames
          * There are 8 different bS to compute and 2 different Qp
          */
@@ -677,8 +677,8 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
         const uint8_t *off= offset[MB_FIELD][mb_y&1];
         for( i = 0; i < 8; i++ ) {
             int j= MB_FIELD ? i>>2 : i&1;
-            int mbn_xy = h->left_mb_xy[j];
-            int mbn_type= h->left_type[j];
+            int mbn_xy = h->left_mb_xy[LEFT(j)];
+            int mbn_type= h->left_type[LEFT(j)];
             if( IS_INTRA( mbn_type ) )
                 bS[i] = 4;
...