Commit df2d5b16 authored by Michael Niedermayer's avatar Michael Niedermayer

H264: Allow templating hl_decode_mb based on pixel_size.

Signed-off-by: 's avatarMichael Niedermayer <michaelni@gmx.at>
parent 179106ed
...@@ -996,7 +996,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src ...@@ -996,7 +996,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
} }
} }
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple, int pixel_shift){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
int deblock_left; int deblock_left;
int deblock_top; int deblock_top;
...@@ -1021,15 +1021,15 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c ...@@ -1021,15 +1021,15 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c
deblock_top = (s->mb_y > !!MB_FIELD); deblock_top = (s->mb_y > !!MB_FIELD);
} }
src_y -= linesize + 1 + h->pixel_shift; src_y -= linesize + 1 + pixel_shift;
src_cb -= uvlinesize + 1 + h->pixel_shift; src_cb -= uvlinesize + 1 + pixel_shift;
src_cr -= uvlinesize + 1 + h->pixel_shift; src_cr -= uvlinesize + 1 + pixel_shift;
top_border_m1 = h->top_borders[top_idx][s->mb_x-1]; top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
top_border = h->top_borders[top_idx][s->mb_x]; top_border = h->top_borders[top_idx][s->mb_x];
#define XCHG(a,b,xchg)\ #define XCHG(a,b,xchg)\
if (h->pixel_shift) {\ if (pixel_shift) {\
if (xchg) {\ if (xchg) {\
AV_SWAP64(b+0,a+0);\ AV_SWAP64(b+0,a+0);\
AV_SWAP64(b+8,a+8);\ AV_SWAP64(b+8,a+8);\
...@@ -1042,41 +1042,41 @@ else AV_COPY64(b,a); ...@@ -1042,41 +1042,41 @@ else AV_COPY64(b,a);
if(deblock_top){ if(deblock_top){
if(deblock_left){ if(deblock_left){
XCHG(top_border_m1+(8<<h->pixel_shift), src_y -(7<<h->pixel_shift), 1); XCHG(top_border_m1+(8<<pixel_shift), src_y -(7<<h->pixel_shift), 1);
} }
XCHG(top_border+(0<<h->pixel_shift), src_y +(1<<h->pixel_shift), xchg); XCHG(top_border+(0<<pixel_shift), src_y +(1<<pixel_shift), xchg);
XCHG(top_border+(8<<h->pixel_shift), src_y +(9<<h->pixel_shift), 1); XCHG(top_border+(8<<pixel_shift), src_y +(9<<pixel_shift), 1);
if(s->mb_x+1 < s->mb_width){ if(s->mb_x+1 < s->mb_width){
XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<<h->pixel_shift), 1); XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<<pixel_shift), 1);
} }
} }
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if(deblock_top){ if(deblock_top){
if(deblock_left){ if(deblock_left){
XCHG(top_border_m1+(16<<h->pixel_shift), src_cb -(7<<h->pixel_shift), 1); XCHG(top_border_m1+(16<<pixel_shift), src_cb -(7<<pixel_shift), 1);
XCHG(top_border_m1+(24<<h->pixel_shift), src_cr -(7<<h->pixel_shift), 1); XCHG(top_border_m1+(24<<pixel_shift), src_cr -(7<<pixel_shift), 1);
} }
XCHG(top_border+(16<<h->pixel_shift), src_cb+1+h->pixel_shift, 1); XCHG(top_border+(16<<pixel_shift), src_cb+1+pixel_shift, 1);
XCHG(top_border+(24<<h->pixel_shift), src_cr+1+h->pixel_shift, 1); XCHG(top_border+(24<<pixel_shift), src_cr+1+pixel_shift, 1);
} }
} }
} }
static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index) { static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index, int pixel_shift) {
if (!h->pixel_shift) if (!pixel_shift)
return mb[index]; return mb[index];
else else
return ((int32_t*)mb)[index]; return ((int32_t*)mb)[index];
} }
static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value) { static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value, int pixel_shift) {
if (!h->pixel_shift) if (!pixel_shift)
mb[index] = value; mb[index] = value;
else else
((int32_t*)mb)[index] = value; ((int32_t*)mb)[index] = value;
} }
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
const int mb_x= s->mb_x; const int mb_x= s->mb_x;
const int mb_y= s->mb_y; const int mb_y= s->mb_y;
...@@ -1092,12 +1092,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1092,12 +1092,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
dest_y = s->current_picture.data[0] + ((mb_x<<h->pixel_shift) + mb_y * s->linesize ) * 16; dest_y = s->current_picture.data[0] + ((mb_x<<pixel_shift) + mb_y * s->linesize ) * 16;
dest_cb = s->current_picture.data[1] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8; dest_cb = s->current_picture.data[1] + ((mb_x<<pixel_shift) + mb_y * s->uvlinesize) * 8;
dest_cr = s->current_picture.data[2] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8; dest_cr = s->current_picture.data[2] + ((mb_x<<pixel_shift) + mb_y * s->uvlinesize) * 8;
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<<h->pixel_shift), s->linesize, 4); s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<<pixel_shift), s->linesize, 4);
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<<h->pixel_shift), dest_cr - dest_cb, 2); s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<<pixel_shift), dest_cr - dest_cb, 2);
h->list_counts[mb_xy]= h->list_count; h->list_counts[mb_xy]= h->list_count;
...@@ -1134,7 +1134,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1134,7 +1134,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
} }
if (!simple && IS_INTRA_PCM(mb_type)) { if (!simple && IS_INTRA_PCM(mb_type)) {
if (h->pixel_shift) { if (pixel_shift) {
const int bit_depth = h->sps.bit_depth_luma; const int bit_depth = h->sps.bit_depth_luma;
int j; int j;
GetBitContext gb; GetBitContext gb;
...@@ -1167,7 +1167,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1167,7 +1167,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
} else { } else {
if(IS_INTRA(mb_type)){ if(IS_INTRA(mb_type)){
if(h->deblocking_filter) if(h->deblocking_filter)
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple); xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift);
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
...@@ -1188,16 +1188,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1188,16 +1188,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
uint8_t * const ptr= dest_y + block_offset[i]; uint8_t * const ptr= dest_y + block_offset[i];
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<<h->pixel_shift), linesize); h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<<pixel_shift), linesize);
}else{ }else{
const int nnz = h->non_zero_count_cache[ scan8[i] ]; const int nnz = h->non_zero_count_cache[ scan8[i] ];
h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
(h->topright_samples_available<<i)&0x4000, linesize); (h->topright_samples_available<<i)&0x4000, linesize);
if(nnz){ if(nnz){
if(nnz == 1 && dctcoef_get(h, h->mb, i*16)) if(nnz == 1 && dctcoef_get(h, h->mb, i*16, pixel_shift))
idct_dc_add(ptr, h->mb + (i*16<<h->pixel_shift), linesize); idct_dc_add(ptr, h->mb + (i*16<<pixel_shift), linesize);
else else
idct_add (ptr, h->mb + (i*16<<h->pixel_shift), linesize); idct_add (ptr, h->mb + (i*16<<pixel_shift), linesize);
} }
} }
} }
...@@ -1214,7 +1214,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1214,7 +1214,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<<h->pixel_shift), linesize); h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<<pixel_shift), linesize);
}else{ }else{
uint8_t *topright; uint8_t *topright;
int nnz, tr; int nnz, tr;
...@@ -1223,7 +1223,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1223,7 +1223,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
const int topright_avail= (h->topright_samples_available<<i)&0x8000; const int topright_avail= (h->topright_samples_available<<i)&0x8000;
assert(mb_y || linesize <= block_offset[i]); assert(mb_y || linesize <= block_offset[i]);
if(!topright_avail){ if(!topright_avail){
if (h->pixel_shift) { if (pixel_shift) {
tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL; tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
topright= (uint8_t*) &tr_high; topright= (uint8_t*) &tr_high;
} else { } else {
...@@ -1231,7 +1231,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1231,7 +1231,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
topright= (uint8_t*) &tr; topright= (uint8_t*) &tr;
} }
}else }else
topright= ptr + (4<<h->pixel_shift) - linesize; topright= ptr + (4<<pixel_shift) - linesize;
}else }else
topright= NULL; topright= NULL;
...@@ -1239,10 +1239,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1239,10 +1239,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
nnz = h->non_zero_count_cache[ scan8[i] ]; nnz = h->non_zero_count_cache[ scan8[i] ];
if(nnz){ if(nnz){
if(is_h264){ if(is_h264){
if(nnz == 1 && dctcoef_get(h, h->mb, i*16)) if(nnz == 1 && dctcoef_get(h, h->mb, i*16, pixel_shift))
idct_dc_add(ptr, h->mb + (i*16<<h->pixel_shift), linesize); idct_dc_add(ptr, h->mb + (i*16<<pixel_shift), linesize);
else else
idct_add (ptr, h->mb + (i*16<<h->pixel_shift), linesize); idct_add (ptr, h->mb + (i*16<<pixel_shift), linesize);
} }
#if CONFIG_SVQ3_DECODER #if CONFIG_SVQ3_DECODER
else else
...@@ -1263,7 +1263,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1263,7 +1263,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16, static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16}; 8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
for(i = 0; i < 16; i++) for(i = 0; i < 16; i++)
dctcoef_set(h, h->mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i)); dctcoef_set(h, h->mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i,pixel_shift),pixel_shift);
} }
} }
} }
...@@ -1273,7 +1273,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1273,7 +1273,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
#endif #endif
} }
if(h->deblocking_filter) if(h->deblocking_filter)
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
}else if(is_h264){ }else if(is_h264){
ff_hl_motion(h, dest_y, dest_cb, dest_cr, ff_hl_motion(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
...@@ -1290,8 +1290,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1290,8 +1290,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
}else{ }else{
for(i=0; i<16; i++){ for(i=0; i<16; i++){
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16)) if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift))
s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<<h->pixel_shift), linesize); s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<<pixel_shift), linesize);
} }
} }
}else{ }else{
...@@ -1303,7 +1303,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1303,7 +1303,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
for(i=0; i<16; i+=di){ for(i=0; i<16; i+=di){
if(h->non_zero_count_cache[ scan8[i] ]){ if(h->non_zero_count_cache[ scan8[i] ]){
idct_add(dest_y + block_offset[i], h->mb + (i*16<<h->pixel_shift), linesize); idct_add(dest_y + block_offset[i], h->mb + (i*16<<pixel_shift), linesize);
} }
} }
}else{ }else{
...@@ -1331,21 +1331,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1331,21 +1331,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
uint8_t *dest[2] = {dest_cb, dest_cr}; uint8_t *dest[2] = {dest_cb, dest_cr};
if(transform_bypass){ if(transform_bypass){
if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<<h->pixel_shift), uvlinesize); h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<<pixel_shift), uvlinesize);
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<<h->pixel_shift), uvlinesize); h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<<pixel_shift), uvlinesize);
}else{ }else{
idct_add = s->dsp.add_pixels4; idct_add = s->dsp.add_pixels4;
for(i=16; i<16+8; i++){ for(i=16; i<16+8; i++){
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16)) if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift))
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<<h->pixel_shift), uvlinesize); idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<<pixel_shift), uvlinesize);
} }
} }
}else{ }else{
if(is_h264){ if(is_h264){
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<<h->pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<<pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<<h->pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<<pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
h->h264dsp.h264_idct_add8(dest, block_offset, h->h264dsp.h264_idct_add8(dest, block_offset,
h->mb, uvlinesize, h->mb, uvlinesize,
h->non_zero_count_cache); h->non_zero_count_cache);
...@@ -1372,15 +1372,15 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1372,15 +1372,15 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
/** /**
* Process a macroblock; this case avoids checks for expensive uncommon cases. * Process a macroblock; this case avoids checks for expensive uncommon cases.
*/ */
static void hl_decode_mb_simple(H264Context *h){ static void hl_decode_mb_simple8(H264Context *h){
hl_decode_mb_internal(h, 1); hl_decode_mb_internal(h, 1, 0);
} }
/** /**
* Process a macroblock; this handles edge cases, such as interlacing. * Process a macroblock; this handles edge cases, such as interlacing.
*/ */
static void av_noinline hl_decode_mb_complex(H264Context *h){ static void av_noinline hl_decode_mb_complex(H264Context *h){
hl_decode_mb_internal(h, 0); hl_decode_mb_internal(h, 0, h->pixel_shift);
} }
void ff_h264_hl_decode_mb(H264Context *h){ void ff_h264_hl_decode_mb(H264Context *h){
...@@ -1389,9 +1389,11 @@ void ff_h264_hl_decode_mb(H264Context *h){ ...@@ -1389,9 +1389,11 @@ void ff_h264_hl_decode_mb(H264Context *h){
const int mb_type= s->current_picture.mb_type[mb_xy]; const int mb_type= s->current_picture.mb_type[mb_xy];
int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
if (is_complex) if (is_complex || h->pixel_shift)
hl_decode_mb_complex(h); hl_decode_mb_complex(h);
else hl_decode_mb_simple(h); else{
hl_decode_mb_simple8(h);
}
} }
static int pred_weight_table(H264Context *h){ static int pred_weight_table(H264Context *h){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment