Commit dc172ecc, authored by Oskar Arvidsson, committed by Michael Niedermayer

Add the notion of pixel size in h264 related functions.

In high bit depth the pixels will not be stored in uint8_t like in the
normal case, but in uint16_t. The pixel size is thus 1 in normal bit
depth and 2 in high bit depth.

Preparatory patch for high bit depth h264 decoding support.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 86b0d9cd
......@@ -1582,6 +1582,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
int perms = AV_PERM_WRITE;
int i, w, h, stride[4];
unsigned edge;
int pixel_size;
av_assert0(codec->flags & CODEC_FLAG_EMU_EDGE);
......@@ -1609,6 +1610,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
if(!(ref = avfilter_get_video_buffer(ctx->outputs[0], perms, w, h)))
return -1;
pixel_size = av_pix_fmt_descriptors[ref->format].comp[0].step_minus1+1;
ref->video->w = codec->width;
ref->video->h = codec->height;
for(i = 0; i < 4; i ++) {
......@@ -1616,7 +1618,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
unsigned vshift = (i == 1 || i == 2) ? av_pix_fmt_descriptors[ref->format].log2_chroma_h : 0;
if (ref->data[i]) {
ref->data[i] += (edge >> hshift) + ((edge * ref->linesize[i]) >> vshift);
ref->data[i] += ((edge * pixel_size) >> hshift) + ((edge * ref->linesize[i]) >> vshift);
}
pic->data[i] = ref->data[i];
pic->linesize[i] = ref->linesize[i];
......
This diff is collapsed.
......@@ -265,6 +265,7 @@ typedef struct MMCO{
typedef struct H264Context{
MpegEncContext s;
H264DSPContext h264dsp;
int pixel_size;
int chroma_qp[2]; //QPc
int qp_thresh; ///< QP threshold to skip loopfilter
......@@ -296,7 +297,7 @@ typedef struct H264Context{
unsigned int top_samples_available;
unsigned int topright_samples_available;
unsigned int left_samples_available;
uint8_t (*top_borders[2])[16+2*8];
uint8_t (*top_borders[2])[(16+2*8)*2];
/**
* non zero coeff count cache.
......@@ -406,9 +407,9 @@ typedef struct H264Context{
GetBitContext *intra_gb_ptr;
GetBitContext *inter_gb_ptr;
DECLARE_ALIGNED(16, DCTELEM, mb)[16*24];
DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16];
DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
DECLARE_ALIGNED(16, DCTELEM, mb)[16*24*2]; ///< as a dct coefficient is int32_t in high depth, we need to reserve twice the space.
DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16*2];
DCTELEM mb_padding[256*2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
/**
* Cabac
......
......@@ -1105,40 +1105,47 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
int j= scantable[index[--coeff_count]];
if( get_cabac( CC, ctx ) == 0 ) {
node_ctx = coeff_abs_level_transition[0][node_ctx];
if( is_dc ) {
block[j] = get_cabac_bypass_sign( CC, -1);
}else{
block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
}
} else {
int coeff_abs = 2;
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
node_ctx = coeff_abs_level_transition[1][node_ctx];
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
coeff_abs++;
}
if( coeff_abs >= 15 ) {
int j = 0;
while( get_cabac_bypass( CC ) ) {
j++;
}
coeff_abs=1;
while( j-- ) {
coeff_abs += coeff_abs + get_cabac_bypass( CC );
}
coeff_abs+= 14;
}
#define STORE_BLOCK(type) \
if( get_cabac( CC, ctx ) == 0 ) { \
node_ctx = coeff_abs_level_transition[0][node_ctx]; \
if( is_dc ) { \
((type*)block)[j] = get_cabac_bypass_sign( CC, -1); \
}else{ \
((type*)block)[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6; \
} \
} else { \
int coeff_abs = 2; \
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
node_ctx = coeff_abs_level_transition[1][node_ctx]; \
\
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
coeff_abs++; \
} \
\
if( coeff_abs >= 15 ) { \
int j = 0; \
while( get_cabac_bypass( CC ) ) { \
j++; \
} \
\
coeff_abs=1; \
while( j-- ) { \
coeff_abs += coeff_abs + get_cabac_bypass( CC ); \
} \
coeff_abs+= 14; \
} \
\
if( is_dc ) { \
((type*)block)[j] = get_cabac_bypass_sign( CC, -coeff_abs ); \
}else{ \
((type*)block)[j] = ((int)(get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32)) >> 6; \
} \
}
if( is_dc ) {
block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
}else{
block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
}
if (h->pixel_size == 2) {
STORE_BLOCK(int32_t)
} else {
STORE_BLOCK(int16_t)
}
} while( coeff_count );
#ifdef CABAC_ON_STACK
......@@ -1304,6 +1311,7 @@ decode_intra_mb:
h->slice_table[ mb_xy ]= h->slice_num;
if(IS_INTRA_PCM(mb_type)) {
const int mb_size = 384*h->sps.bit_depth_luma/8;
const uint8_t *ptr;
// We assume these blocks are very rare so we do not optimize it.
......@@ -1316,9 +1324,9 @@ decode_intra_mb:
}
// The pixels are stored in the same order as levels in h->mb array.
memcpy(h->mb, ptr, 256); ptr+=256;
memcpy(h->mb, ptr, 2*mb_size/3); ptr+=2*mb_size/3;
if(CHROMA){
memcpy(h->mb+128, ptr, 128); ptr+=128;
memcpy(h->mb+mb_size/3, ptr, mb_size/3); ptr+=mb_size/3;
}
ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
......@@ -1652,13 +1660,15 @@ decode_intra_mb:
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
AV_ZERO128(h->mb_luma_dc+0);
AV_ZERO128(h->mb_luma_dc+8);
AV_ZERO128(h->mb_luma_dc+16);
AV_ZERO128(h->mb_luma_dc+24);
decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
if( cbp&15 ) {
qmul = h->dequant4_coeff[0][s->qscale];
for( i = 0; i < 16; i++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
decode_cabac_residual_nondc(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
decode_cabac_residual_nondc(h, h->mb + 16*i*h->pixel_size, 1, i, scan + 1, qmul, 15);
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
......@@ -1668,7 +1678,7 @@ decode_intra_mb:
for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
if( cbp & (1<<i8x8) ) {
if( IS_8x8DCT(mb_type) ) {
decode_cabac_residual_nondc(h, h->mb + 64*i8x8, 5, 4*i8x8,
decode_cabac_residual_nondc(h, h->mb + 64*i8x8*h->pixel_size, 5, 4*i8x8,
scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
} else {
qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
......@@ -1676,7 +1686,7 @@ decode_intra_mb:
const int index = 4*i8x8 + i4x4;
//av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
//START_TIMER
decode_cabac_residual_nondc(h, h->mb + 16*index, 2, index, scan, qmul, 16);
decode_cabac_residual_nondc(h, h->mb + 16*index*h->pixel_size, 2, index, scan, qmul, 16);
//STOP_TIMER("decode_residual")
}
}
......@@ -1691,7 +1701,7 @@ decode_intra_mb:
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
decode_cabac_residual_dc(h, h->mb + (256 + 16*4*c)*h->pixel_size, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
}
}
......@@ -1702,7 +1712,7 @@ decode_intra_mb:
for( i = 0; i < 4; i++ ) {
const int index = 16 + 4 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
decode_cabac_residual_nondc(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
decode_cabac_residual_nondc(h, h->mb + 16*index*h->pixel_size, 4, index, scan + 1, qmul, 15);
}
}
} else {
......
......@@ -488,37 +488,44 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
}
scantable += zeros_left + total_coeff - 1;
if(n >= LUMA_DC_BLOCK_INDEX){
block[*scantable] = level[0];
for(i=1;i<total_coeff && zeros_left > 0;i++) {
if(zeros_left < 7)
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
else
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
zeros_left -= run_before;
scantable -= 1 + run_before;
block[*scantable]= level[i];
}
for(;i<total_coeff;i++) {
scantable--;
block[*scantable]= level[i];
}
}else{
block[*scantable] = (level[0] * qmul[*scantable] + 32)>>6;
for(i=1;i<total_coeff && zeros_left > 0;i++) {
if(zeros_left < 7)
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
else
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
zeros_left -= run_before;
scantable -= 1 + run_before;
block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
}
for(;i<total_coeff;i++) {
scantable--;
block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
}
#define STORE_BLOCK(type) \
scantable += zeros_left + total_coeff - 1; \
if(n >= LUMA_DC_BLOCK_INDEX){ \
((type*)block)[*scantable] = level[0]; \
for(i=1;i<total_coeff && zeros_left > 0;i++) { \
if(zeros_left < 7) \
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
else \
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
zeros_left -= run_before; \
scantable -= 1 + run_before; \
((type*)block)[*scantable]= level[i]; \
} \
for(;i<total_coeff;i++) { \
scantable--; \
((type*)block)[*scantable]= level[i]; \
} \
}else{ \
((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
for(i=1;i<total_coeff && zeros_left > 0;i++) { \
if(zeros_left < 7) \
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
else \
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
zeros_left -= run_before; \
scantable -= 1 + run_before; \
((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
} \
for(;i<total_coeff;i++) { \
scantable--; \
((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
} \
}
if (h->pixel_size == 2) {
STORE_BLOCK(int32_t)
} else {
STORE_BLOCK(int16_t)
}
if(zeros_left<0){
......@@ -605,7 +612,7 @@ decode_intra_mb:
align_get_bits(&s->gb);
// The pixels are stored in the same order as levels in h->mb array.
for(x=0; x < (CHROMA ? 384 : 256); x++){
for(x=0; x < (CHROMA ? 384 : 256)*h->sps.bit_depth_luma/8; x++){
((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
}
......@@ -941,6 +948,8 @@ decode_intra_mb:
if(IS_INTRA16x16(mb_type)){
AV_ZERO128(h->mb_luma_dc+0);
AV_ZERO128(h->mb_luma_dc+8);
AV_ZERO128(h->mb_luma_dc+16);
AV_ZERO128(h->mb_luma_dc+24);
if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
return -1; //FIXME continue if partitioned and other return -1 too
}
......@@ -951,7 +960,7 @@ decode_intra_mb:
for(i8x8=0; i8x8<4; i8x8++){
for(i4x4=0; i4x4<4; i4x4++){
const int index= i4x4 + 4*i8x8;
if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index*h->pixel_size, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
return -1;
}
}
......@@ -963,7 +972,7 @@ decode_intra_mb:
for(i8x8=0; i8x8<4; i8x8++){
if(cbp & (1<<i8x8)){
if(IS_8x8DCT(mb_type)){
DCTELEM *buf = &h->mb[64*i8x8];
DCTELEM *buf = &h->mb[64*i8x8*h->pixel_size];
uint8_t *nnz;
for(i4x4=0; i4x4<4; i4x4++){
if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
......@@ -976,7 +985,7 @@ decode_intra_mb:
for(i4x4=0; i4x4<4; i4x4++){
const int index= i4x4 + 4*i8x8;
if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
if( decode_residual(h, gb, h->mb + 16*index*h->pixel_size, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
return -1;
}
}
......@@ -990,7 +999,7 @@ decode_intra_mb:
if(cbp&0x30){
for(chroma_idx=0; chroma_idx<2; chroma_idx++)
if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
if( decode_residual(h, gb, h->mb + (256 + 16*4*chroma_idx)*h->pixel_size, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
return -1;
}
}
......@@ -1000,7 +1009,7 @@ decode_intra_mb:
const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
for(i4x4=0; i4x4<4; i4x4++){
const int index= 16 + 4*chroma_idx + i4x4;
if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
if( decode_residual(h, gb, h->mb + 16*index*h->pixel_size, index, scan + 1, qmul, 15) < 0){
return -1;
}
}
......
......@@ -650,10 +650,10 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
//{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
if( dir == 0 ) {
filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, h );
filter_mb_edgev( &img_y[4*edge*h->pixel_size], linesize, bS, qp, h );
if( (edge&1) == 0 ) {
filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, h->chroma_qp[0], h);
filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, h->chroma_qp[1], h);
filter_mb_edgecv( &img_cb[2*edge*h->pixel_size], uvlinesize, bS, h->chroma_qp[0], h);
filter_mb_edgecv( &img_cr[2*edge*h->pixel_size], uvlinesize, bS, h->chroma_qp[1], h);
}
} else {
filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
......
......@@ -286,6 +286,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
int unaligned;
AVPicture picture;
int stride_align[4];
const int pixel_size = av_pix_fmt_descriptors[s->pix_fmt].comp[0].step_minus1+1;
avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift);
......@@ -335,7 +336,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
if((s->flags&CODEC_FLAG_EMU_EDGE) || !size[2])
buf->data[i] = buf->base[i];
else
buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (EDGE_WIDTH>>h_shift), stride_align[i]);
buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (pixel_size*EDGE_WIDTH>>h_shift), stride_align[i]);
}
if(size[1] && !size[2])
ff_set_systematic_pal2((uint32_t*)buf->data[1], s->pix_fmt);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment