Commit 6e3ef511 authored by Oskar Arvidsson's avatar Oskar Arvidsson Committed by Ronald S. Bultje

Add the notion of pixel size in h264 related functions.

In high bit depth the pixels will not be stored in uint8_t like in the
normal case, but in uint16_t. The pixel size is thus 1 in normal bit
depth and 2 in high bit depth.

Preparatory patch for high bit depth h264 decoding support.
Signed-off-by: 's avatarRonald S. Bultje <rsbultje@gmail.com>
parent 44ca80df
......@@ -1577,6 +1577,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
int perms = AV_PERM_WRITE;
int i, w, h, stride[4];
unsigned edge;
int pixel_size;
if (codec->codec->capabilities & CODEC_CAP_NEG_LINESIZES)
perms |= AV_PERM_NEG_LINESIZES;
......@@ -1598,6 +1599,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
if(!(ref = avfilter_get_video_buffer(ctx->outputs[0], perms, w, h)))
return -1;
pixel_size = av_pix_fmt_descriptors[ref->format].comp[0].step_minus1+1;
ref->video->w = codec->width;
ref->video->h = codec->height;
for(i = 0; i < 4; i ++) {
......@@ -1605,7 +1607,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
unsigned vshift = (i == 1 || i == 2) ? av_pix_fmt_descriptors[ref->format].log2_chroma_h : 0;
if (ref->data[i]) {
ref->data[i] += (edge >> hshift) + ((edge * ref->linesize[i]) >> vshift);
ref->data[i] += ((edge * pixel_size) >> hshift) + ((edge * ref->linesize[i]) >> vshift);
}
pic->data[i] = ref->data[i];
pic->linesize[i] = ref->linesize[i];
......
......@@ -314,12 +314,13 @@ static void chroma_dc_dct_c(DCTELEM *block){
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int src_x_offset, int src_y_offset,
qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
int pixel_shift){
MpegEncContext * const s = &h->s;
const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
const int luma_xy= (mx&3) + ((my&3)<<2);
uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
uint8_t * src_y = pic->data[0] + ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
uint8_t * src_cb, * src_cr;
int extra_width= h->emu_edge_width;
int extra_height= h->emu_edge_height;
......@@ -336,8 +337,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
|| full_my < 0-extra_height
|| full_mx + 16/*FIXME*/ > pic_width + extra_width
|| full_my + 16/*FIXME*/ > pic_height + extra_height){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
emu=1;
}
......@@ -353,8 +354,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
}
src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
......@@ -374,14 +375,14 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
int x_offset, int y_offset,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
int list0, int list1){
int list0, int list1, int pixel_shift){
MpegEncContext * const s = &h->s;
qpel_mc_func *qpix_op= qpix_put;
h264_chroma_mc_func chroma_op= chroma_put;
dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
dest_cb += x_offset + y_offset*h->mb_uvlinesize;
dest_cr += x_offset + y_offset*h->mb_uvlinesize;
dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h-> mb_linesize;
dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
x_offset += 8*s->mb_x;
y_offset += 8*(s->mb_y >> MB_FIELD);
......@@ -389,7 +390,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op);
qpix_op, chroma_op, pixel_shift);
qpix_op= qpix_avg;
chroma_op= chroma_avg;
......@@ -399,7 +400,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op);
qpix_op, chroma_op, pixel_shift);
}
}
......@@ -409,12 +410,12 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
int list0, int list1){
int list0, int list1, int pixel_shift){
MpegEncContext * const s = &h->s;
dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
dest_cb += x_offset + y_offset*h->mb_uvlinesize;
dest_cr += x_offset + y_offset*h->mb_uvlinesize;
dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h-> mb_linesize;
dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
x_offset += 8*s->mb_x;
y_offset += 8*(s->mb_y >> MB_FIELD);
......@@ -422,17 +423,17 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
/* don't optimize for luma-only case, since B-frames usually
* use implicit weights => chroma too. */
uint8_t *tmp_cb = s->obmc_scratchpad;
uint8_t *tmp_cr = s->obmc_scratchpad + 8;
uint8_t *tmp_cr = s->obmc_scratchpad + (8 << pixel_shift);
uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
int refn0 = h->ref_cache[0][ scan8[n] ];
int refn1 = h->ref_cache[1][ scan8[n] ];
mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put);
x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
tmp_y, tmp_cb, tmp_cr,
x_offset, y_offset, qpix_put, chroma_put);
x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
if(h->use_weight == 2){
int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
......@@ -457,7 +458,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
Picture *ref= &h->ref_list[list][refn];
mc_dir_part(h, ref, n, square, chroma_height, delta, list,
dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put, chroma_put);
qpix_put, chroma_put, pixel_shift);
luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
......@@ -476,19 +477,21 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
int list0, int list1){
int list0, int list1, int pixel_shift){
if((h->use_weight==2 && list0 && list1
&& (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
|| h->use_weight==1)
mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put,
weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
weight_op[0], weight_op[3], weight_avg[0],
weight_avg[3], list0, list1, pixel_shift);
else
mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
chroma_avg, list0, list1, pixel_shift);
}
static inline void prefetch_motion(H264Context *h, int list){
static inline void prefetch_motion(H264Context *h, int list, int pixel_shift){
/* fetch pixels for estimated mv 4 macroblocks ahead
* optimized for 64byte cache lines */
MpegEncContext * const s = &h->s;
......@@ -497,48 +500,54 @@ static inline void prefetch_motion(H264Context *h, int list){
const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
uint8_t **src= h->ref_list[list][refn].data;
int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
s->dsp.prefetch(src[0]+off, s->linesize, 4);
off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
}
}
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
h264_weight_func *weight_op, h264_biweight_func *weight_avg){
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
int pixel_shift){
MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy;
const int mb_type= s->current_picture.mb_type[mb_xy];
assert(IS_INTER(mb_type));
prefetch_motion(h, 0);
prefetch_motion(h, 0, pixel_shift);
if(IS_16X16(mb_type)){
mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
weight_op, weight_avg,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift);
}else if(IS_16X8(mb_type)){
mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
&weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift);
mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
&weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift);
}else if(IS_8X16(mb_type)){
mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[2], &weight_avg[2],
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift);
mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[2], &weight_avg[2],
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift);
}else{
int i;
......@@ -554,25 +563,30 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[3], &weight_avg[3],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift);
}else if(IS_SUB_8X4(sub_mb_type)){
mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[4], &weight_avg[4],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift);
mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[4], &weight_avg[4],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift);
}else if(IS_SUB_4X8(sub_mb_type)){
mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[5], &weight_avg[5],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift);
mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[5], &weight_avg[5],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift);
}else{
int j;
assert(IS_SUB_4X4(sub_mb_type));
......@@ -582,15 +596,32 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[6], &weight_avg[6],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift);
}
}
}
}
prefetch_motion(h, 1);
prefetch_motion(h, 1, pixel_shift);
}
#define hl_motion_fn(sh, bits) \
static av_always_inline void hl_motion_ ## bits(H264Context *h, \
uint8_t *dest_y, \
uint8_t *dest_cb, uint8_t *dest_cr, \
qpel_mc_func (*qpix_put)[16], \
h264_chroma_mc_func (*chroma_put), \
qpel_mc_func (*qpix_avg)[16], \
h264_chroma_mc_func (*chroma_avg), \
h264_weight_func *weight_op, \
h264_biweight_func *weight_avg) \
{ \
hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
qpix_avg, chroma_avg, weight_op, weight_avg, sh); \
}
hl_motion_fn(0, 8);
hl_motion_fn(1, 16);
static void free_tables(H264Context *h, int free_rbsp){
int i;
......@@ -758,8 +789,8 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
* Allocate buffers which are not shared amongst multiple threads.
*/
static int context_init(H264Context *h){
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
......@@ -861,6 +892,8 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
ff_h264_decode_init_vlc();
h->pixel_shift = 0;
h->thread_context[0] = h;
h->outputed_poc = INT_MIN;
h->prev_poc_msb= 1<<16;
......@@ -888,6 +921,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
int ff_h264_frame_start(H264Context *h){
MpegEncContext * const s = &h->s;
int i;
const int pixel_shift = h->pixel_shift;
if(MPV_frame_start(s, s->avctx) < 0)
return -1;
......@@ -904,14 +938,14 @@ int ff_h264_frame_start(H264Context *h){
assert(s->linesize && s->uvlinesize);
for(i=0; i<16; i++){
h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
}
for(i=0; i<4; i++){
h->block_offset[16+i]=
h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
h->block_offset[24+16+i]=
h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
}
/* can't be in alloc_tables because linesize isn't known there.
......@@ -945,6 +979,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
MpegEncContext * const s = &h->s;
uint8_t *top_border;
int top_idx = 1;
const int pixel_shift = h->pixel_shift;
src_y -= linesize;
src_cb -= uvlinesize;
......@@ -955,9 +990,16 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
if(!MB_MBAFF){
top_border = h->top_borders[0][s->mb_x];
AV_COPY128(top_border, src_y + 15*linesize);
if (pixel_shift)
AV_COPY128(top_border+16, src_y+15*linesize+16);
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if (pixel_shift) {
AV_COPY128(top_border+32, src_cb+7*uvlinesize);
AV_COPY128(top_border+48, src_cr+7*uvlinesize);
} else {
AV_COPY64(top_border+16, src_cb+7*uvlinesize);
AV_COPY64(top_border+24, src_cr+7*uvlinesize);
}
}
}
}else if(MB_MBAFF){
......@@ -970,14 +1012,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
// There are two lines saved, the line above the the top macroblock of a pair,
// and the line above the bottom macroblock
AV_COPY128(top_border, src_y + 16*linesize);
if (pixel_shift)
AV_COPY128(top_border+16, src_y+16*linesize+16);
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if (pixel_shift) {
AV_COPY128(top_border+32, src_cb+8*uvlinesize);
AV_COPY128(top_border+48, src_cr+8*uvlinesize);
} else {
AV_COPY64(top_border+16, src_cb+8*uvlinesize);
AV_COPY64(top_border+24, src_cr+8*uvlinesize);
}
}
}
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
uint8_t *src_cb, uint8_t *src_cr,
int linesize, int uvlinesize,
int xchg, int simple, int pixel_shift){
MpegEncContext * const s = &h->s;
int deblock_left;
int deblock_top;
......@@ -1002,41 +1054,62 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c
deblock_top = (s->mb_y > !!MB_FIELD);
}
src_y -= linesize + 1;
src_cb -= uvlinesize + 1;
src_cr -= uvlinesize + 1;
src_y -= linesize + 1 + pixel_shift;
src_cb -= uvlinesize + 1 + pixel_shift;
src_cr -= uvlinesize + 1 + pixel_shift;
top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
top_border = h->top_borders[top_idx][s->mb_x];
#define XCHG(a,b,xchg)\
if (pixel_shift) {\
if (xchg) {\
AV_SWAP64(b+0,a+0);\
AV_SWAP64(b+8,a+8);\
} else {\
AV_COPY128(b,a); \
}\
} else \
if (xchg) AV_SWAP64(b,a);\
else AV_COPY64(b,a);
if(deblock_top){
if(deblock_left){
XCHG(top_border_m1+8, src_y -7, 1);
XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
}
XCHG(top_border+0, src_y +1, xchg);
XCHG(top_border+8, src_y +9, 1);
XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
if(s->mb_x+1 < s->mb_width){
XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
}
}
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if(deblock_top){
if(deblock_left){
XCHG(top_border_m1+16, src_cb -7, 1);
XCHG(top_border_m1+24, src_cr -7, 1);
XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
}
XCHG(top_border+16, src_cb+1, 1);
XCHG(top_border+24, src_cr+1, 1);
XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
}
}
}
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
if (high_bit_depth) {
return AV_RN32A(((int32_t*)mb) + index);
} else
return AV_RN16A(mb + index);
}
static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
if (high_bit_depth) {
AV_WN32A(((int32_t*)mb) + index, value);
} else
AV_WN16A(mb + index, value);
}
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
MpegEncContext * const s = &h->s;
const int mb_x= s->mb_x;
const int mb_y= s->mb_y;
......@@ -1052,12 +1125,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
h->list_counts[mb_xy]= h->list_count;
......@@ -1094,6 +1167,28 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
}
if (!simple && IS_INTRA_PCM(mb_type)) {
if (pixel_shift) {
const int bit_depth = h->sps.bit_depth_luma;
int j;
GetBitContext gb;
init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);
for (i = 0; i < 16; i++) {
uint16_t *tmp_y = (uint16_t*)(dest_y + i*linesize);
for (j = 0; j < 16; j++)
tmp_y[j] = get_bits(&gb, bit_depth);
}
for (i = 0; i < 8; i++) {
uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
for (j = 0; j < 8; j++)
tmp_cb[j] = get_bits(&gb, bit_depth);
}
for (i = 0; i < 8; i++) {
uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
for (j = 0; j < 8; j++)
tmp_cr[j] = get_bits(&gb, bit_depth);
}
} else {
for (i=0; i<16; i++) {
memcpy(dest_y + i* linesize, h->mb + i*8, 16);
}
......@@ -1101,10 +1196,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
}
}
} else {
if(IS_INTRA(mb_type)){
if(h->deblocking_filter)
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift);
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
......@@ -1125,16 +1221,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
uint8_t * const ptr= dest_y + block_offset[i];
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
}else{
const int nnz = h->non_zero_count_cache[ scan8[i] ];
h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
(h->topright_samples_available<<i)&0x4000, linesize);
if(nnz){
if(nnz == 1 && h->mb[i*16])
idct_dc_add(ptr, h->mb + i*16, linesize);
if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
else
idct_add (ptr, h->mb + i*16, linesize);
idct_add (ptr, h->mb + (i*16 << pixel_shift), linesize);
}
}
}
......@@ -1151,18 +1247,24 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
}else{
uint8_t *topright;
int nnz, tr;
uint64_t tr_high;
if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
const int topright_avail= (h->topright_samples_available<<i)&0x8000;
assert(mb_y || linesize <= block_offset[i]);
if(!topright_avail){
if (pixel_shift) {
tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
topright= (uint8_t*) &tr_high;
} else {
tr= ptr[3 - linesize]*0x01010101;
topright= (uint8_t*) &tr;
}
}else
topright= ptr + 4 - linesize;
topright= ptr + (4 << pixel_shift) - linesize;
}else
topright= NULL;
......@@ -1170,10 +1272,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
nnz = h->non_zero_count_cache[ scan8[i] ];
if(nnz){
if(is_h264){
if(nnz == 1 && h->mb[i*16])
idct_dc_add(ptr, h->mb + i*16, linesize);
if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
else
idct_add (ptr, h->mb + i*16, linesize);
idct_add (ptr, h->mb + (i*16 << pixel_shift), linesize);
}else
ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
}
......@@ -1191,19 +1293,27 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
for(i = 0; i < 16; i++)
h->mb[dc_mapping[i]] = h->mb_luma_dc[i];
dctcoef_set(h->mb, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc, pixel_shift, i));
}
}
}else
ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
}
if(h->deblocking_filter)
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
}else if(is_h264){
hl_motion(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
if (pixel_shift) {
hl_motion_16(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab);
} else
hl_motion_8(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab);
}
......@@ -1215,8 +1325,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
}else{
for(i=0; i<16; i++){
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
}
}
}else{
......@@ -1228,7 +1338,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
for(i=0; i<16; i+=di){
if(h->non_zero_count_cache[ scan8[i] ]){
idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
idct_add(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
}
}
}else{
......@@ -1253,21 +1363,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
uint8_t *dest[2] = {dest_cb, dest_cr};
if(transform_bypass){
if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16 << pixel_shift), uvlinesize);
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16 << pixel_shift), uvlinesize);
}else{
idct_add = s->dsp.add_pixels4;
for(i=16; i<16+8; i++){
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
}
}
}else{
if(is_h264){
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16 << pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16) << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
h->h264dsp.h264_idct_add8(dest, block_offset,
h->mb, uvlinesize,
h->non_zero_count_cache);
......@@ -1291,15 +1401,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
/**
* Process a macroblock; this case avoids checks for expensive uncommon cases.
*/
static void hl_decode_mb_simple(H264Context *h){
hl_decode_mb_internal(h, 1);
#define hl_decode_mb_simple(sh, bits) \
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
hl_decode_mb_internal(h, 1, sh); \
}
hl_decode_mb_simple(0, 8);
hl_decode_mb_simple(1, 16);
/**
* Process a macroblock; this handles edge cases, such as interlacing.
*/
static void av_noinline hl_decode_mb_complex(H264Context *h){
hl_decode_mb_internal(h, 0);
hl_decode_mb_internal(h, 0, h->pixel_shift);
}
void ff_h264_hl_decode_mb(H264Context *h){
......@@ -1308,9 +1421,12 @@ void ff_h264_hl_decode_mb(H264Context *h){
const int mb_type= s->current_picture.mb_type[mb_xy];
int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
if (is_complex)
if (is_complex) {
hl_decode_mb_complex(h);
else hl_decode_mb_simple(h);
} else if (h->pixel_shift) {
hl_decode_mb_simple_16(h);
} else
hl_decode_mb_simple_8(h);
}
static int pred_weight_table(H264Context *h){
......@@ -2432,6 +2548,7 @@ static void loop_filter(H264Context *h){
int linesize, uvlinesize, mb_x, mb_y;
const int end_mb_y= s->mb_y + FRAME_MBAFF;
const int old_slice_type= h->slice_type;
const int pixel_shift = h->pixel_shift;
if(h->deblocking_filter) {
for(mb_x= 0; mb_x<s->mb_width; mb_x++){
......@@ -2447,9 +2564,9 @@ static void loop_filter(H264Context *h){
s->mb_x= mb_x;
s->mb_y= mb_y;
dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
//FIXME simplify above
if (MB_FIELD) {
......
......@@ -265,6 +265,7 @@ typedef struct MMCO{
typedef struct H264Context{
MpegEncContext s;
H264DSPContext h264dsp;
int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264
int chroma_qp[2]; //QPc
int qp_thresh; ///< QP threshold to skip loopfilter
......@@ -296,7 +297,7 @@ typedef struct H264Context{
unsigned int top_samples_available;
unsigned int topright_samples_available;
unsigned int left_samples_available;
uint8_t (*top_borders[2])[16+2*8];
uint8_t (*top_borders[2])[(16+2*8)*2];
/**
* non zero coeff count cache.
......@@ -406,9 +407,9 @@ typedef struct H264Context{
GetBitContext *intra_gb_ptr;
GetBitContext *inter_gb_ptr;
DECLARE_ALIGNED(16, DCTELEM, mb)[16*24];
DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16];
DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
DECLARE_ALIGNED(16, DCTELEM, mb)[16*24*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space.
DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16*2];
DCTELEM mb_padding[256*2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
/**
* Cabac
......
......@@ -1100,47 +1100,54 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
}
}
do {
uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
int j= scantable[index[--coeff_count]];
if( get_cabac( CC, ctx ) == 0 ) {
node_ctx = coeff_abs_level_transition[0][node_ctx];
if( is_dc ) {
block[j] = get_cabac_bypass_sign( CC, -1);
}else{
block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
}
} else {
int coeff_abs = 2;
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
node_ctx = coeff_abs_level_transition[1][node_ctx];
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
coeff_abs++;
}
if( coeff_abs >= 15 ) {
int j = 0;
while( get_cabac_bypass( CC ) ) {
j++;
}
coeff_abs=1;
while( j-- ) {
coeff_abs += coeff_abs + get_cabac_bypass( CC );
}
coeff_abs+= 14;
}
#define STORE_BLOCK(type) \
do { \
uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base; \
\
int j= scantable[index[--coeff_count]]; \
\
if( get_cabac( CC, ctx ) == 0 ) { \
node_ctx = coeff_abs_level_transition[0][node_ctx]; \
if( is_dc ) { \
((type*)block)[j] = get_cabac_bypass_sign( CC, -1); \
}else{ \
((type*)block)[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6; \
} \
} else { \
int coeff_abs = 2; \
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
node_ctx = coeff_abs_level_transition[1][node_ctx]; \
\
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
coeff_abs++; \
} \
\
if( coeff_abs >= 15 ) { \
int j = 0; \
while( get_cabac_bypass( CC ) ) { \
j++; \
} \
\
coeff_abs=1; \
while( j-- ) { \
coeff_abs += coeff_abs + get_cabac_bypass( CC ); \
} \
coeff_abs+= 14; \
} \
\
if( is_dc ) { \
((type*)block)[j] = get_cabac_bypass_sign( CC, -coeff_abs ); \
}else{ \
((type*)block)[j] = ((int)(get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32)) >> 6; \
} \
} \
} while ( coeff_count );
if( is_dc ) {
block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
}else{
block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
}
}
} while( coeff_count );
if (h->pixel_shift) {
STORE_BLOCK(int32_t)
} else {
STORE_BLOCK(int16_t)
}
#ifdef CABAC_ON_STACK
h->cabac.range = cc.range ;
h->cabac.low = cc.low ;
......@@ -1196,6 +1203,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
int mb_xy;
int mb_type, partition_count, cbp = 0;
int dct8x8_allowed= h->pps.transform_8x8_mode;
const int pixel_shift = h->pixel_shift;
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
......@@ -1304,6 +1312,7 @@ decode_intra_mb:
h->slice_table[ mb_xy ]= h->slice_num;
if(IS_INTRA_PCM(mb_type)) {
const int mb_size = (384*h->sps.bit_depth_luma) >> 3;
const uint8_t *ptr;
// We assume these blocks are very rare so we do not optimize it.
......@@ -1316,9 +1325,9 @@ decode_intra_mb:
}
// The pixels are stored in the same order as levels in h->mb array.
memcpy(h->mb, ptr, 256); ptr+=256;
memcpy(h->mb, ptr, 2*mb_size/3); ptr+=2*mb_size/3;
if(CHROMA){
memcpy(h->mb+128, ptr, 128); ptr+=128;
memcpy(h->mb+mb_size/3, ptr, mb_size/3); ptr+=mb_size/3;
}
ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
......@@ -1652,13 +1661,15 @@ decode_intra_mb:
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
AV_ZERO128(h->mb_luma_dc+0);
AV_ZERO128(h->mb_luma_dc+8);
AV_ZERO128(h->mb_luma_dc+16);
AV_ZERO128(h->mb_luma_dc+24);
decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
if( cbp&15 ) {
qmul = h->dequant4_coeff[0][s->qscale];
for( i = 0; i < 16; i++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
decode_cabac_residual_nondc(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
decode_cabac_residual_nondc(h, h->mb + (16*i << pixel_shift), 1, i, scan + 1, qmul, 15);
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
......@@ -1668,7 +1679,7 @@ decode_intra_mb:
for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
if( cbp & (1<<i8x8) ) {
if( IS_8x8DCT(mb_type) ) {
decode_cabac_residual_nondc(h, h->mb + 64*i8x8, 5, 4*i8x8,
decode_cabac_residual_nondc(h, h->mb + (64*i8x8 << pixel_shift), 5, 4*i8x8,
scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
} else {
qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
......@@ -1676,7 +1687,7 @@ decode_intra_mb:
const int index = 4*i8x8 + i4x4;
//av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
//START_TIMER
decode_cabac_residual_nondc(h, h->mb + 16*index, 2, index, scan, qmul, 16);
decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 2, index, scan, qmul, 16);
//STOP_TIMER("decode_residual")
}
}
......@@ -1691,7 +1702,7 @@ decode_intra_mb:
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
}
}
......@@ -1702,7 +1713,7 @@ decode_intra_mb:
for( i = 0; i < 4; i++ ) {
const int index = 16 + 4 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
decode_cabac_residual_nondc(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
}
}
} else {
......
......@@ -488,37 +488,44 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
}
scantable += zeros_left + total_coeff - 1;
if(n >= LUMA_DC_BLOCK_INDEX){
block[*scantable] = level[0];
for(i=1;i<total_coeff && zeros_left > 0;i++) {
if(zeros_left < 7)
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
else
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
zeros_left -= run_before;
scantable -= 1 + run_before;
block[*scantable]= level[i];
}
for(;i<total_coeff;i++) {
scantable--;
block[*scantable]= level[i];
}
}else{
block[*scantable] = (level[0] * qmul[*scantable] + 32)>>6;
for(i=1;i<total_coeff && zeros_left > 0;i++) {
if(zeros_left < 7)
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
else
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
zeros_left -= run_before;
scantable -= 1 + run_before;
block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
}
for(;i<total_coeff;i++) {
scantable--;
block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
}
#define STORE_BLOCK(type) \
scantable += zeros_left + total_coeff - 1; \
if(n >= LUMA_DC_BLOCK_INDEX){ \
((type*)block)[*scantable] = level[0]; \
for(i=1;i<total_coeff && zeros_left > 0;i++) { \
if(zeros_left < 7) \
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
else \
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
zeros_left -= run_before; \
scantable -= 1 + run_before; \
((type*)block)[*scantable]= level[i]; \
} \
for(;i<total_coeff;i++) { \
scantable--; \
((type*)block)[*scantable]= level[i]; \
} \
}else{ \
((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
for(i=1;i<total_coeff && zeros_left > 0;i++) { \
if(zeros_left < 7) \
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
else \
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
zeros_left -= run_before; \
scantable -= 1 + run_before; \
((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
} \
for(;i<total_coeff;i++) { \
scantable--; \
((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
} \
}
if (h->pixel_shift) {
STORE_BLOCK(int32_t)
} else {
STORE_BLOCK(int16_t)
}
if(zeros_left<0){
......@@ -535,6 +542,7 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
int partition_count;
unsigned int mb_type, cbp;
int dct8x8_allowed= h->pps.transform_8x8_mode;
const int pixel_shift = h->pixel_shift;
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
......@@ -605,7 +613,7 @@ decode_intra_mb:
align_get_bits(&s->gb);
// The pixels are stored in the same order as levels in h->mb array.
for(x=0; x < (CHROMA ? 384 : 256); x++){
for(x=0; x < (CHROMA ? 384 : 256)*h->sps.bit_depth_luma/8; x++){
((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
}
......@@ -941,6 +949,8 @@ decode_intra_mb:
if(IS_INTRA16x16(mb_type)){
AV_ZERO128(h->mb_luma_dc+0);
AV_ZERO128(h->mb_luma_dc+8);
AV_ZERO128(h->mb_luma_dc+16);
AV_ZERO128(h->mb_luma_dc+24);
if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
return -1; //FIXME continue if partitioned and other return -1 too
}
......@@ -951,7 +961,7 @@ decode_intra_mb:
for(i8x8=0; i8x8<4; i8x8++){
for(i4x4=0; i4x4<4; i4x4++){
const int index= i4x4 + 4*i8x8;
if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
return -1;
}
}
......@@ -963,7 +973,7 @@ decode_intra_mb:
for(i8x8=0; i8x8<4; i8x8++){
if(cbp & (1<<i8x8)){
if(IS_8x8DCT(mb_type)){
DCTELEM *buf = &h->mb[64*i8x8];
DCTELEM *buf = &h->mb[64*i8x8 << pixel_shift];
uint8_t *nnz;
for(i4x4=0; i4x4<4; i4x4++){
if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
......@@ -976,7 +986,7 @@ decode_intra_mb:
for(i4x4=0; i4x4<4; i4x4++){
const int index= i4x4 + 4*i8x8;
if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
return -1;
}
}
......@@ -990,7 +1000,7 @@ decode_intra_mb:
if(cbp&0x30){
for(chroma_idx=0; chroma_idx<2; chroma_idx++)
if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
return -1;
}
}
......@@ -1000,7 +1010,7 @@ decode_intra_mb:
const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
for(i4x4=0; i4x4<4; i4x4++){
const int index= 16 + 4*chroma_idx + i4x4;
if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
return -1;
}
}
......
......@@ -544,10 +544,10 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
//{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
if( dir == 0 ) {
filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, h );
filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
if( (edge&1) == 0 ) {
filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, h->chroma_qp[0], h);
filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, h->chroma_qp[1], h);
filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
}
} else {
filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
......
......@@ -273,6 +273,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
int unaligned;
AVPicture picture;
int stride_align[4];
const int pixel_size = av_pix_fmt_descriptors[s->pix_fmt].comp[0].step_minus1+1;
avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift);
......@@ -322,7 +323,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
if((s->flags&CODEC_FLAG_EMU_EDGE) || !size[2])
buf->data[i] = buf->base[i];
else
buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (EDGE_WIDTH>>h_shift), stride_align[i]);
buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (pixel_size*EDGE_WIDTH>>h_shift), stride_align[i]);
}
if(size[1] && !size[2])
ff_set_systematic_pal2((uint32_t*)buf->data[1], s->pix_fmt);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment