Commit 6e3ef511, authored by Oskar Arvidsson, committed by Ronald S. Bultje

Add the notion of pixel size in h264 related functions.

In high bit depth the pixels will not be stored in uint8_t like in the
normal case, but in uint16_t. The pixel size is thus 1 in normal bit
depth and 2 in high bit depth.

Preparatory patch for high bit depth h264 decoding support.
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
parent 44ca80df
...@@ -1577,6 +1577,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic) ...@@ -1577,6 +1577,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
int perms = AV_PERM_WRITE; int perms = AV_PERM_WRITE;
int i, w, h, stride[4]; int i, w, h, stride[4];
unsigned edge; unsigned edge;
int pixel_size;
if (codec->codec->capabilities & CODEC_CAP_NEG_LINESIZES) if (codec->codec->capabilities & CODEC_CAP_NEG_LINESIZES)
perms |= AV_PERM_NEG_LINESIZES; perms |= AV_PERM_NEG_LINESIZES;
...@@ -1598,6 +1599,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic) ...@@ -1598,6 +1599,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
if(!(ref = avfilter_get_video_buffer(ctx->outputs[0], perms, w, h))) if(!(ref = avfilter_get_video_buffer(ctx->outputs[0], perms, w, h)))
return -1; return -1;
pixel_size = av_pix_fmt_descriptors[ref->format].comp[0].step_minus1+1;
ref->video->w = codec->width; ref->video->w = codec->width;
ref->video->h = codec->height; ref->video->h = codec->height;
for(i = 0; i < 4; i ++) { for(i = 0; i < 4; i ++) {
...@@ -1605,7 +1607,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic) ...@@ -1605,7 +1607,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
unsigned vshift = (i == 1 || i == 2) ? av_pix_fmt_descriptors[ref->format].log2_chroma_h : 0; unsigned vshift = (i == 1 || i == 2) ? av_pix_fmt_descriptors[ref->format].log2_chroma_h : 0;
if (ref->data[i]) { if (ref->data[i]) {
ref->data[i] += (edge >> hshift) + ((edge * ref->linesize[i]) >> vshift); ref->data[i] += ((edge * pixel_size) >> hshift) + ((edge * ref->linesize[i]) >> vshift);
} }
pic->data[i] = ref->data[i]; pic->data[i] = ref->data[i];
pic->linesize[i] = ref->linesize[i]; pic->linesize[i] = ref->linesize[i];
......
...@@ -314,12 +314,13 @@ static void chroma_dc_dct_c(DCTELEM *block){ ...@@ -314,12 +314,13 @@ static void chroma_dc_dct_c(DCTELEM *block){
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int src_x_offset, int src_y_offset, int src_x_offset, int src_y_offset,
qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){ qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
int pixel_shift){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
const int luma_xy= (mx&3) + ((my&3)<<2); const int luma_xy= (mx&3) + ((my&3)<<2);
uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize; uint8_t * src_y = pic->data[0] + ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
uint8_t * src_cb, * src_cr; uint8_t * src_cb, * src_cr;
int extra_width= h->emu_edge_width; int extra_width= h->emu_edge_width;
int extra_height= h->emu_edge_height; int extra_height= h->emu_edge_height;
...@@ -336,8 +337,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, ...@@ -336,8 +337,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
|| full_my < 0-extra_height || full_my < 0-extra_height
|| full_mx + 16/*FIXME*/ > pic_width + extra_width || full_mx + 16/*FIXME*/ > pic_width + extra_width
|| full_my + 16/*FIXME*/ > pic_height + extra_height){ || full_my + 16/*FIXME*/ > pic_height + extra_height){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize; src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
emu=1; emu=1;
} }
...@@ -353,8 +354,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, ...@@ -353,8 +354,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
my += 2 * ((s->mb_y & 1) - (pic->reference - 1)); my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
} }
src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize; src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize; src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
if(emu){ if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
...@@ -374,14 +375,14 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei ...@@ -374,14 +375,14 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
int x_offset, int y_offset, int x_offset, int y_offset,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
int list0, int list1){ int list0, int list1, int pixel_shift){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
qpel_mc_func *qpix_op= qpix_put; qpel_mc_func *qpix_op= qpix_put;
h264_chroma_mc_func chroma_op= chroma_put; h264_chroma_mc_func chroma_op= chroma_put;
dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h-> mb_linesize;
dest_cb += x_offset + y_offset*h->mb_uvlinesize; dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
dest_cr += x_offset + y_offset*h->mb_uvlinesize; dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
x_offset += 8*s->mb_x; x_offset += 8*s->mb_x;
y_offset += 8*(s->mb_y >> MB_FIELD); y_offset += 8*(s->mb_y >> MB_FIELD);
...@@ -389,7 +390,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei ...@@ -389,7 +390,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
mc_dir_part(h, ref, n, square, chroma_height, delta, 0, mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
dest_y, dest_cb, dest_cr, x_offset, y_offset, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op); qpix_op, chroma_op, pixel_shift);
qpix_op= qpix_avg; qpix_op= qpix_avg;
chroma_op= chroma_avg; chroma_op= chroma_avg;
...@@ -399,7 +400,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei ...@@ -399,7 +400,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
mc_dir_part(h, ref, n, square, chroma_height, delta, 1, mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
dest_y, dest_cb, dest_cr, x_offset, y_offset, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op); qpix_op, chroma_op, pixel_shift);
} }
} }
...@@ -409,12 +410,12 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom ...@@ -409,12 +410,12 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
int list0, int list1){ int list0, int list1, int pixel_shift){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h-> mb_linesize;
dest_cb += x_offset + y_offset*h->mb_uvlinesize; dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
dest_cr += x_offset + y_offset*h->mb_uvlinesize; dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
x_offset += 8*s->mb_x; x_offset += 8*s->mb_x;
y_offset += 8*(s->mb_y >> MB_FIELD); y_offset += 8*(s->mb_y >> MB_FIELD);
...@@ -422,17 +423,17 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom ...@@ -422,17 +423,17 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
/* don't optimize for luma-only case, since B-frames usually /* don't optimize for luma-only case, since B-frames usually
* use implicit weights => chroma too. */ * use implicit weights => chroma too. */
uint8_t *tmp_cb = s->obmc_scratchpad; uint8_t *tmp_cb = s->obmc_scratchpad;
uint8_t *tmp_cr = s->obmc_scratchpad + 8; uint8_t *tmp_cr = s->obmc_scratchpad + (8 << pixel_shift);
uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize; uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
int refn0 = h->ref_cache[0][ scan8[n] ]; int refn0 = h->ref_cache[0][ scan8[n] ];
int refn1 = h->ref_cache[1][ scan8[n] ]; int refn1 = h->ref_cache[1][ scan8[n] ];
mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
dest_y, dest_cb, dest_cr, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put); x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
tmp_y, tmp_cb, tmp_cr, tmp_y, tmp_cb, tmp_cr,
x_offset, y_offset, qpix_put, chroma_put); x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
if(h->use_weight == 2){ if(h->use_weight == 2){
int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1]; int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
...@@ -457,7 +458,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom ...@@ -457,7 +458,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
Picture *ref= &h->ref_list[list][refn]; Picture *ref= &h->ref_list[list][refn];
mc_dir_part(h, ref, n, square, chroma_height, delta, list, mc_dir_part(h, ref, n, square, chroma_height, delta, list,
dest_y, dest_cb, dest_cr, x_offset, y_offset, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put, chroma_put); qpix_put, chroma_put, pixel_shift);
luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom, luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]); h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
...@@ -476,19 +477,21 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height, ...@@ -476,19 +477,21 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
h264_weight_func *weight_op, h264_biweight_func *weight_avg, h264_weight_func *weight_op, h264_biweight_func *weight_avg,
int list0, int list1){ int list0, int list1, int pixel_shift){
if((h->use_weight==2 && list0 && list1 if((h->use_weight==2 && list0 && list1
&& (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32)) && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
|| h->use_weight==1) || h->use_weight==1)
mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, x_offset, y_offset, qpix_put, chroma_put,
weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); weight_op[0], weight_op[3], weight_avg[0],
weight_avg[3], list0, list1, pixel_shift);
else else
mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
chroma_avg, list0, list1, pixel_shift);
} }
static inline void prefetch_motion(H264Context *h, int list){ static inline void prefetch_motion(H264Context *h, int list, int pixel_shift){
/* fetch pixels for estimated mv 4 macroblocks ahead /* fetch pixels for estimated mv 4 macroblocks ahead
* optimized for 64byte cache lines */ * optimized for 64byte cache lines */
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
...@@ -497,48 +500,54 @@ static inline void prefetch_motion(H264Context *h, int list){ ...@@ -497,48 +500,54 @@ static inline void prefetch_motion(H264Context *h, int list){
const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
uint8_t **src= h->ref_list[list][refn].data; uint8_t **src= h->ref_list[list][refn].data;
int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64; int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
s->dsp.prefetch(src[0]+off, s->linesize, 4); s->dsp.prefetch(src[0]+off, s->linesize, 4);
off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
} }
} }
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
h264_weight_func *weight_op, h264_biweight_func *weight_avg){ h264_weight_func *weight_op, h264_biweight_func *weight_avg,
int pixel_shift){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy; const int mb_xy= h->mb_xy;
const int mb_type= s->current_picture.mb_type[mb_xy]; const int mb_type= s->current_picture.mb_type[mb_xy];
assert(IS_INTER(mb_type)); assert(IS_INTER(mb_type));
prefetch_motion(h, 0); prefetch_motion(h, 0, pixel_shift);
if(IS_16X16(mb_type)){ if(IS_16X16(mb_type)){
mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
weight_op, weight_avg, weight_op, weight_avg,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift);
}else if(IS_16X8(mb_type)){ }else if(IS_16X8(mb_type)){
mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0, mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
&weight_op[1], &weight_avg[1], &weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4, pixel_shift);
mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
&weight_op[1], &weight_avg[1], &weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift);
}else if(IS_8X16(mb_type)){ }else if(IS_8X16(mb_type)){
mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[2], &weight_avg[2], &weight_op[2], &weight_avg[2],
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift);
mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[2], &weight_avg[2], &weight_op[2], &weight_avg[2],
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift);
}else{ }else{
int i; int i;
...@@ -554,25 +563,30 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t ...@@ -554,25 +563,30 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[3], &weight_avg[3], &weight_op[3], &weight_avg[3],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift);
}else if(IS_SUB_8X4(sub_mb_type)){ }else if(IS_SUB_8X4(sub_mb_type)){
mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset, mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[4], &weight_avg[4], &weight_op[4], &weight_avg[4],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, pixel_shift);
mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[4], &weight_avg[4], &weight_op[4], &weight_avg[4],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift);
}else if(IS_SUB_4X8(sub_mb_type)){ }else if(IS_SUB_4X8(sub_mb_type)){
mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[5], &weight_avg[5], &weight_op[5], &weight_avg[5],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift);
mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[5], &weight_avg[5], &weight_op[5], &weight_avg[5],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift);
}else{ }else{
int j; int j;
assert(IS_SUB_4X4(sub_mb_type)); assert(IS_SUB_4X4(sub_mb_type));
...@@ -582,15 +596,32 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t ...@@ -582,15 +596,32 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[6], &weight_avg[6], &weight_op[6], &weight_avg[6],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift);
} }
} }
} }
} }
prefetch_motion(h, 1); prefetch_motion(h, 1, pixel_shift);
} }
/*
 * hl_motion_fn(sh, bits) defines hl_motion_<bits>(), a wrapper that takes the
 * same arguments as hl_motion() except the trailing pixel_shift, and forwards
 * them to hl_motion() with sh supplied as that pixel_shift argument.
 * It is instantiated below as hl_motion_8 (sh = 0) and hl_motion_16 (sh = 1).
 */
#define hl_motion_fn(sh, bits) \
static av_always_inline void hl_motion_ ## bits(H264Context *h, \
uint8_t *dest_y, \
uint8_t *dest_cb, uint8_t *dest_cr, \
qpel_mc_func (*qpix_put)[16], \
h264_chroma_mc_func (*chroma_put), \
qpel_mc_func (*qpix_avg)[16], \
h264_chroma_mc_func (*chroma_avg), \
h264_weight_func *weight_op, \
h264_biweight_func *weight_avg) \
{ \
hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
qpix_avg, chroma_avg, weight_op, weight_avg, sh); \
}
hl_motion_fn(0, 8);
hl_motion_fn(1, 16);
static void free_tables(H264Context *h, int free_rbsp){ static void free_tables(H264Context *h, int free_rbsp){
int i; int i;
...@@ -758,8 +789,8 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){ ...@@ -758,8 +789,8 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
* Allocate buffers which are not shared amongst multiple threads. * Allocate buffers which are not shared amongst multiple threads.
*/ */
static int context_init(H264Context *h){ static int context_init(H264Context *h){
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] = h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE; h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
...@@ -861,6 +892,8 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){ ...@@ -861,6 +892,8 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
ff_h264_decode_init_vlc(); ff_h264_decode_init_vlc();
h->pixel_shift = 0;
h->thread_context[0] = h; h->thread_context[0] = h;
h->outputed_poc = INT_MIN; h->outputed_poc = INT_MIN;
h->prev_poc_msb= 1<<16; h->prev_poc_msb= 1<<16;
...@@ -888,6 +921,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){ ...@@ -888,6 +921,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
int ff_h264_frame_start(H264Context *h){ int ff_h264_frame_start(H264Context *h){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
int i; int i;
const int pixel_shift = h->pixel_shift;
if(MPV_frame_start(s, s->avctx) < 0) if(MPV_frame_start(s, s->avctx) < 0)
return -1; return -1;
...@@ -904,14 +938,14 @@ int ff_h264_frame_start(H264Context *h){ ...@@ -904,14 +938,14 @@ int ff_h264_frame_start(H264Context *h){
assert(s->linesize && s->uvlinesize); assert(s->linesize && s->uvlinesize);
for(i=0; i<16; i++){ for(i=0; i<16; i++){
h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3); h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3); h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
} }
for(i=0; i<4; i++){ for(i=0; i<4; i++){
h->block_offset[16+i]= h->block_offset[16+i]=
h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
h->block_offset[24+16+i]= h->block_offset[24+16+i]=
h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
} }
/* can't be in alloc_tables because linesize isn't known there. /* can't be in alloc_tables because linesize isn't known there.
...@@ -945,6 +979,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src ...@@ -945,6 +979,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
uint8_t *top_border; uint8_t *top_border;
int top_idx = 1; int top_idx = 1;
const int pixel_shift = h->pixel_shift;
src_y -= linesize; src_y -= linesize;
src_cb -= uvlinesize; src_cb -= uvlinesize;
...@@ -955,9 +990,16 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src ...@@ -955,9 +990,16 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
if(!MB_MBAFF){ if(!MB_MBAFF){
top_border = h->top_borders[0][s->mb_x]; top_border = h->top_borders[0][s->mb_x];
AV_COPY128(top_border, src_y + 15*linesize); AV_COPY128(top_border, src_y + 15*linesize);
if (pixel_shift)
AV_COPY128(top_border+16, src_y+15*linesize+16);
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if (pixel_shift) {
AV_COPY128(top_border+32, src_cb+7*uvlinesize);
AV_COPY128(top_border+48, src_cr+7*uvlinesize);
} else {
AV_COPY64(top_border+16, src_cb+7*uvlinesize); AV_COPY64(top_border+16, src_cb+7*uvlinesize);
AV_COPY64(top_border+24, src_cr+7*uvlinesize); AV_COPY64(top_border+24, src_cr+7*uvlinesize);
}
} }
} }
}else if(MB_MBAFF){ }else if(MB_MBAFF){
...@@ -970,14 +1012,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src ...@@ -970,14 +1012,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
// There are two lines saved, the line above the top macroblock of a pair, // There are two lines saved, the line above the top macroblock of a pair,
// and the line above the bottom macroblock // and the line above the bottom macroblock
AV_COPY128(top_border, src_y + 16*linesize); AV_COPY128(top_border, src_y + 16*linesize);
if (pixel_shift)
AV_COPY128(top_border+16, src_y+16*linesize+16);
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if (pixel_shift) {
AV_COPY128(top_border+32, src_cb+8*uvlinesize);
AV_COPY128(top_border+48, src_cr+8*uvlinesize);
} else {
AV_COPY64(top_border+16, src_cb+8*uvlinesize); AV_COPY64(top_border+16, src_cb+8*uvlinesize);
AV_COPY64(top_border+24, src_cr+8*uvlinesize); AV_COPY64(top_border+24, src_cr+8*uvlinesize);
}
} }
} }
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
uint8_t *src_cb, uint8_t *src_cr,
int linesize, int uvlinesize,
int xchg, int simple, int pixel_shift){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
int deblock_left; int deblock_left;
int deblock_top; int deblock_top;
...@@ -1002,41 +1054,62 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c ...@@ -1002,41 +1054,62 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c
deblock_top = (s->mb_y > !!MB_FIELD); deblock_top = (s->mb_y > !!MB_FIELD);
} }
src_y -= linesize + 1; src_y -= linesize + 1 + pixel_shift;
src_cb -= uvlinesize + 1; src_cb -= uvlinesize + 1 + pixel_shift;
src_cr -= uvlinesize + 1; src_cr -= uvlinesize + 1 + pixel_shift;
top_border_m1 = h->top_borders[top_idx][s->mb_x-1]; top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
top_border = h->top_borders[top_idx][s->mb_x]; top_border = h->top_borders[top_idx][s->mb_x];
#define XCHG(a,b,xchg)\ #define XCHG(a,b,xchg)\
if (pixel_shift) {\
if (xchg) {\
AV_SWAP64(b+0,a+0);\
AV_SWAP64(b+8,a+8);\
} else {\
AV_COPY128(b,a); \
}\
} else \
if (xchg) AV_SWAP64(b,a);\ if (xchg) AV_SWAP64(b,a);\
else AV_COPY64(b,a); else AV_COPY64(b,a);
if(deblock_top){ if(deblock_top){
if(deblock_left){ if(deblock_left){
XCHG(top_border_m1+8, src_y -7, 1); XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
} }
XCHG(top_border+0, src_y +1, xchg); XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
XCHG(top_border+8, src_y +9, 1); XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
if(s->mb_x+1 < s->mb_width){ if(s->mb_x+1 < s->mb_width){
XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1); XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
} }
} }
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if(deblock_top){ if(deblock_top){
if(deblock_left){ if(deblock_left){
XCHG(top_border_m1+16, src_cb -7, 1); XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
XCHG(top_border_m1+24, src_cr -7, 1); XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
} }
XCHG(top_border+16, src_cb+1, 1); XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
XCHG(top_border+24, src_cr+1, 1); XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
} }
} }
} }
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
if (high_bit_depth) {
return AV_RN32A(((int32_t*)mb) + index);
} else
return AV_RN16A(mb + index);
}
/**
 * Store one coefficient into a block buffer.
 *
 * When high_bit_depth is nonzero, mb is reinterpreted as an int32_t array and
 * value is written at element index with AV_WN32A; otherwise value is written
 * at mb + index with AV_WN16A.
 *
 * NOTE(review): the 16-bit path presumably relies on DCTELEM being 16 bits
 * wide -- confirm against the DCTELEM typedef.
 *
 * @param mb             coefficient buffer
 * @param high_bit_depth nonzero to use 32-bit coefficient storage
 * @param index          coefficient index (in elements, not bytes)
 * @param value          value to store
 */
static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
if (high_bit_depth) {
AV_WN32A(((int32_t*)mb) + index, value);
} else
AV_WN16A(mb + index, value);
}
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
const int mb_x= s->mb_x; const int mb_x= s->mb_x;
const int mb_y= s->mb_y; const int mb_y= s->mb_y;
...@@ -1052,12 +1125,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1052,12 +1125,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4); s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2); s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
h->list_counts[mb_xy]= h->list_count; h->list_counts[mb_xy]= h->list_count;
...@@ -1094,6 +1167,28 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1094,6 +1167,28 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
} }
if (!simple && IS_INTRA_PCM(mb_type)) { if (!simple && IS_INTRA_PCM(mb_type)) {
if (pixel_shift) {
const int bit_depth = h->sps.bit_depth_luma;
int j;
GetBitContext gb;
init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);
for (i = 0; i < 16; i++) {
uint16_t *tmp_y = (uint16_t*)(dest_y + i*linesize);
for (j = 0; j < 16; j++)
tmp_y[j] = get_bits(&gb, bit_depth);
}
for (i = 0; i < 8; i++) {
uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
for (j = 0; j < 8; j++)
tmp_cb[j] = get_bits(&gb, bit_depth);
}
for (i = 0; i < 8; i++) {
uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
for (j = 0; j < 8; j++)
tmp_cr[j] = get_bits(&gb, bit_depth);
}
} else {
for (i=0; i<16; i++) { for (i=0; i<16; i++) {
memcpy(dest_y + i* linesize, h->mb + i*8, 16); memcpy(dest_y + i* linesize, h->mb + i*8, 16);
} }
...@@ -1101,10 +1196,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1101,10 +1196,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
} }
}
} else { } else {
if(IS_INTRA(mb_type)){ if(IS_INTRA(mb_type)){
if(h->deblocking_filter) if(h->deblocking_filter)
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple); xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift);
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
...@@ -1125,16 +1221,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1125,16 +1221,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
uint8_t * const ptr= dest_y + block_offset[i]; uint8_t * const ptr= dest_y + block_offset[i];
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize); h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
}else{ }else{
const int nnz = h->non_zero_count_cache[ scan8[i] ]; const int nnz = h->non_zero_count_cache[ scan8[i] ];
h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
(h->topright_samples_available<<i)&0x4000, linesize); (h->topright_samples_available<<i)&0x4000, linesize);
if(nnz){ if(nnz){
if(nnz == 1 && h->mb[i*16]) if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
idct_dc_add(ptr, h->mb + i*16, linesize); idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
else else
idct_add (ptr, h->mb + i*16, linesize); idct_add (ptr, h->mb + (i*16 << pixel_shift), linesize);
} }
} }
} }
...@@ -1151,18 +1247,24 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1151,18 +1247,24 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize); h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
}else{ }else{
uint8_t *topright; uint8_t *topright;
int nnz, tr; int nnz, tr;
uint64_t tr_high;
if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
const int topright_avail= (h->topright_samples_available<<i)&0x8000; const int topright_avail= (h->topright_samples_available<<i)&0x8000;
assert(mb_y || linesize <= block_offset[i]); assert(mb_y || linesize <= block_offset[i]);
if(!topright_avail){ if(!topright_avail){
if (pixel_shift) {
tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
topright= (uint8_t*) &tr_high;
} else {
tr= ptr[3 - linesize]*0x01010101; tr= ptr[3 - linesize]*0x01010101;
topright= (uint8_t*) &tr; topright= (uint8_t*) &tr;
}
}else }else
topright= ptr + 4 - linesize; topright= ptr + (4 << pixel_shift) - linesize;
}else }else
topright= NULL; topright= NULL;
...@@ -1170,10 +1272,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1170,10 +1272,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
nnz = h->non_zero_count_cache[ scan8[i] ]; nnz = h->non_zero_count_cache[ scan8[i] ];
if(nnz){ if(nnz){
if(is_h264){ if(is_h264){
if(nnz == 1 && h->mb[i*16]) if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
idct_dc_add(ptr, h->mb + i*16, linesize); idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
else else
idct_add (ptr, h->mb + i*16, linesize); idct_add (ptr, h->mb + (i*16 << pixel_shift), linesize);
}else }else
ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
} }
...@@ -1191,19 +1293,27 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1191,19 +1293,27 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16, static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16}; 8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
for(i = 0; i < 16; i++) for(i = 0; i < 16; i++)
h->mb[dc_mapping[i]] = h->mb_luma_dc[i]; dctcoef_set(h->mb, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc, pixel_shift, i));
} }
} }
}else }else
ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale); ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
} }
if(h->deblocking_filter) if(h->deblocking_filter)
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
}else if(is_h264){ }else if(is_h264){
hl_motion(h, dest_y, dest_cb, dest_cr, if (pixel_shift) {
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, hl_motion_16(h, dest_y, dest_cb, dest_cr,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab); s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab);
} else
hl_motion_8(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab);
} }
...@@ -1215,8 +1325,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1215,8 +1325,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
}else{ }else{
for(i=0; i<16; i++){ for(i=0; i<16; i++){
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize); s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
} }
} }
}else{ }else{
...@@ -1228,7 +1338,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1228,7 +1338,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
for(i=0; i<16; i+=di){ for(i=0; i<16; i+=di){
if(h->non_zero_count_cache[ scan8[i] ]){ if(h->non_zero_count_cache[ scan8[i] ]){
idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); idct_add(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
} }
} }
}else{ }else{
...@@ -1253,21 +1363,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1253,21 +1363,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
uint8_t *dest[2] = {dest_cb, dest_cr}; uint8_t *dest[2] = {dest_cb, dest_cr};
if(transform_bypass){ if(transform_bypass){
if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize); h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16 << pixel_shift), uvlinesize);
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize); h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16 << pixel_shift), uvlinesize);
}else{ }else{
idct_add = s->dsp.add_pixels4; idct_add = s->dsp.add_pixels4;
for(i=16; i<16+8; i++){ for(i=16; i<16+8; i++){
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
} }
} }
}else{ }else{
if(is_h264){ if(is_h264){
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16 << pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16) << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
h->h264dsp.h264_idct_add8(dest, block_offset, h->h264dsp.h264_idct_add8(dest, block_offset,
h->mb, uvlinesize, h->mb, uvlinesize,
h->non_zero_count_cache); h->non_zero_count_cache);
...@@ -1291,15 +1401,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ ...@@ -1291,15 +1401,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
/** /**
* Process a macroblock; this case avoids checks for expensive uncommon cases. * Process a macroblock; this case avoids checks for expensive uncommon cases.
*/ */
/* The macro defines hl_decode_mb_simple_<bits>(), which calls
 * hl_decode_mb_internal() with simple = 1 and sh as its pixel_shift argument. */
static void hl_decode_mb_simple(H264Context *h){ #define hl_decode_mb_simple(sh, bits) \
hl_decode_mb_internal(h, 1); static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
hl_decode_mb_internal(h, 1, sh); \
} }
/* Instantiations: hl_decode_mb_simple_8 (sh = 0) and hl_decode_mb_simple_16 (sh = 1). */
hl_decode_mb_simple(0, 8);
hl_decode_mb_simple(1, 16);
/** /**
* Process a macroblock; this handles edge cases, such as interlacing. * Process a macroblock; this handles edge cases, such as interlacing.
*/ */
static void av_noinline hl_decode_mb_complex(H264Context *h){ static void av_noinline hl_decode_mb_complex(H264Context *h){
/* simple = 0; the pixel shift is read from h->pixel_shift on each call
 * instead of being a constant fixed per instantiation. */
hl_decode_mb_internal(h, 0); hl_decode_mb_internal(h, 0, h->pixel_shift);
} }
void ff_h264_hl_decode_mb(H264Context *h){ void ff_h264_hl_decode_mb(H264Context *h){
...@@ -1308,9 +1421,12 @@ void ff_h264_hl_decode_mb(H264Context *h){ ...@@ -1308,9 +1421,12 @@ void ff_h264_hl_decode_mb(H264Context *h){
const int mb_type= s->current_picture.mb_type[mb_xy]; const int mb_type= s->current_picture.mb_type[mb_xy];
int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
if (is_complex) if (is_complex) {
hl_decode_mb_complex(h); hl_decode_mb_complex(h);
else hl_decode_mb_simple(h); } else if (h->pixel_shift) {
hl_decode_mb_simple_16(h);
} else
hl_decode_mb_simple_8(h);
} }
static int pred_weight_table(H264Context *h){ static int pred_weight_table(H264Context *h){
...@@ -2432,6 +2548,7 @@ static void loop_filter(H264Context *h){ ...@@ -2432,6 +2548,7 @@ static void loop_filter(H264Context *h){
int linesize, uvlinesize, mb_x, mb_y; int linesize, uvlinesize, mb_x, mb_y;
const int end_mb_y= s->mb_y + FRAME_MBAFF; const int end_mb_y= s->mb_y + FRAME_MBAFF;
const int old_slice_type= h->slice_type; const int old_slice_type= h->slice_type;
const int pixel_shift = h->pixel_shift;
if(h->deblocking_filter) { if(h->deblocking_filter) {
for(mb_x= 0; mb_x<s->mb_width; mb_x++){ for(mb_x= 0; mb_x<s->mb_width; mb_x++){
...@@ -2447,9 +2564,9 @@ static void loop_filter(H264Context *h){ ...@@ -2447,9 +2564,9 @@ static void loop_filter(H264Context *h){
s->mb_x= mb_x; s->mb_x= mb_x;
s->mb_y= mb_y; s->mb_y= mb_y;
dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
//FIXME simplify above //FIXME simplify above
if (MB_FIELD) { if (MB_FIELD) {
......
...@@ -265,6 +265,7 @@ typedef struct MMCO{ ...@@ -265,6 +265,7 @@ typedef struct MMCO{
typedef struct H264Context{ typedef struct H264Context{
MpegEncContext s; MpegEncContext s;
H264DSPContext h264dsp; H264DSPContext h264dsp;
int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264
int chroma_qp[2]; //QPc int chroma_qp[2]; //QPc
int qp_thresh; ///< QP threshold to skip loopfilter int qp_thresh; ///< QP threshold to skip loopfilter
...@@ -296,7 +297,7 @@ typedef struct H264Context{ ...@@ -296,7 +297,7 @@ typedef struct H264Context{
unsigned int top_samples_available; unsigned int top_samples_available;
unsigned int topright_samples_available; unsigned int topright_samples_available;
unsigned int left_samples_available; unsigned int left_samples_available;
uint8_t (*top_borders[2])[16+2*8]; uint8_t (*top_borders[2])[(16+2*8)*2];
/** /**
* non zero coeff count cache. * non zero coeff count cache.
...@@ -406,9 +407,9 @@ typedef struct H264Context{ ...@@ -406,9 +407,9 @@ typedef struct H264Context{
GetBitContext *intra_gb_ptr; GetBitContext *intra_gb_ptr;
GetBitContext *inter_gb_ptr; GetBitContext *inter_gb_ptr;
DECLARE_ALIGNED(16, DCTELEM, mb)[16*24]; DECLARE_ALIGNED(16, DCTELEM, mb)[16*24*2]; ///< as a dct coefficient is int32_t in high depth, we need to reserve twice the space.
DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16]; DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16*2];
DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb DCTELEM mb_padding[256*2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
/** /**
* Cabac * Cabac
......
...@@ -1100,47 +1100,54 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT ...@@ -1100,47 +1100,54 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
} }
} }
do { #define STORE_BLOCK(type) \
uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base; do { \
uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base; \
int j= scantable[index[--coeff_count]]; \
int j= scantable[index[--coeff_count]]; \
if( get_cabac( CC, ctx ) == 0 ) { \
node_ctx = coeff_abs_level_transition[0][node_ctx]; if( get_cabac( CC, ctx ) == 0 ) { \
if( is_dc ) { node_ctx = coeff_abs_level_transition[0][node_ctx]; \
block[j] = get_cabac_bypass_sign( CC, -1); if( is_dc ) { \
}else{ ((type*)block)[j] = get_cabac_bypass_sign( CC, -1); \
block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6; }else{ \
} ((type*)block)[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6; \
} else { } \
int coeff_abs = 2; } else { \
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; int coeff_abs = 2; \
node_ctx = coeff_abs_level_transition[1][node_ctx]; ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
node_ctx = coeff_abs_level_transition[1][node_ctx]; \
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
coeff_abs++; while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
} coeff_abs++; \
} \
if( coeff_abs >= 15 ) { \
int j = 0; if( coeff_abs >= 15 ) { \
while( get_cabac_bypass( CC ) ) { int j = 0; \
j++; while( get_cabac_bypass( CC ) ) { \
} j++; \
} \
coeff_abs=1; \
while( j-- ) { coeff_abs=1; \
coeff_abs += coeff_abs + get_cabac_bypass( CC ); while( j-- ) { \
} coeff_abs += coeff_abs + get_cabac_bypass( CC ); \
coeff_abs+= 14; } \
} coeff_abs+= 14; \
} \
\
if( is_dc ) { \
((type*)block)[j] = get_cabac_bypass_sign( CC, -coeff_abs ); \
}else{ \
((type*)block)[j] = ((int)(get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32)) >> 6; \
} \
} \
} while ( coeff_count );
if( is_dc ) { if (h->pixel_shift) {
block[j] = get_cabac_bypass_sign( CC, -coeff_abs ); STORE_BLOCK(int32_t)
}else{ } else {
block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6; STORE_BLOCK(int16_t)
} }
}
} while( coeff_count );
#ifdef CABAC_ON_STACK #ifdef CABAC_ON_STACK
h->cabac.range = cc.range ; h->cabac.range = cc.range ;
h->cabac.low = cc.low ; h->cabac.low = cc.low ;
...@@ -1196,6 +1203,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) { ...@@ -1196,6 +1203,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
int mb_xy; int mb_xy;
int mb_type, partition_count, cbp = 0; int mb_type, partition_count, cbp = 0;
int dct8x8_allowed= h->pps.transform_8x8_mode; int dct8x8_allowed= h->pps.transform_8x8_mode;
const int pixel_shift = h->pixel_shift;
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride; mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
...@@ -1304,6 +1312,7 @@ decode_intra_mb: ...@@ -1304,6 +1312,7 @@ decode_intra_mb:
h->slice_table[ mb_xy ]= h->slice_num; h->slice_table[ mb_xy ]= h->slice_num;
if(IS_INTRA_PCM(mb_type)) { if(IS_INTRA_PCM(mb_type)) {
const int mb_size = (384*h->sps.bit_depth_luma) >> 3;
const uint8_t *ptr; const uint8_t *ptr;
// We assume these blocks are very rare so we do not optimize it. // We assume these blocks are very rare so we do not optimize it.
...@@ -1316,9 +1325,9 @@ decode_intra_mb: ...@@ -1316,9 +1325,9 @@ decode_intra_mb:
} }
// The pixels are stored in the same order as levels in h->mb array. // The pixels are stored in the same order as levels in h->mb array.
memcpy(h->mb, ptr, 256); ptr+=256; memcpy(h->mb, ptr, 2*mb_size/3); ptr+=2*mb_size/3;
if(CHROMA){ if(CHROMA){
memcpy(h->mb+128, ptr, 128); ptr+=128; memcpy(h->mb+mb_size/3, ptr, mb_size/3); ptr+=mb_size/3;
} }
ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr); ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
...@@ -1652,13 +1661,15 @@ decode_intra_mb: ...@@ -1652,13 +1661,15 @@ decode_intra_mb:
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" ); //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
AV_ZERO128(h->mb_luma_dc+0); AV_ZERO128(h->mb_luma_dc+0);
AV_ZERO128(h->mb_luma_dc+8); AV_ZERO128(h->mb_luma_dc+8);
AV_ZERO128(h->mb_luma_dc+16);
AV_ZERO128(h->mb_luma_dc+24);
decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16); decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
if( cbp&15 ) { if( cbp&15 ) {
qmul = h->dequant4_coeff[0][s->qscale]; qmul = h->dequant4_coeff[0][s->qscale];
for( i = 0; i < 16; i++ ) { for( i = 0; i < 16; i++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i ); //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
decode_cabac_residual_nondc(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15); decode_cabac_residual_nondc(h, h->mb + (16*i << pixel_shift), 1, i, scan + 1, qmul, 15);
} }
} else { } else {
fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
...@@ -1668,7 +1679,7 @@ decode_intra_mb: ...@@ -1668,7 +1679,7 @@ decode_intra_mb:
for( i8x8 = 0; i8x8 < 4; i8x8++ ) { for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
if( cbp & (1<<i8x8) ) { if( cbp & (1<<i8x8) ) {
if( IS_8x8DCT(mb_type) ) { if( IS_8x8DCT(mb_type) ) {
decode_cabac_residual_nondc(h, h->mb + 64*i8x8, 5, 4*i8x8, decode_cabac_residual_nondc(h, h->mb + (64*i8x8 << pixel_shift), 5, 4*i8x8,
scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64); scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
} else { } else {
qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale]; qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
...@@ -1676,7 +1687,7 @@ decode_intra_mb: ...@@ -1676,7 +1687,7 @@ decode_intra_mb:
const int index = 4*i8x8 + i4x4; const int index = 4*i8x8 + i4x4;
//av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
//START_TIMER //START_TIMER
decode_cabac_residual_nondc(h, h->mb + 16*index, 2, index, scan, qmul, 16); decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 2, index, scan, qmul, 16);
//STOP_TIMER("decode_residual") //STOP_TIMER("decode_residual")
} }
} }
...@@ -1691,7 +1702,7 @@ decode_intra_mb: ...@@ -1691,7 +1702,7 @@ decode_intra_mb:
int c; int c;
for( c = 0; c < 2; c++ ) { for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4); decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
} }
} }
...@@ -1702,7 +1713,7 @@ decode_intra_mb: ...@@ -1702,7 +1713,7 @@ decode_intra_mb:
for( i = 0; i < 4; i++ ) { for( i = 0; i < 4; i++ ) {
const int index = 16 + 4 * c + i; const int index = 16 + 4 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
decode_cabac_residual_nondc(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15); decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
} }
} }
} else { } else {
......
...@@ -488,37 +488,44 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in ...@@ -488,37 +488,44 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1); zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
} }
scantable += zeros_left + total_coeff - 1; #define STORE_BLOCK(type) \
if(n >= LUMA_DC_BLOCK_INDEX){ scantable += zeros_left + total_coeff - 1; \
block[*scantable] = level[0]; if(n >= LUMA_DC_BLOCK_INDEX){ \
for(i=1;i<total_coeff && zeros_left > 0;i++) { ((type*)block)[*scantable] = level[0]; \
if(zeros_left < 7) for(i=1;i<total_coeff && zeros_left > 0;i++) { \
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); if(zeros_left < 7) \
else run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); else \
zeros_left -= run_before; run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
scantable -= 1 + run_before; zeros_left -= run_before; \
block[*scantable]= level[i]; scantable -= 1 + run_before; \
} ((type*)block)[*scantable]= level[i]; \
for(;i<total_coeff;i++) { } \
scantable--; for(;i<total_coeff;i++) { \
block[*scantable]= level[i]; scantable--; \
} ((type*)block)[*scantable]= level[i]; \
}else{ } \
block[*scantable] = (level[0] * qmul[*scantable] + 32)>>6; }else{ \
for(i=1;i<total_coeff && zeros_left > 0;i++) { ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
if(zeros_left < 7) for(i=1;i<total_coeff && zeros_left > 0;i++) { \
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); if(zeros_left < 7) \
else run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); else \
zeros_left -= run_before; run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
scantable -= 1 + run_before; zeros_left -= run_before; \
block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6; scantable -= 1 + run_before; \
} ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
for(;i<total_coeff;i++) { } \
scantable--; for(;i<total_coeff;i++) { \
block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6; scantable--; \
} ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
} \
}
if (h->pixel_shift) {
STORE_BLOCK(int32_t)
} else {
STORE_BLOCK(int16_t)
} }
if(zeros_left<0){ if(zeros_left<0){
...@@ -535,6 +542,7 @@ int ff_h264_decode_mb_cavlc(H264Context *h){ ...@@ -535,6 +542,7 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
int partition_count; int partition_count;
unsigned int mb_type, cbp; unsigned int mb_type, cbp;
int dct8x8_allowed= h->pps.transform_8x8_mode; int dct8x8_allowed= h->pps.transform_8x8_mode;
const int pixel_shift = h->pixel_shift;
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride; mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
...@@ -605,7 +613,7 @@ decode_intra_mb: ...@@ -605,7 +613,7 @@ decode_intra_mb:
align_get_bits(&s->gb); align_get_bits(&s->gb);
// The pixels are stored in the same order as levels in h->mb array. // The pixels are stored in the same order as levels in h->mb array.
for(x=0; x < (CHROMA ? 384 : 256); x++){ for(x=0; x < (CHROMA ? 384 : 256)*h->sps.bit_depth_luma/8; x++){
((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8); ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
} }
...@@ -941,6 +949,8 @@ decode_intra_mb: ...@@ -941,6 +949,8 @@ decode_intra_mb:
if(IS_INTRA16x16(mb_type)){ if(IS_INTRA16x16(mb_type)){
AV_ZERO128(h->mb_luma_dc+0); AV_ZERO128(h->mb_luma_dc+0);
AV_ZERO128(h->mb_luma_dc+8); AV_ZERO128(h->mb_luma_dc+8);
AV_ZERO128(h->mb_luma_dc+16);
AV_ZERO128(h->mb_luma_dc+24);
if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){ if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
return -1; //FIXME continue if partitioned and other return -1 too return -1; //FIXME continue if partitioned and other return -1 too
} }
...@@ -951,7 +961,7 @@ decode_intra_mb: ...@@ -951,7 +961,7 @@ decode_intra_mb:
for(i8x8=0; i8x8<4; i8x8++){ for(i8x8=0; i8x8<4; i8x8++){
for(i4x4=0; i4x4<4; i4x4++){ for(i4x4=0; i4x4<4; i4x4++){
const int index= i4x4 + 4*i8x8; const int index= i4x4 + 4*i8x8;
if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){ if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
return -1; return -1;
} }
} }
...@@ -963,7 +973,7 @@ decode_intra_mb: ...@@ -963,7 +973,7 @@ decode_intra_mb:
for(i8x8=0; i8x8<4; i8x8++){ for(i8x8=0; i8x8<4; i8x8++){
if(cbp & (1<<i8x8)){ if(cbp & (1<<i8x8)){
if(IS_8x8DCT(mb_type)){ if(IS_8x8DCT(mb_type)){
DCTELEM *buf = &h->mb[64*i8x8]; DCTELEM *buf = &h->mb[64*i8x8 << pixel_shift];
uint8_t *nnz; uint8_t *nnz;
for(i4x4=0; i4x4<4; i4x4++){ for(i4x4=0; i4x4<4; i4x4++){
if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4, if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
...@@ -976,7 +986,7 @@ decode_intra_mb: ...@@ -976,7 +986,7 @@ decode_intra_mb:
for(i4x4=0; i4x4<4; i4x4++){ for(i4x4=0; i4x4<4; i4x4++){
const int index= i4x4 + 4*i8x8; const int index= i4x4 + 4*i8x8;
if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){ if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
return -1; return -1;
} }
} }
...@@ -990,7 +1000,7 @@ decode_intra_mb: ...@@ -990,7 +1000,7 @@ decode_intra_mb:
if(cbp&0x30){ if(cbp&0x30){
for(chroma_idx=0; chroma_idx<2; chroma_idx++) for(chroma_idx=0; chroma_idx<2; chroma_idx++)
if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){ if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
return -1; return -1;
} }
} }
...@@ -1000,7 +1010,7 @@ decode_intra_mb: ...@@ -1000,7 +1010,7 @@ decode_intra_mb:
const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]]; const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
for(i4x4=0; i4x4<4; i4x4++){ for(i4x4=0; i4x4<4; i4x4++){
const int index= 16 + 4*chroma_idx + i4x4; const int index= 16 + 4*chroma_idx + i4x4;
if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){ if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
return -1; return -1;
} }
} }
......
...@@ -544,10 +544,10 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u ...@@ -544,10 +544,10 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
//{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
if( dir == 0 ) { if( dir == 0 ) {
filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, h ); filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
if( (edge&1) == 0 ) { if( (edge&1) == 0 ) {
filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, h->chroma_qp[0], h); filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, h->chroma_qp[1], h); filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
} }
} else { } else {
filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h ); filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
......
...@@ -273,6 +273,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ ...@@ -273,6 +273,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
int unaligned; int unaligned;
AVPicture picture; AVPicture picture;
int stride_align[4]; int stride_align[4];
const int pixel_size = av_pix_fmt_descriptors[s->pix_fmt].comp[0].step_minus1+1;
avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift); avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift);
...@@ -322,7 +323,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ ...@@ -322,7 +323,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
if((s->flags&CODEC_FLAG_EMU_EDGE) || !size[2]) if((s->flags&CODEC_FLAG_EMU_EDGE) || !size[2])
buf->data[i] = buf->base[i]; buf->data[i] = buf->base[i];
else else
buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (EDGE_WIDTH>>h_shift), stride_align[i]); buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (pixel_size*EDGE_WIDTH>>h_shift), stride_align[i]);
} }
if(size[1] && !size[2]) if(size[1] && !size[2])
ff_set_systematic_pal2((uint32_t*)buf->data[1], s->pix_fmt); ff_set_systematic_pal2((uint32_t*)buf->data[1], s->pix_fmt);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment