Commit 36d04801 authored by Anton Khirnov

h264: move the scratch buffers into the per-slice context

Also change the method for allocating them. Instead of two possible
alloc calls from different places, just ensure they are allocated at the
start of each slice. This should be simpler and less bug-prone than the
previous method.
parent 34d4c605
...@@ -381,8 +381,6 @@ void ff_h264_free_tables(H264Context *h, int free_rbsp) ...@@ -381,8 +381,6 @@ void ff_h264_free_tables(H264Context *h, int free_rbsp)
continue; continue;
av_freep(&hx->top_borders[1]); av_freep(&hx->top_borders[1]);
av_freep(&hx->top_borders[0]); av_freep(&hx->top_borders[0]);
av_freep(&hx->bipred_scratchpad);
av_freep(&hx->edge_emu_buffer);
av_freep(&hx->dc_val_base); av_freep(&hx->dc_val_base);
av_freep(&hx->er.mb_index2xy); av_freep(&hx->er.mb_index2xy);
av_freep(&hx->er.error_status_table); av_freep(&hx->er.error_status_table);
...@@ -397,6 +395,16 @@ void ff_h264_free_tables(H264Context *h, int free_rbsp) ...@@ -397,6 +395,16 @@ void ff_h264_free_tables(H264Context *h, int free_rbsp)
if (i) if (i)
av_freep(&h->thread_context[i]); av_freep(&h->thread_context[i]);
} }
for (i = 0; i < h->nb_slice_ctx; i++) {
H264SliceContext *sl = &h->slice_ctx[i];
av_freep(&sl->bipred_scratchpad);
av_freep(&sl->edge_emu_buffer);
sl->bipred_scratchpad_allocated = 0;
sl->edge_emu_buffer_allocated = 0;
}
} }
int ff_h264_alloc_tables(H264Context *h) int ff_h264_alloc_tables(H264Context *h)
......
...@@ -399,6 +399,11 @@ typedef struct H264SliceContext { ...@@ -399,6 +399,11 @@ typedef struct H264SliceContext {
const uint8_t *intra_pcm_ptr; const uint8_t *intra_pcm_ptr;
uint8_t *bipred_scratchpad;
uint8_t *edge_emu_buffer;
int bipred_scratchpad_allocated;
int edge_emu_buffer_allocated;
/** /**
* non zero coeff count cache. * non zero coeff count cache.
* is 64 if not available. * is 64 if not available.
...@@ -708,8 +713,6 @@ typedef struct H264Context { ...@@ -708,8 +713,6 @@ typedef struct H264Context {
int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
int cur_chroma_format_idc; int cur_chroma_format_idc;
uint8_t *bipred_scratchpad;
uint8_t *edge_emu_buffer;
int16_t *dc_val_base; int16_t *dc_val_base;
AVBufferPool *qscale_table_pool; AVBufferPool *qscale_table_pool;
......
...@@ -237,12 +237,12 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext ...@@ -237,12 +237,12 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
full_my < 0 - extra_height || full_my < 0 - extra_height ||
full_mx + 16 /*FIXME*/ > pic_width + extra_width || full_mx + 16 /*FIXME*/ > pic_width + extra_width ||
full_my + 16 /*FIXME*/ > pic_height + extra_height) { full_my + 16 /*FIXME*/ > pic_height + extra_height) {
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
src_y - (2 << pixel_shift) - 2 * sl->mb_linesize, src_y - (2 << pixel_shift) - 2 * sl->mb_linesize,
sl->mb_linesize, sl->mb_linesize, sl->mb_linesize, sl->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/, full_mx - 2, 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
full_my - 2, pic_width, pic_height); full_my - 2, pic_width, pic_height);
src_y = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; src_y = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
emu = 1; emu = 1;
} }
...@@ -256,13 +256,13 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext ...@@ -256,13 +256,13 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
if (chroma_idc == 3 /* yuv444 */) { if (chroma_idc == 3 /* yuv444 */) {
src_cb = pic->f.data[1] + offset; src_cb = pic->f.data[1] + offset;
if (emu) { if (emu) {
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
src_cb - (2 << pixel_shift) - 2 * sl->mb_linesize, src_cb - (2 << pixel_shift) - 2 * sl->mb_linesize,
sl->mb_linesize, sl->mb_linesize, sl->mb_linesize, sl->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/, 16 + 5, 16 + 5 /*FIXME*/,
full_mx - 2, full_my - 2, full_mx - 2, full_my - 2,
pic_width, pic_height); pic_width, pic_height);
src_cb = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; src_cb = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
} }
qpix_op[luma_xy](dest_cb, src_cb, sl->mb_linesize); // FIXME try variable height perhaps? qpix_op[luma_xy](dest_cb, src_cb, sl->mb_linesize); // FIXME try variable height perhaps?
if (!square) if (!square)
...@@ -270,13 +270,13 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext ...@@ -270,13 +270,13 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
src_cr = pic->f.data[2] + offset; src_cr = pic->f.data[2] + offset;
if (emu) { if (emu) {
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
src_cr - (2 << pixel_shift) - 2 * sl->mb_linesize, src_cr - (2 << pixel_shift) - 2 * sl->mb_linesize,
sl->mb_linesize, sl->mb_linesize, sl->mb_linesize, sl->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/, 16 + 5, 16 + 5 /*FIXME*/,
full_mx - 2, full_my - 2, full_mx - 2, full_my - 2,
pic_width, pic_height); pic_width, pic_height);
src_cr = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; src_cr = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
} }
qpix_op[luma_xy](dest_cr, src_cr, sl->mb_linesize); // FIXME try variable height perhaps? qpix_op[luma_xy](dest_cr, src_cr, sl->mb_linesize); // FIXME try variable height perhaps?
if (!square) if (!square)
...@@ -297,22 +297,22 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext ...@@ -297,22 +297,22 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
(my >> ysh) * sl->mb_uvlinesize; (my >> ysh) * sl->mb_uvlinesize;
if (emu) { if (emu) {
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cb, h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cb,
sl->mb_uvlinesize, sl->mb_uvlinesize, sl->mb_uvlinesize, sl->mb_uvlinesize,
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
src_cb = h->edge_emu_buffer; src_cb = sl->edge_emu_buffer;
} }
chroma_op(dest_cb, src_cb, sl->mb_uvlinesize, chroma_op(dest_cb, src_cb, sl->mb_uvlinesize,
height >> (chroma_idc == 1 /* yuv420 */), height >> (chroma_idc == 1 /* yuv420 */),
mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7); mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
if (emu) { if (emu) {
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cr, h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cr,
sl->mb_uvlinesize, sl->mb_uvlinesize, sl->mb_uvlinesize, sl->mb_uvlinesize,
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
src_cr = h->edge_emu_buffer; src_cr = sl->edge_emu_buffer;
} }
chroma_op(dest_cr, src_cr, sl->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), chroma_op(dest_cr, src_cr, sl->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7); mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
...@@ -405,9 +405,9 @@ static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceCon ...@@ -405,9 +405,9 @@ static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceCon
if (list0 && list1) { if (list0 && list1) {
/* don't optimize for luma-only case, since B-frames usually /* don't optimize for luma-only case, since B-frames usually
* use implicit weights => chroma too. */ * use implicit weights => chroma too. */
uint8_t *tmp_cb = h->bipred_scratchpad; uint8_t *tmp_cb = sl->bipred_scratchpad;
uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift); uint8_t *tmp_cr = sl->bipred_scratchpad + (16 << pixel_shift);
uint8_t *tmp_y = h->bipred_scratchpad + 16 * sl->mb_uvlinesize; uint8_t *tmp_y = sl->bipred_scratchpad + 16 * sl->mb_uvlinesize;
int refn0 = sl->ref_cache[0][scan8[n]]; int refn0 = sl->ref_cache[0][scan8[n]];
int refn1 = sl->ref_cache[1][scan8[n]]; int refn1 = sl->ref_cache[1][scan8[n]];
......
...@@ -157,21 +157,20 @@ static void release_unused_pictures(H264Context *h, int remove_current) ...@@ -157,21 +157,20 @@ static void release_unused_pictures(H264Context *h, int remove_current)
} }
} }
static int alloc_scratch_buffers(H264Context *h, int linesize) static int alloc_scratch_buffers(H264SliceContext *sl, int linesize)
{ {
int alloc_size = FFALIGN(FFABS(linesize) + 32, 32); int alloc_size = FFALIGN(FFABS(linesize) + 32, 32);
if (h->bipred_scratchpad) av_fast_malloc(&sl->bipred_scratchpad, &sl->bipred_scratchpad_allocated, 16 * 6 * alloc_size);
return 0;
h->bipred_scratchpad = av_malloc(16 * 6 * alloc_size);
// edge emu needs blocksize + filter length - 1 // edge emu needs blocksize + filter length - 1
// (= 21x21 for h264) // (= 21x21 for h264)
h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 21); av_fast_malloc(&sl->edge_emu_buffer, &sl->edge_emu_buffer_allocated, alloc_size * 2 * 21);
if (!h->bipred_scratchpad || !h->edge_emu_buffer) { if (!sl->bipred_scratchpad || !sl->edge_emu_buffer) {
av_freep(&h->bipred_scratchpad); av_freep(&sl->bipred_scratchpad);
av_freep(&h->edge_emu_buffer); av_freep(&sl->edge_emu_buffer);
sl->bipred_scratchpad_allocated = 0;
sl->edge_emu_buffer_allocated = 0;
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
...@@ -381,8 +380,6 @@ static void clone_tables(H264Context *dst, H264SliceContext *sl, ...@@ -381,8 +380,6 @@ static void clone_tables(H264Context *dst, H264SliceContext *sl,
dst->DPB = src->DPB; dst->DPB = src->DPB;
dst->cur_pic_ptr = src->cur_pic_ptr; dst->cur_pic_ptr = src->cur_pic_ptr;
dst->cur_pic = src->cur_pic; dst->cur_pic = src->cur_pic;
dst->bipred_scratchpad = NULL;
dst->edge_emu_buffer = NULL;
ff_h264_pred_init(&dst->hpc, src->avctx->codec_id, src->sps.bit_depth_luma, ff_h264_pred_init(&dst->hpc, src->avctx->codec_id, src->sps.bit_depth_luma,
src->sps.chroma_format_idc); src->sps.chroma_format_idc);
} }
...@@ -460,8 +457,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst, ...@@ -460,8 +457,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
* the current value */ * the current value */
h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma; h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
av_freep(&h->bipred_scratchpad);
h->width = h1->width; h->width = h1->width;
h->height = h1->height; h->height = h1->height;
h->mb_height = h1->mb_height; h->mb_height = h1->mb_height;
...@@ -530,8 +525,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst, ...@@ -530,8 +525,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
h->rbsp_buffer = NULL; h->rbsp_buffer = NULL;
h->rbsp_buffer_size = 0; h->rbsp_buffer_size = 0;
h->bipred_scratchpad = NULL;
h->edge_emu_buffer = NULL;
h->thread_context[0] = h; h->thread_context[0] = h;
...@@ -567,12 +560,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst, ...@@ -567,12 +560,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
h->low_delay = h1->low_delay; h->low_delay = h1->low_delay;
h->droppable = h1->droppable; h->droppable = h1->droppable;
/* frame_start may not be called for the next thread (if it's decoding
* a bottom field) so this has to be allocated here */
err = alloc_scratch_buffers(h, h1->linesize);
if (err < 0)
return err;
// extradata/NAL handling // extradata/NAL handling
h->is_avc = h1->is_avc; h->is_avc = h1->is_avc;
...@@ -688,15 +675,6 @@ static int h264_frame_start(H264Context *h) ...@@ -688,15 +675,6 @@ static int h264_frame_start(H264Context *h)
h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3); h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
} }
/* can't be in alloc_tables because linesize isn't known there.
* FIXME: redo bipred weight to not require extra buffer? */
for (i = 0; i < h->slice_context_count; i++)
if (h->thread_context[i]) {
ret = alloc_scratch_buffers(h->thread_context[i], h->linesize);
if (ret < 0)
return ret;
}
/* Some macroblocks can be accessed before they're available in case /* Some macroblocks can be accessed before they're available in case
* of lost slices, MBAFF or threading. */ * of lost slices, MBAFF or threading. */
memset(h->slice_table, -1, memset(h->slice_table, -1,
...@@ -2181,6 +2159,11 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) ...@@ -2181,6 +2159,11 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
H264SliceContext *sl = arg; H264SliceContext *sl = arg;
H264Context *h = sl->h264; H264Context *h = sl->h264;
int lf_x_start = sl->mb_x; int lf_x_start = sl->mb_x;
int ret;
ret = alloc_scratch_buffers(sl, h->linesize);
if (ret < 0)
return ret;
sl->mb_skip_run = -1; sl->mb_skip_run = -1;
......
...@@ -296,6 +296,7 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, ...@@ -296,6 +296,7 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
int thirdpel, int dir, int avg) int thirdpel, int dir, int avg)
{ {
H264Context *h = &s->h; H264Context *h = &s->h;
H264SliceContext *sl = &h->slice_ctx[0];
const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic; const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
uint8_t *src, *dest; uint8_t *src, *dest;
int i, emu = 0; int i, emu = 0;
...@@ -316,11 +317,11 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, ...@@ -316,11 +317,11 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
src = pic->f.data[0] + mx + my * h->linesize; src = pic->f.data[0] + mx + my * h->linesize;
if (emu) { if (emu) {
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src, h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src,
h->linesize, h->linesize, h->linesize, h->linesize,
width + 1, height + 1, width + 1, height + 1,
mx, my, s->h_edge_pos, s->v_edge_pos); mx, my, s->h_edge_pos, s->v_edge_pos);
src = h->edge_emu_buffer; src = sl->edge_emu_buffer;
} }
if (thirdpel) if (thirdpel)
(avg ? s->tdsp.avg_tpel_pixels_tab (avg ? s->tdsp.avg_tpel_pixels_tab
...@@ -343,12 +344,12 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, ...@@ -343,12 +344,12 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
src = pic->f.data[i] + mx + my * h->uvlinesize; src = pic->f.data[i] + mx + my * h->uvlinesize;
if (emu) { if (emu) {
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src, h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src,
h->uvlinesize, h->uvlinesize, h->uvlinesize, h->uvlinesize,
width + 1, height + 1, width + 1, height + 1,
mx, my, (s->h_edge_pos >> 1), mx, my, (s->h_edge_pos >> 1),
s->v_edge_pos >> 1); s->v_edge_pos >> 1);
src = h->edge_emu_buffer; src = sl->edge_emu_buffer;
} }
if (thirdpel) if (thirdpel)
(avg ? s->tdsp.avg_tpel_pixels_tab (avg ? s->tdsp.avg_tpel_pixels_tab
...@@ -1060,6 +1061,7 @@ static int get_buffer(AVCodecContext *avctx, H264Picture *pic) ...@@ -1060,6 +1061,7 @@ static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
{ {
SVQ3Context *s = avctx->priv_data; SVQ3Context *s = avctx->priv_data;
H264Context *h = &s->h; H264Context *h = &s->h;
H264SliceContext *sl = &h->slice_ctx[0];
const int big_mb_num = h->mb_stride * (h->mb_height + 1) + 1; const int big_mb_num = h->mb_stride * (h->mb_height + 1) + 1;
const int mb_array_size = h->mb_stride * h->mb_height; const int mb_array_size = h->mb_stride * h->mb_height;
const int b4_stride = h->mb_width * 4 + 1; const int b4_stride = h->mb_width * 4 + 1;
...@@ -1093,9 +1095,9 @@ static int get_buffer(AVCodecContext *avctx, H264Picture *pic) ...@@ -1093,9 +1095,9 @@ static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
if (ret < 0) if (ret < 0)
goto fail; goto fail;
if (!h->edge_emu_buffer) { if (!sl->edge_emu_buffer) {
h->edge_emu_buffer = av_mallocz(pic->f.linesize[0] * 17); sl->edge_emu_buffer = av_mallocz(pic->f.linesize[0] * 17);
if (!h->edge_emu_buffer) if (!sl->edge_emu_buffer)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment