h264: split weighted pred-related vars into per-slice context

92c6c2a6 · Anton Khirnov · d8a45d2d · 92c6c2a6 · 92c6c2a6 · 92c6c2a6
Commit 92c6c2a6 authored Jan 17, 2015 by Anton Khirnov
11 changed files
--- a/libavcodec/dxva2_h264.c
+++ b/libavcodec/dxva2_h264.c
@@ -211,6 +211,7 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
                            const DXVA_PicParams_H264 *pp, unsigned position, unsigned size)
 {
    const H264Context *h = avctx->priv_data;
+    H264SliceContext *sl = &h->slice_ctx[0];
    struct dxva_context *ctx = avctx->hwaccel_context;
    unsigned list;

@@ -225,8 +226,8 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
    slice->slice_type            = ff_h264_get_slice_type(h);
    if (h->slice_type_fixed)
        slice->slice_type += 5;
-    slice->luma_log2_weight_denom       = h->luma_log2_weight_denom;
-    slice->chroma_log2_weight_denom     = h->chroma_log2_weight_denom;
+    slice->luma_log2_weight_denom       = sl->luma_log2_weight_denom;
+    slice->chroma_log2_weight_denom     = sl->chroma_log2_weight_denom;
    if (h->list_count > 0)
        slice->num_ref_idx_l0_active_minus1 = h->ref_count[0] - 1;
    if (h->list_count > 1)
@@ -250,15 +251,15 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
                                   r->reference == PICT_BOTTOM_FIELD);
                for (plane = 0; plane < 3; plane++) {
                    int w, o;
-                    if (plane == 0 && h->luma_weight_flag[list]) {
-                        w = h->luma_weight[i][list][0];
-                        o = h->luma_weight[i][list][1];
-                    } else if (plane >= 1 && h->chroma_weight_flag[list]) {
-                        w = h->chroma_weight[i][list][plane-1][0];
-                        o = h->chroma_weight[i][list][plane-1][1];
+                    if (plane == 0 && sl->luma_weight_flag[list]) {
+                        w = sl->luma_weight[i][list][0];
+                        o = sl->luma_weight[i][list][1];
+                    } else if (plane >= 1 && sl->chroma_weight_flag[list]) {
+                        w = sl->chroma_weight[i][list][plane-1][0];
+                        o = sl->chroma_weight[i][list][plane-1][1];
                    } else {
-                        w = 1 << (plane == 0 ? h->luma_log2_weight_denom :
-                                               h->chroma_log2_weight_denom);
+                        w = 1 << (plane == 0 ? sl->luma_log2_weight_denom :
+                                               sl->chroma_log2_weight_denom);
                        o = 0;
                    }
                    slice->Weights[list][i][plane][0] = w;

--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -74,7 +74,7 @@ static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
    fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8,
                   pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4);
    assert(!FRAME_MBAFF(h));
-    ff_h264_hl_decode_mb(h);
+    ff_h264_hl_decode_mb(h, &h->slice_ctx[0]);
 }

 void ff_h264_draw_horiz_band(H264Context *h, int y, int height)
@@ -642,7 +642,17 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
    h->pixel_shift        = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

+    h->nb_slice_ctx = (avctx->active_thread_type & FF_THREAD_SLICE) ?  H264_MAX_THREADS : 1;
+    h->slice_ctx = av_mallocz_array(h->nb_slice_ctx, sizeof(*h->slice_ctx));
+    if (!h->slice_ctx) {
+        h->nb_slice_ctx = 0;
+        return AVERROR(ENOMEM);
+    }
+
    h->thread_context[0] = h;
+    for (i = 0; i < h->nb_slice_ctx; i++)
+        h->slice_ctx[i].h264 = h->thread_context[0];
+
    h->outputed_poc      = h->next_outputed_poc = INT_MIN;
    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
        h->last_pocs[i] = INT_MIN;
@@ -679,12 +689,23 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
 static int decode_init_thread_copy(AVCodecContext *avctx)
 {
    H264Context *h = avctx->priv_data;
+    int i;

    if (!avctx->internal->is_copy)
        return 0;
    memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
    memset(h->pps_buffers, 0, sizeof(h->pps_buffers));

+    h->nb_slice_ctx = (avctx->active_thread_type & FF_THREAD_SLICE) ?  H264_MAX_THREADS : 1;
+    h->slice_ctx = av_mallocz_array(h->nb_slice_ctx, sizeof(*h->slice_ctx));
+    if (!h->slice_ctx) {
+        h->nb_slice_ctx = 0;
+        return AVERROR(ENOMEM);
+    }
+
+    for (i = 0; i < h->nb_slice_ctx; i++)
+        h->slice_ctx[i].h264 = h;
+
    h->avctx               = avctx;
    h->rbsp_buffer[0]      = NULL;
    h->rbsp_buffer[1]      = NULL;
@@ -976,37 +997,37 @@ static void decode_postinit(H264Context *h, int setup_finished)
        ff_thread_finish_setup(h->avctx);
 }

-int ff_pred_weight_table(H264Context *h)
+int ff_pred_weight_table(H264Context *h, H264SliceContext *sl)
 {
    int list, i;
    int luma_def, chroma_def;

-    h->use_weight             = 0;
-    h->use_weight_chroma      = 0;
-    h->luma_log2_weight_denom = get_ue_golomb(&h->gb);
+    sl->use_weight             = 0;
+    sl->use_weight_chroma      = 0;
+    sl->luma_log2_weight_denom = get_ue_golomb(&h->gb);
    if (h->sps.chroma_format_idc)
-        h->chroma_log2_weight_denom = get_ue_golomb(&h->gb);
-    luma_def   = 1 << h->luma_log2_weight_denom;
-    chroma_def = 1 << h->chroma_log2_weight_denom;
+        sl->chroma_log2_weight_denom = get_ue_golomb(&h->gb);
+    luma_def   = 1 << sl->luma_log2_weight_denom;
+    chroma_def = 1 << sl->chroma_log2_weight_denom;

    for (list = 0; list < 2; list++) {
-        h->luma_weight_flag[list]   = 0;
-        h->chroma_weight_flag[list] = 0;
+        sl->luma_weight_flag[list]   = 0;
+        sl->chroma_weight_flag[list] = 0;
        for (i = 0; i < h->ref_count[list]; i++) {
            int luma_weight_flag, chroma_weight_flag;

            luma_weight_flag = get_bits1(&h->gb);
            if (luma_weight_flag) {
-                h->luma_weight[i][list][0] = get_se_golomb(&h->gb);
-                h->luma_weight[i][list][1] = get_se_golomb(&h->gb);
-                if (h->luma_weight[i][list][0] != luma_def ||
-                    h->luma_weight[i][list][1] != 0) {
-                    h->use_weight             = 1;
-                    h->luma_weight_flag[list] = 1;
+                sl->luma_weight[i][list][0] = get_se_golomb(&h->gb);
+                sl->luma_weight[i][list][1] = get_se_golomb(&h->gb);
+                if (sl->luma_weight[i][list][0] != luma_def ||
+                    sl->luma_weight[i][list][1] != 0) {
+                    sl->use_weight             = 1;
+                    sl->luma_weight_flag[list] = 1;
                }
            } else {
-                h->luma_weight[i][list][0] = luma_def;
-                h->luma_weight[i][list][1] = 0;
+                sl->luma_weight[i][list][0] = luma_def;
+                sl->luma_weight[i][list][1] = 0;
            }

            if (h->sps.chroma_format_idc) {
@@ -1014,19 +1035,19 @@ int ff_pred_weight_table(H264Context *h)
                if (chroma_weight_flag) {
                    int j;
                    for (j = 0; j < 2; j++) {
-                        h->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb);
-                        h->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb);
-                        if (h->chroma_weight[i][list][j][0] != chroma_def ||
-                            h->chroma_weight[i][list][j][1] != 0) {
-                            h->use_weight_chroma        = 1;
-                            h->chroma_weight_flag[list] = 1;
+                        sl->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb);
+                        sl->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb);
+                        if (sl->chroma_weight[i][list][j][0] != chroma_def ||
+                            sl->chroma_weight[i][list][j][1] != 0) {
+                            sl->use_weight_chroma        = 1;
+                            sl->chroma_weight_flag[list] = 1;
                        }
                    }
                } else {
                    int j;
                    for (j = 0; j < 2; j++) {
-                        h->chroma_weight[i][list][j][0] = chroma_def;
-                        h->chroma_weight[i][list][j][1] = 0;
+                        sl->chroma_weight[i][list][j][0] = chroma_def;
+                        sl->chroma_weight[i][list][j][1] = 0;
                    }
                }
            }
@@ -1034,7 +1055,7 @@ int ff_pred_weight_table(H264Context *h)
        if (h->slice_type_nos != AV_PICTURE_TYPE_B)
            break;
    }
-    h->use_weight = h->use_weight || h->use_weight_chroma;
+    sl->use_weight = sl->use_weight || sl->use_weight_chroma;
    return 0;
 }

@@ -1401,6 +1422,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
 {
    AVCodecContext *const avctx = h->avctx;
    H264Context *hx; ///< thread context
+    H264SliceContext *sl;
    int buf_index;
    unsigned context_count;
    int next_avc;
@@ -1446,6 +1468,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
            }

            hx = h->thread_context[context_count];
+            sl = &h->slice_ctx[context_count];

            ptr = ff_h264_decode_nal(hx, buf + buf_index, &dst_length,
                                     &consumed, next_avc - buf_index);
@@ -1505,7 +1528,7 @@ again:
                hx->intra_gb_ptr      =
                hx->inter_gb_ptr      = &hx->gb;

-                if ((err = ff_h264_decode_slice_header(hx, h)))
+                if ((err = ff_h264_decode_slice_header(hx, sl, h)))
                    break;

                if (h->sei_recovery_frame_cnt >= 0 && h->recovery_frame < 0) {
@@ -1622,6 +1645,7 @@ again:
                h->nal_unit_type = hx->nal_unit_type;
                h->nal_ref_idc   = hx->nal_ref_idc;
                hx               = h;
+                sl               = &h->slice_ctx[0];
                goto again;
            }
        }
@@ -1769,6 +1793,9 @@ av_cold void ff_h264_free_context(H264Context *h)

    ff_h264_free_tables(h, 1); // FIXME cleanup init stuff perhaps

+    av_freep(&h->slice_ctx);
+    h->nb_slice_ctx = 0;
+
    for (i = 0; i < MAX_SPS_COUNT; i++)
        av_freep(h->sps_buffers + i);


--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -296,6 +296,22 @@ typedef struct H264Picture {
    int recovered;          ///< picture at IDR or recovery point + recovery count
 } H264Picture;

+typedef struct H264SliceContext {
+    struct H264Context *h264;
+
+    // Weighted pred stuff
+    int use_weight;
+    int use_weight_chroma;
+    int luma_log2_weight_denom;
+    int chroma_log2_weight_denom;
+    int luma_weight_flag[2];    ///< 7.4.3.2 luma_weight_lX_flag
+    int chroma_weight_flag[2];  ///< 7.4.3.2 chroma_weight_lX_flag
+    // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss
+    int luma_weight[48][2][2];
+    int chroma_weight[48][2][2][2];
+    int implicit_weight[48][48][2];
+} H264SliceContext;
+
 /**
 * H264Context
 */
@@ -312,6 +328,9 @@ typedef struct H264Context {
    H264Picture *cur_pic_ptr;
    H264Picture cur_pic;

+    H264SliceContext *slice_ctx;
+    int            nb_slice_ctx;
+
    int pixel_shift;    ///< 0 for 8-bit H264, 1 for high-bit-depth H264
    int chroma_qp[2];   // QPc

@@ -417,15 +436,6 @@ typedef struct H264Context {

    DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4];

-    // Weighted pred stuff
-    int use_weight;
-    int use_weight_chroma;
-    int luma_log2_weight_denom;
-    int chroma_log2_weight_denom;
-    // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss
-    int luma_weight[48][2][2];
-    int chroma_weight[48][2][2][2];
-    int implicit_weight[48][48][2];

    int direct_spatial_mv_pred;
    int col_parity;
@@ -683,8 +693,6 @@ typedef struct H264Context {

    int frame_recovered;    ///< Initial frame has been completely recovered

-    int luma_weight_flag[2];    ///< 7.4.3.2 luma_weight_lX_flag
-    int chroma_weight_flag[2];  ///< 7.4.3.2 chroma_weight_lX_flag

    // Timestamp stuff
    int sei_buffering_period_present;   ///< Buffering period SEI flag
@@ -762,7 +770,7 @@ int ff_h264_alloc_tables(H264Context *h);
 int ff_h264_fill_default_ref_list(H264Context *h);

 int ff_h264_decode_ref_pic_list_reordering(H264Context *h);
-void ff_h264_fill_mbaff_ref_list(H264Context *h);
+void ff_h264_fill_mbaff_ref_list(H264Context *h, H264SliceContext *sl);
 void ff_h264_remove_all_refs(H264Context *h);

 /**
@@ -787,7 +795,7 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h);
 */
 int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma);

-void ff_h264_hl_decode_mb(H264Context *h);
+void ff_h264_hl_decode_mb(H264Context *h, H264SliceContext *sl);
 int ff_h264_decode_extradata(H264Context *h);
 int ff_h264_decode_init(AVCodecContext *avctx);
 void ff_h264_decode_init_vlc(void);
@@ -1036,10 +1044,10 @@ int ff_h264_set_parameter_from_sps(H264Context *h);

 void ff_h264_draw_horiz_band(H264Context *h, int y, int height);
 int ff_init_poc(H264Context *h, int pic_field_poc[2], int *pic_poc);
-int ff_pred_weight_table(H264Context *h);
+int ff_pred_weight_table(H264Context *h, H264SliceContext *sl);
 int ff_set_ref_count(H264Context *h);

-int ff_h264_decode_slice_header(H264Context *h, H264Context *h0);
+int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl, H264Context *h0);
 int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count);
 int ff_h264_update_thread_context(AVCodecContext *dst,
                                  const AVCodecContext *src);

--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -362,7 +362,8 @@ static av_always_inline void mc_part_std(H264Context *h, int n, int square,
    }
 }

-static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
+static av_always_inline void mc_part_weighted(H264Context *h, H264SliceContext *sl,
+                                              int n, int square,
                                              int height, int delta,
                                              uint8_t *dest_y, uint8_t *dest_cb,
                                              uint8_t *dest_cr,
@@ -415,8 +416,8 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);

-        if (h->use_weight == 2) {
-            int weight0 = h->implicit_weight[refn0][refn1][h->mb_y & 1];
+        if (sl->use_weight == 2) {
+            int weight0 = sl->implicit_weight[refn0][refn1][h->mb_y & 1];
            int weight1 = 64 - weight0;
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize,
                            height, 5, weight0, weight1, 0);
@@ -426,23 +427,23 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
                              chroma_height, 5, weight0, weight1, 0);
        } else {
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height,
-                            h->luma_log2_weight_denom,
-                            h->luma_weight[refn0][0][0],
-                            h->luma_weight[refn1][1][0],
-                            h->luma_weight[refn0][0][1] +
-                            h->luma_weight[refn1][1][1]);
+                            sl->luma_log2_weight_denom,
+                            sl->luma_weight[refn0][0][0],
+                            sl->luma_weight[refn1][1][0],
+                            sl->luma_weight[refn0][0][1] +
+                            sl->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height,
-                              h->chroma_log2_weight_denom,
-                              h->chroma_weight[refn0][0][0][0],
-                              h->chroma_weight[refn1][1][0][0],
-                              h->chroma_weight[refn0][0][0][1] +
-                              h->chroma_weight[refn1][1][0][1]);
+                              sl->chroma_log2_weight_denom,
+                              sl->chroma_weight[refn0][0][0][0],
+                              sl->chroma_weight[refn1][1][0][0],
+                              sl->chroma_weight[refn0][0][0][1] +
+                              sl->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height,
-                              h->chroma_log2_weight_denom,
-                              h->chroma_weight[refn0][0][1][0],
-                              h->chroma_weight[refn1][1][1][0],
-                              h->chroma_weight[refn0][0][1][1] +
-                              h->chroma_weight[refn1][1][1][1]);
+                              sl->chroma_log2_weight_denom,
+                              sl->chroma_weight[refn0][0][1][0],
+                              sl->chroma_weight[refn1][1][1][0],
+                              sl->chroma_weight[refn0][0][1][1] +
+                              sl->chroma_weight[refn1][1][1][1]);
        }
    } else {
        int list     = list1 ? 1 : 0;
@@ -453,18 +454,18 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
                    qpix_put, chroma_put, pixel_shift, chroma_idc);

        luma_weight_op(dest_y, h->mb_linesize, height,
-                       h->luma_log2_weight_denom,
-                       h->luma_weight[refn][list][0],
-                       h->luma_weight[refn][list][1]);
-        if (h->use_weight_chroma) {
+                       sl->luma_log2_weight_denom,
+                       sl->luma_weight[refn][list][0],
+                       sl->luma_weight[refn][list][1]);
+        if (sl->use_weight_chroma) {
            chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height,
-                             h->chroma_log2_weight_denom,
-                             h->chroma_weight[refn][list][0][0],
-                             h->chroma_weight[refn][list][0][1]);
+                             sl->chroma_log2_weight_denom,
+                             sl->chroma_weight[refn][list][0][0],
+                             sl->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height,
-                             h->chroma_log2_weight_denom,
-                             h->chroma_weight[refn][list][1][0],
-                             h->chroma_weight[refn][list][1][1]);
+                             sl->chroma_log2_weight_denom,
+                             sl->chroma_weight[refn][list][1][0],
+                             sl->chroma_weight[refn][list][1][1]);
        }
    }
 }
@@ -801,7 +802,7 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
 #define SIMPLE 0
 #include "h264_mb_template.c"

-void ff_h264_hl_decode_mb(H264Context *h)
+void ff_h264_hl_decode_mb(H264Context *h, H264SliceContext *sl)
 {
    const int mb_xy   = h->mb_xy;
    const int mb_type = h->cur_pic.mb_type[mb_xy];
@@ -810,13 +811,13 @@ void ff_h264_hl_decode_mb(H264Context *h)

    if (CHROMA444(h)) {
        if (is_complex || h->pixel_shift)
-            hl_decode_mb_444_complex(h);
+            hl_decode_mb_444_complex(h, sl);
        else
-            hl_decode_mb_444_simple_8(h);
+            hl_decode_mb_444_simple_8(h, sl);
    } else if (is_complex) {
-        hl_decode_mb_complex(h);
+        hl_decode_mb_complex(h, sl);
    } else if (h->pixel_shift) {
-        hl_decode_mb_simple_16(h);
+        hl_decode_mb_simple_16(h, sl);
    } else
-        hl_decode_mb_simple_8(h);
+        hl_decode_mb_simple_8(h, sl);
 }
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -40,7 +40,7 @@
 #define CHROMA_IDC 2
 #include "h264_mc_template.c"

-static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
+static av_noinline void FUNC(hl_decode_mb)(H264Context *h, H264SliceContext *sl)
 {
    const int mb_x    = h->mb_x;
    const int mb_y    = h->mb_y;
@@ -176,13 +176,13 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
                               uvlinesize, 0, 0, SIMPLE, PIXEL_SHIFT);
        } else if (is_h264) {
            if (chroma422) {
-                FUNC(hl_motion_422)(h, dest_y, dest_cb, dest_cr,
+                FUNC(hl_motion_422)(h, sl, dest_y, dest_cb, dest_cr,
                              h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
                              h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
                              h->h264dsp.biweight_h264_pixels_tab);
            } else {
-                FUNC(hl_motion_420)(h, dest_y, dest_cb, dest_cr,
+                FUNC(hl_motion_420)(h, sl, dest_y, dest_cb, dest_cr,
                              h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
                              h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
@@ -272,7 +272,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
 #define CHROMA_IDC 3
 #include "h264_mc_template.c"

-static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
+static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h, H264SliceContext *sl)
 {
    const int mb_x    = h->mb_x;
    const int mb_y    = h->mb_y;
@@ -355,7 +355,7 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
                               linesize, 0, 1, SIMPLE, PIXEL_SHIFT);
        } else {
-            FUNC(hl_motion_444)(h, dest[0], dest[1], dest[2],
+            FUNC(hl_motion_444)(h, sl, dest[0], dest[1], dest[2],
                      h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
                      h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
                      h->h264dsp.weight_h264_pixels_tab,

--- a/libavcodec/h264_mc_template.c
+++ b/libavcodec/h264_mc_template.c
@@ -34,7 +34,8 @@
 #undef  mc_part
 #define mc_part MCFUNC(mc_part)

-static void mc_part(H264Context *h, int n, int square,
+static void mc_part(H264Context *h, H264SliceContext *sl,
+                    int n, int square,
                    int height, int delta,
                    uint8_t *dest_y, uint8_t *dest_cb,
                    uint8_t *dest_cr,
@@ -47,10 +48,10 @@ static void mc_part(H264Context *h, int n, int square,
                    h264_biweight_func *weight_avg,
                    int list0, int list1)
 {
-    if ((h->use_weight == 2 && list0 && list1 &&
-         (h->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->mb_y & 1] != 32)) ||
-        h->use_weight == 1)
-        mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
+    if ((sl->use_weight == 2 && list0 && list1 &&
+         (sl->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->mb_y & 1] != 32)) ||
+        sl->use_weight == 1)
+        mc_part_weighted(h, sl, n, square, height, delta, dest_y, dest_cb, dest_cr,
                         x_offset, y_offset, qpix_put, chroma_put,
                         weight_op[0], weight_op[1], weight_avg[0],
                         weight_avg[1], list0, list1, PIXEL_SHIFT, CHROMA_IDC);
@@ -60,7 +61,8 @@ static void mc_part(H264Context *h, int n, int square,
                    chroma_avg, list0, list1, PIXEL_SHIFT, CHROMA_IDC);
 }

-static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y,
+static void MCFUNC(hl_motion)(H264Context *h, H264SliceContext *sl,
+                              uint8_t *dest_y,
                              uint8_t *dest_cb, uint8_t *dest_cr,
                              qpel_mc_func(*qpix_put)[16],
                              h264_chroma_mc_func(*chroma_put),
@@ -79,25 +81,25 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y,
    prefetch_motion(h, 0, PIXEL_SHIFT, CHROMA_IDC);

    if (IS_16X16(mb_type)) {
-        mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
+        mc_part(h, sl, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    } else if (IS_16X8(mb_type)) {
-        mc_part(h, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0,
+        mc_part(h, sl, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
-        mc_part(h, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4,
+        mc_part(h, sl, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    } else if (IS_8X16(mb_type)) {
-        mc_part(h, 0, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
+        mc_part(h, sl, 0, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
-        mc_part(h, 4, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
+        mc_part(h, sl, 4, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
@@ -113,29 +115,29 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y,
            int y_offset = (i & 2) << 1;

            if (IS_SUB_8X8(sub_mb_type)) {
-                mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr,
+                mc_part(h, sl, n, 1, 8, 0, dest_y, dest_cb, dest_cr,
                        x_offset, y_offset,
                        qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                        &weight_op[1], &weight_avg[1],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            } else if (IS_SUB_8X4(sub_mb_type)) {
-                mc_part(h, n, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr,
+                mc_part(h, sl, n, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr,
                        x_offset, y_offset,
                        qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                        &weight_op[1], &weight_avg[1],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-                mc_part(h, n + 2, 0, 4, 4 << PIXEL_SHIFT,
+                mc_part(h, sl, n + 2, 0, 4, 4 << PIXEL_SHIFT,
                        dest_y, dest_cb, dest_cr, x_offset, y_offset + 2,
                        qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                        &weight_op[1], &weight_avg[1],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            } else if (IS_SUB_4X8(sub_mb_type)) {
-                mc_part(h, n, 0, 8, 4 * h->mb_linesize,
+                mc_part(h, sl, n, 0, 8, 4 * h->mb_linesize,
                        dest_y, dest_cb, dest_cr, x_offset, y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-                mc_part(h, n + 1, 0, 8, 4 * h->mb_linesize,
+                mc_part(h, sl, n + 1, 0, 8, 4 * h->mb_linesize,
                        dest_y, dest_cb, dest_cr, x_offset + 2, y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2],
@@ -146,7 +148,7 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y,
                for (j = 0; j < 4; j++) {
                    int sub_x_offset = x_offset + 2 * (j & 1);
                    int sub_y_offset = y_offset + (j & 2);
-                    mc_part(h, n + j, 1, 4, 0,
+                    mc_part(h, sl, n + j, 1, 4, 0,
                            dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                            qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                            &weight_op[2], &weight_avg[2],

--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -102,6 +102,7 @@ static int scan_mmco_reset(AVCodecParserContext *s)
 {
    H264ParseContext *p = s->priv_data;
    H264Context      *h = &p->h;
+    H264SliceContext *sl = &h->slice_ctx[0];

    h->slice_type_nos = s->pict_type & 3;

@@ -141,7 +142,7 @@ static int scan_mmco_reset(AVCodecParserContext *s)

    if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) ||
        (h->pps.weighted_bipred_idc == 1 && h->slice_type_nos == AV_PICTURE_TYPE_B))
-        ff_pred_weight_table(h);
+        ff_pred_weight_table(h, sl);

    if (get_bits1(&h->gb)) { // adaptive_ref_pic_marking_mode_flag
        int i;
@@ -543,6 +544,12 @@ static av_cold int init(AVCodecParserContext *s)
 {
    H264ParseContext *p = s->priv_data;
    H264Context      *h = &p->h;
+
+    h->slice_ctx = av_mallocz(sizeof(*h->slice_ctx));
+    if (!h->slice_ctx)
+        return 0;
+    h->nb_slice_ctx = 1;
+
    h->thread_context[0]   = h;
    h->slice_context_count = 1;
    ff_h264dsp_init(&h->h264dsp, 8, 1);

--- a/libavcodec/h264_refs.c
+++ b/libavcodec/h264_refs.c
@@ -337,7 +337,7 @@ int ff_h264_decode_ref_pic_list_reordering(H264Context *h)
    return 0;
 }

-void ff_h264_fill_mbaff_ref_list(H264Context *h)
+void ff_h264_fill_mbaff_ref_list(H264Context *h, H264SliceContext *sl)
 {
    int list, i, j;
    for (list = 0; list < 2; list++) { //FIXME try list_count
@@ -355,11 +355,11 @@ void ff_h264_fill_mbaff_ref_list(H264Context *h)
            field[1].reference = PICT_BOTTOM_FIELD;
            field[1].poc       = field[1].field_poc[1];

-            h->luma_weight[16 + 2 * i][list][0] = h->luma_weight[16 + 2 * i + 1][list][0] = h->luma_weight[i][list][0];
-            h->luma_weight[16 + 2 * i][list][1] = h->luma_weight[16 + 2 * i + 1][list][1] = h->luma_weight[i][list][1];
+            sl->luma_weight[16 + 2 * i][list][0] = sl->luma_weight[16 + 2 * i + 1][list][0] = sl->luma_weight[i][list][0];
+            sl->luma_weight[16 + 2 * i][list][1] = sl->luma_weight[16 + 2 * i + 1][list][1] = sl->luma_weight[i][list][1];
            for (j = 0; j < 2; j++) {
-                h->chroma_weight[16 + 2 * i][list][j][0] = h->chroma_weight[16 + 2 * i + 1][list][j][0] = h->chroma_weight[i][list][j][0];
-                h->chroma_weight[16 + 2 * i][list][j][1] = h->chroma_weight[16 + 2 * i + 1][list][j][1] = h->chroma_weight[i][list][j][1];
+                sl->chroma_weight[16 + 2 * i][list][j][0] = sl->chroma_weight[16 + 2 * i + 1][list][j][0] = sl->chroma_weight[i][list][j][0];
+                sl->chroma_weight[16 + 2 * i][list][j][1] = sl->chroma_weight[16 + 2 * i + 1][list][j][1] = sl->chroma_weight[i][list][j][1];
            }
        }
    }

--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -484,6 +484,8 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
    }

    if (!inited) {
+        H264SliceContext *orig_slice_ctx = h->slice_ctx;
+
        for (i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

@@ -503,6 +505,8 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
        av_frame_unref(&h->cur_pic.f);
        h->cur_pic.tf.f = &h->cur_pic.f;

+        h->slice_ctx = orig_slice_ctx;
+
        h->avctx             = dst;
        h->DPB               = NULL;
        h->qscale_table_pool = NULL;
@@ -814,13 +818,13 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
 * @param field  0/1 initialize the weight for interlaced MBAFF
 *                -1 initializes the rest
 */
-static void implicit_weight_table(H264Context *h, int field)
+static void implicit_weight_table(H264Context *h, H264SliceContext *sl, int field)
 {
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

    for (i = 0; i < 2; i++) {
-        h->luma_weight_flag[i]   = 0;
-        h->chroma_weight_flag[i] = 0;
+        sl->luma_weight_flag[i]   = 0;
+        sl->chroma_weight_flag[i] = 0;
    }

    if (field < 0) {
@@ -831,8 +835,8 @@ static void implicit_weight_table(H264Context *h, int field)
        }
        if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF(h) &&
            h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) {
-            h->use_weight        = 0;
-            h->use_weight_chroma = 0;
+            sl->use_weight        = 0;
+            sl->use_weight_chroma = 0;
            return;
        }
        ref_start  = 0;
@@ -845,10 +849,10 @@ static void implicit_weight_table(H264Context *h, int field)
        ref_count1 = 16 + 2 * h->ref_count[1];
    }

-    h->use_weight               = 2;
-    h->use_weight_chroma        = 2;
-    h->luma_log2_weight_denom   = 5;
-    h->chroma_log2_weight_denom = 5;
+    sl->use_weight               = 2;
+    sl->use_weight_chroma        = 2;
+    sl->luma_log2_weight_denom   = 5;
+    sl->chroma_log2_weight_denom = 5;

    for (ref0 = ref_start; ref0 < ref_count0; ref0++) {
        int poc0 = h->ref_list[0][ref0].poc;
@@ -866,10 +870,10 @@ static void implicit_weight_table(H264Context *h, int field)
                }
            }
            if (field < 0) {
-                h->implicit_weight[ref0][ref1][0] =
-                h->implicit_weight[ref0][ref1][1] = w;
+                sl->implicit_weight[ref0][ref1][0] =
+                sl->implicit_weight[ref0][ref1][1] = w;
            } else {
-                h->implicit_weight[ref0][ref1][field] = w;
+                sl->implicit_weight[ref0][ref1][field] = w;
            }
        }
    }
@@ -1139,6 +1143,8 @@ static int h264_slice_header_init(H264Context *h, int reinit)
            c->workaround_bugs   = h->workaround_bugs;
            c->pict_type         = h->pict_type;

+            h->slice_ctx[i].h264 = c;
+
            init_scan_tables(c);
            clone_tables(c, h, i);
            c->context_initialized = 1;
@@ -1166,7 +1172,7 @@ static int h264_slice_header_init(H264Context *h, int reinit)
 *
 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
 */
-int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
+int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl, H264Context *h0)
 {
    unsigned int first_mb_in_slice;
    unsigned int pps_id;
@@ -1606,15 +1612,15 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
    if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) ||
        (h->pps.weighted_bipred_idc == 1 &&
         h->slice_type_nos == AV_PICTURE_TYPE_B))
-        ff_pred_weight_table(h);
+        ff_pred_weight_table(h, sl);
    else if (h->pps.weighted_bipred_idc == 2 &&
             h->slice_type_nos == AV_PICTURE_TYPE_B) {
-        implicit_weight_table(h, -1);
+        implicit_weight_table(h, sl, -1);
    } else {
-        h->use_weight = 0;
+        sl->use_weight = 0;
        for (i = 0; i < 2; i++) {
-            h->luma_weight_flag[i]   = 0;
-            h->chroma_weight_flag[i] = 0;
+            sl->luma_weight_flag[i]   = 0;
+            sl->chroma_weight_flag[i] = 0;
        }
    }

@@ -1632,11 +1638,11 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
    }

    if (FRAME_MBAFF(h)) {
-        ff_h264_fill_mbaff_ref_list(h);
+        ff_h264_fill_mbaff_ref_list(h, sl);

        if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) {
-            implicit_weight_table(h, 0);
-            implicit_weight_table(h, 1);
+            implicit_weight_table(h, sl, 0);
+            implicit_weight_table(h, sl, 1);
        }
    }

@@ -1789,8 +1795,8 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
               h->qscale,
               h->deblocking_filter,
               h->slice_alpha_c0_offset, h->slice_beta_offset,
-               h->use_weight,
-               h->use_weight == 1 && h->use_weight_chroma ? "c" : "",
+               sl->use_weight,
+               sl->use_weight == 1 && sl->use_weight_chroma ? "c" : "",
               h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "");
    }

@@ -2170,7 +2176,8 @@ static void er_add_slice(H264Context *h, int startx, int starty,

 static int decode_slice(struct AVCodecContext *avctx, void *arg)
 {
-    H264Context *h = *(void **)arg;
+    H264SliceContext *sl = arg;
+    H264Context       *h = sl->h264;
    int lf_x_start = h->mb_x;

    h->mb_skip_run = -1;
@@ -2197,7 +2204,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
            // STOP_TIMER("decode_mb_cabac")

            if (ret >= 0)
-                ff_h264_hl_decode_mb(h);
+                ff_h264_hl_decode_mb(h, sl);

            // FIXME optimal? or let mb_decode decode 16x32 ?
            if (ret >= 0 && FRAME_MBAFF(h)) {
@@ -2206,7 +2213,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
                ret = ff_h264_decode_mb_cabac(h);

                if (ret >= 0)
-                    ff_h264_hl_decode_mb(h);
+                    ff_h264_hl_decode_mb(h, sl);
                h->mb_y--;
            }
            eos = get_cabac_terminate(&h->cabac);
@@ -2256,7 +2263,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
            int ret = ff_h264_decode_mb_cavlc(h);

            if (ret >= 0)
-                ff_h264_hl_decode_mb(h);
+                ff_h264_hl_decode_mb(h, sl);

            // FIXME optimal? or let mb_decode decode 16x32 ?
            if (ret >= 0 && FRAME_MBAFF(h)) {
@@ -2264,7 +2271,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
                ret = ff_h264_decode_mb_cavlc(h);

                if (ret >= 0)
-                    ff_h264_hl_decode_mb(h);
+                    ff_h264_hl_decode_mb(h, sl);
                h->mb_y--;
            }

@@ -2341,15 +2348,15 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count)
    if (h->avctx->hwaccel)
        return 0;
    if (context_count == 1) {
-        return decode_slice(avctx, &h);
+        return decode_slice(avctx, &h->slice_ctx[0]);
    } else {
        for (i = 1; i < context_count; i++) {
            hx                 = h->thread_context[i];
            hx->er.error_count = 0;
        }

-        avctx->execute(avctx, decode_slice, h->thread_context,
-                       NULL, context_count, sizeof(void *));
+        avctx->execute(avctx, decode_slice, h->slice_ctx,
+                       NULL, context_count, sizeof(h->slice_ctx[0]));

        /* pull back stuff from slices to master context */
        hx                   = h->thread_context[context_count - 1];

--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -1268,7 +1268,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
            }

            if (mb_type != 0)
-                ff_h264_hl_decode_mb(h);
+                ff_h264_hl_decode_mb(h, &h->slice_ctx[0]);

            if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
                h->cur_pic.mb_type[h->mb_x + h->mb_y * h->mb_stride] =

--- a/libavcodec/vaapi_h264.c
+++ b/libavcodec/vaapi_h264.c
@@ -192,27 +192,28 @@ static void fill_vaapi_plain_pred_weight_table(H264Context   *h,
                                               short          chroma_weight[32][2],
                                               short          chroma_offset[32][2])
 {
+    H264SliceContext *sl = &h->slice_ctx[0];
    unsigned int i, j;

-    *luma_weight_flag    = h->luma_weight_flag[list];
-    *chroma_weight_flag  = h->chroma_weight_flag[list];
+    *luma_weight_flag    = sl->luma_weight_flag[list];
+    *chroma_weight_flag  = sl->chroma_weight_flag[list];

    for (i = 0; i < h->ref_count[list]; i++) {
        /* VA API also wants the inferred (default) values, not
           only what is available in the bitstream (7.4.3.2). */
-        if (h->luma_weight_flag[list]) {
-            luma_weight[i] = h->luma_weight[i][list][0];
-            luma_offset[i] = h->luma_weight[i][list][1];
+        if (sl->luma_weight_flag[list]) {
+            luma_weight[i] = sl->luma_weight[i][list][0];
+            luma_offset[i] = sl->luma_weight[i][list][1];
        } else {
-            luma_weight[i] = 1 << h->luma_log2_weight_denom;
+            luma_weight[i] = 1 << sl->luma_log2_weight_denom;
            luma_offset[i] = 0;
        }
        for (j = 0; j < 2; j++) {
-            if (h->chroma_weight_flag[list]) {
-                chroma_weight[i][j] = h->chroma_weight[i][list][j][0];
-                chroma_offset[i][j] = h->chroma_weight[i][list][j][1];
+            if (sl->chroma_weight_flag[list]) {
+                chroma_weight[i][j] = sl->chroma_weight[i][list][j][0];
+                chroma_offset[i][j] = sl->chroma_weight[i][list][j][1];
            } else {
-                chroma_weight[i][j] = 1 << h->chroma_log2_weight_denom;
+                chroma_weight[i][j] = 1 << sl->chroma_log2_weight_denom;
                chroma_offset[i][j] = 0;
            }
        }
@@ -316,6 +317,7 @@ static int vaapi_h264_decode_slice(AVCodecContext *avctx,
                                   uint32_t        size)
 {
    H264Context * const h = avctx->priv_data;
+    H264SliceContext *sl  = &h->slice_ctx[0];
    VASliceParameterBufferH264 *slice_param;

    av_dlog(avctx, "vaapi_h264_decode_slice(): buffer %p, size %d\n",
@@ -336,8 +338,8 @@ static int vaapi_h264_decode_slice(AVCodecContext *avctx,
    slice_param->disable_deblocking_filter_idc  = h->deblocking_filter < 2 ? !h->deblocking_filter : h->deblocking_filter;
    slice_param->slice_alpha_c0_offset_div2     = h->slice_alpha_c0_offset / 2;
    slice_param->slice_beta_offset_div2         = h->slice_beta_offset     / 2;
-    slice_param->luma_log2_weight_denom         = h->luma_log2_weight_denom;
-    slice_param->chroma_log2_weight_denom       = h->chroma_log2_weight_denom;
+    slice_param->luma_log2_weight_denom         = sl->luma_log2_weight_denom;
+    slice_param->chroma_log2_weight_denom       = sl->chroma_log2_weight_denom;

    fill_vaapi_RefPicList(slice_param->RefPicList0, h->ref_list[0], h->list_count > 0 ? h->ref_count[0] : 0);
    fill_vaapi_RefPicList(slice_param->RefPicList1, h->ref_list[1], h->list_count > 1 ? h->ref_count[1] : 0);