Commit fed92adb authored by Ronald S. Bultje's avatar Ronald S. Bultje

vp8: make mv_min/max thread-local if using partition threading.

Fixes tsan warnings like this in fate-vp8-test-vector-007:

WARNING: ThreadSanitizer: data race (pid=65909)
  Write of size 4 at 0x7d8c0000e088 by thread T1:
    #0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)
[..]
  Previous write of size 4 at 0x7d8c0000e088 by thread T2:
    #0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)
parent 9a54c6f2
......@@ -772,7 +772,7 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si
}
static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{
dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
......@@ -1031,7 +1031,7 @@ void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
}
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
int mb_x, int mb_y, int layout)
{
VP8Macroblock *mb_edge[3] = { 0 /* top */,
......@@ -1102,7 +1102,7 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
/* Choose the best mv out of 0,0 and the nearest mv */
clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
(mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
(mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
......@@ -1116,11 +1116,11 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
mb->bmv[0] = mb->mv;
}
} else {
clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
mb->bmv[0] = mb->mv;
}
} else {
clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
mb->bmv[0] = mb->mv;
}
} else {
......@@ -1166,7 +1166,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
}
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
VP8Macroblock *mb, int mb_x, int mb_y,
uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
VP56RangeCoder *c = &s->c;
......@@ -1230,7 +1231,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
if (is_vp7)
vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
else
vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
} else {
// intra MB, 16.1
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
......@@ -2205,8 +2206,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
VP8Context *s = avctx->priv_data;
int mb_x, mb_y;
s->mv_min.y = -MARGIN;
s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
s->mv_bounds.mv_min.y = -MARGIN;
s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
VP8Macroblock *mb = s->macroblocks_base +
((s->mb_width + 1) * (mb_y + 1) + 1);
......@@ -2214,20 +2215,20 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
s->mv_min.x = -MARGIN;
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
s->mv_bounds.mv_min.x = -MARGIN;
s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
if (mb_y == 0)
AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
DC_PRED * 0x01010101);
decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
prev_frame && prev_frame->seg_map ?
prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
s->mv_min.x -= 64;
s->mv_max.x -= 64;
s->mv_bounds.mv_min.x -= 64;
s->mv_bounds.mv_max.x -= 64;
}
s->mv_min.y -= 64;
s->mv_max.y -= 64;
s->mv_bounds.mv_min.y -= 64;
s->mv_bounds.mv_max.y -= 64;
}
}
......@@ -2325,8 +2326,8 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
if (!is_vp7 || mb_y == 0)
memset(td->left_nnz, 0, sizeof(td->left_nnz));
s->mv_min.x = -MARGIN;
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
td->mv_bounds.mv_min.x = -MARGIN;
td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
if (c->end <= c->buffer && c->bits >= 0)
......@@ -2350,7 +2351,7 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
dst[2] - dst[1], 2);
if (!s->mb_layout)
decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
prev_frame && prev_frame->seg_map ?
prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
......@@ -2397,8 +2398,8 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
dst[0] += 16;
dst[1] += 8;
dst[2] += 8;
s->mv_min.x -= 64;
s->mv_max.x -= 64;
td->mv_bounds.mv_min.x -= 64;
td->mv_bounds.mv_max.x -= 64;
if (mb_x == s->mb_width + 1) {
update_pos(td, mb_y, s->mb_width + 3);
......@@ -2504,6 +2505,8 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
int ret;
td->thread_nr = threadnr;
td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
atomic_store(&td->thread_mb_pos, mb_y << 16);
ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
......@@ -2515,8 +2518,8 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
s->filter_mb_row(avctx, tdata, jobnr, threadnr);
update_pos(td, mb_y, INT_MAX & 0xFFFF);
s->mv_min.y -= 64;
s->mv_max.y -= 64;
td->mv_bounds.mv_min.y -= 64 * num_jobs;
td->mv_bounds.mv_max.y -= 64 * num_jobs;
if (avctx->active_thread_type == FF_THREAD_FRAME)
ff_thread_report_progress(&curframe->tf, mb_y, 0);
......@@ -2662,8 +2665,8 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
s->num_jobs = num_jobs;
s->curframe = curframe;
s->prev_frame = prev_frame;
s->mv_min.y = -MARGIN;
s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
s->mv_bounds.mv_min.y = -MARGIN;
s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
for (i = 0; i < MAX_THREADS; i++) {
VP8ThreadData *td = &s->thread_data[i];
atomic_init(&td->thread_mb_pos, 0);
......
......@@ -93,6 +93,16 @@ typedef struct VP8Macroblock {
VP56mv bmv[16];
} VP8Macroblock;
typedef struct VP8intmv {
int x;
int y;
} VP8intmv;
typedef struct VP8mvbounds {
VP8intmv mv_min;
VP8intmv mv_max;
} VP8mvbounds;
typedef struct VP8ThreadData {
DECLARE_ALIGNED(16, int16_t, block)[6][4][16];
DECLARE_ALIGNED(16, int16_t, block_dc)[16];
......@@ -122,6 +132,7 @@ typedef struct VP8ThreadData {
#define EDGE_EMU_LINESIZE 32
DECLARE_ALIGNED(16, uint8_t, edge_emu_buffer)[21 * EDGE_EMU_LINESIZE];
VP8FilterStrength *filter_strength;
VP8mvbounds mv_bounds;
} VP8ThreadData;
typedef struct VP8Frame {
......@@ -129,11 +140,6 @@ typedef struct VP8Frame {
AVBufferRef *seg_map;
} VP8Frame;
typedef struct VP8intmv {
int x;
int y;
} VP8intmv;
#define MAX_THREADS 8
typedef struct VP8Context {
VP8ThreadData *thread_data;
......@@ -152,8 +158,7 @@ typedef struct VP8Context {
uint8_t deblock_filter;
uint8_t mbskip_enabled;
uint8_t profile;
VP8intmv mv_min;
VP8intmv mv_max;
VP8mvbounds mv_bounds;
int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type
int ref_count[3];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment