Commit fbe02459 authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  configure: Check for CommandLineToArgvW
  vc1dec: Do not use random pred_flag if motion vector data is skipped
  vp8: Enclose pthread function calls in ifdefs
  snow: refactor code to work around a compiler bug in MSVC.
  vp8: Include the thread headers before using the pthread types
  configure: Check for getaddrinfo in ws2tcpip.h, too
  vp8: implement sliced threading
  vp8: move data from VP8Context->VP8Macroblock
  vp8: refactor decoding a single mb_row
  doc: update api changes with the right commit hashes
  mem: introduce av_malloc_array and av_mallocz_array

Conflicts:
	configure
	doc/APIchanges
	libavcodec/vp8.c
	libavutil/mem.h
	libavutil/version.h
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 9a1963fb b4675d0f
...@@ -1190,6 +1190,7 @@ HAVE_LIST=" ...@@ -1190,6 +1190,7 @@ HAVE_LIST="
clock_gettime clock_gettime
closesocket closesocket
cmov cmov
CommandLineToArgvW
cpuid cpuid
dcbzl dcbzl
dev_bktr_ioctl_bt848_h dev_bktr_ioctl_bt848_h
...@@ -3182,7 +3183,8 @@ check_func nanosleep || { check_func nanosleep -lrt && add_extralibs -lrt; } ...@@ -3182,7 +3183,8 @@ check_func nanosleep || { check_func nanosleep -lrt && add_extralibs -lrt; }
check_func clock_gettime || { check_func clock_gettime -lrt && add_extralibs -lrt; } check_func clock_gettime || { check_func clock_gettime -lrt && add_extralibs -lrt; }
check_func fcntl check_func fcntl
check_func fork check_func fork
check_func getaddrinfo $network_extralibs check_func getaddrinfo $network_extralibs ||
check_func_headers ws2tcpip.h getaddrinfo $network_extralibs
check_func gethrtime check_func gethrtime
check_func getopt check_func getopt
check_func getrusage check_func getrusage
...@@ -3207,6 +3209,7 @@ check_func_headers conio.h kbhit ...@@ -3207,6 +3209,7 @@ check_func_headers conio.h kbhit
check_func_headers windows.h PeekNamedPipe check_func_headers windows.h PeekNamedPipe
check_func_headers io.h setmode check_func_headers io.h setmode
check_func_headers lzo/lzo1x.h lzo1x_999_compress check_func_headers lzo/lzo1x.h lzo1x_999_compress
check_lib2 "windows.h shellapi.h" CommandLineToArgvW -lshell32
check_lib2 "windows.h psapi.h" GetProcessMemoryInfo -lpsapi check_lib2 "windows.h psapi.h" GetProcessMemoryInfo -lpsapi
check_func_headers windows.h GetProcessAffinityMask check_func_headers windows.h GetProcessAffinityMask
check_func_headers windows.h GetProcessTimes check_func_headers windows.h GetProcessTimes
......
...@@ -57,29 +57,32 @@ API changes, most recent first: ...@@ -57,29 +57,32 @@ API changes, most recent first:
2012-03-26 - a67d9cf - lavfi 2.66.100 2012-03-26 - a67d9cf - lavfi 2.66.100
Add avfilter_fill_frame_from_{audio_,}buffer_ref() functions. Add avfilter_fill_frame_from_{audio_,}buffer_ref() functions.
2012-06-22 - xxxxxxx - lavu 51.34.0 2012-07-10 - 5fade8a - lavu 51.37.0
Add av_malloc_array() and av_mallocz_array()
2012-06-22 - d3d3a32 - lavu 51.34.0
Add av_usleep() Add av_usleep()
2012-06-20 - ae0a301 - lavu 51.33.0 2012-06-20 - ae0a301 - lavu 51.33.0
Move av_gettime() to libavutil, add libavutil/time.h Move av_gettime() to libavutil, add libavutil/time.h
2012-xx-xx - xxxxxxx - lavr 0.0.3 2012-06-09 - 3971be0 - lavr 0.0.3
Add a parameter to avresample_build_matrix() for Dolby/DPLII downmixing. Add a parameter to avresample_build_matrix() for Dolby/DPLII downmixing.
2012-xx-xx - xxxxxxx - lavfi 2.23.0 - avfilter.h 2012-06-12 - 9baeff9 - lavfi 2.23.0 - avfilter.h
Add AVFilterContext.nb_inputs/outputs. Deprecate Add AVFilterContext.nb_inputs/outputs. Deprecate
AVFilterContext.input/output_count. AVFilterContext.input/output_count.
2012-xx-xx - xxxxxxx - lavfi 2.22.0 - avfilter.h 2012-06-12 - 84b9fbe - lavfi 2.22.0 - avfilter.h
Add avfilter_pad_get_type() and avfilter_pad_get_name(). Those Add avfilter_pad_get_type() and avfilter_pad_get_name(). Those
should now be used instead of accessing AVFilterPad members should now be used instead of accessing AVFilterPad members
directly. directly.
2012-xx-xx - xxxxxxx - lavu 51.32.0 - audioconvert.h 2012-06-12 - b0f0dfc - lavu 51.32.0 - audioconvert.h
Add av_get_channel_layout_channel_index(), av_get_channel_name() Add av_get_channel_layout_channel_index(), av_get_channel_name()
and av_channel_layout_extract_channel(). and av_channel_layout_extract_channel().
2012-05-25 - e0e0793 - lavu 51.31.0 - opt.h 2012-05-25 - 154486f - lavu 51.31.0 - opt.h
Add av_opt_set_bin() Add av_opt_set_bin()
2012-05-15 - lavfi 2.17.0 2012-05-15 - lavfi 2.17.0
......
...@@ -1013,10 +1013,18 @@ static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y ...@@ -1013,10 +1013,18 @@ static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
const int b_stride= s->b_width << s->block_max_depth; const int b_stride= s->b_width << s->block_max_depth;
BlockNode *block= &s->block[mb_x + mb_y * b_stride]; BlockNode *block= &s->block[mb_x + mb_y * b_stride];
BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; BlockNode backup[4];
unsigned value; unsigned value;
int rd, index; int rd, index;
/* We don't initialize backup[] during variable declaration, because
* that fails to compile on MSVC: "cannot convert from 'BlockNode' to
* 'int16_t'". */
backup[0] = block[0];
backup[1] = block[1];
backup[2] = block[b_stride];
backup[3] = block[b_stride + 1];
assert(mb_x>=0 && mb_y>=0); assert(mb_x>=0 && mb_y>=0);
assert(mb_x<b_stride); assert(mb_x<b_stride);
assert(((mb_x|mb_y)&1) == 0); assert(((mb_x|mb_y)&1) == 0);
......
...@@ -3956,7 +3956,7 @@ static int vc1_decode_p_mb_intfi(VC1Context *v) ...@@ -3956,7 +3956,7 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
s->current_picture.f.mb_type[mb_pos + v->mb_off] = MB_TYPE_16x16; s->current_picture.f.mb_type[mb_pos + v->mb_off] = MB_TYPE_16x16;
for (i = 0; i < 6; i++) v->mb_type[0][s->block_index[i]] = 0; for (i = 0; i < 6; i++) v->mb_type[0][s->block_index[i]] = 0;
if (idx_mbmode <= 5) { // 1-MV if (idx_mbmode <= 5) { // 1-MV
dmv_x = dmv_y = 0; dmv_x = dmv_y = pred_flag = 0;
if (idx_mbmode & 1) { if (idx_mbmode & 1) {
get_mvdata_interlaced(v, &dmv_x, &dmv_y, &pred_flag); get_mvdata_interlaced(v, &dmv_x, &dmv_y, &pred_flag);
} }
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
* Copyright (C) 2010 David Conrad * Copyright (C) 2010 David Conrad
* Copyright (C) 2010 Ronald S. Bultje * Copyright (C) 2010 Ronald S. Bultje
* Copyright (C) 2010 Jason Garrett-Glaser * Copyright (C) 2010 Jason Garrett-Glaser
* Copyright (C) 2012 Daniel Kang
* *
* This file is part of FFmpeg. * This file is part of FFmpeg.
* *
...@@ -36,11 +37,16 @@ ...@@ -36,11 +37,16 @@
static void free_buffers(VP8Context *s) static void free_buffers(VP8Context *s)
{ {
int i;
if (s->thread_data)
for (i = 0; i < MAX_THREADS; i++) {
av_freep(&s->thread_data[i].filter_strength);
av_freep(&s->thread_data[i].edge_emu_buffer);
}
av_freep(&s->thread_data);
av_freep(&s->macroblocks_base); av_freep(&s->macroblocks_base);
av_freep(&s->filter_strength);
av_freep(&s->intra4x4_pred_mode_top); av_freep(&s->intra4x4_pred_mode_top);
av_freep(&s->top_nnz); av_freep(&s->top_nnz);
av_freep(&s->edge_emu_buffer);
av_freep(&s->top_border); av_freep(&s->top_border);
s->macroblocks = NULL; s->macroblocks = NULL;
...@@ -108,6 +114,9 @@ static void vp8_decode_flush(AVCodecContext *avctx) ...@@ -108,6 +114,9 @@ static void vp8_decode_flush(AVCodecContext *avctx)
static int update_dimensions(VP8Context *s, int width, int height) static int update_dimensions(VP8Context *s, int width, int height)
{ {
AVCodecContext *avctx = s->avctx;
int i;
if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base || if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
height != s->avctx->height) { height != s->avctx->height) {
if (av_image_check_size(width, height, 0, s->avctx)) if (av_image_check_size(width, height, 0, s->avctx))
...@@ -121,14 +130,27 @@ static int update_dimensions(VP8Context *s, int width, int height) ...@@ -121,14 +130,27 @@ static int update_dimensions(VP8Context *s, int width, int height)
s->mb_width = (s->avctx->coded_width +15) / 16; s->mb_width = (s->avctx->coded_width +15) / 16;
s->mb_height = (s->avctx->coded_height+15) / 16; s->mb_height = (s->avctx->coded_height+15) / 16;
s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
if (!s->mb_layout) { // Frame threading and one thread
s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks)); s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4); s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
}
else // Sliced threading
s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top || for (i = 0; i < MAX_THREADS; i++) {
!s->top_nnz || !s->top_border) s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
#if HAVE_THREADS
pthread_mutex_init(&s->thread_data[i].lock, NULL);
pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
}
if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
(!s->intra4x4_pred_mode_top && !s->mb_layout))
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
s->macroblocks = s->macroblocks_base + 1; s->macroblocks = s->macroblocks_base + 1;
...@@ -332,12 +354,6 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) ...@@ -332,12 +354,6 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
memset(&s->segmentation, 0, sizeof(s->segmentation)); memset(&s->segmentation, 0, sizeof(s->segmentation));
} }
if (!s->macroblocks_base || /* first frame */
width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
if ((ret = update_dimensions(s, width, height)) < 0)
return ret;
}
ff_vp56_init_range_decoder(c, buf, header_size); ff_vp56_init_range_decoder(c, buf, header_size);
buf += header_size; buf += header_size;
buf_size -= header_size; buf_size -= header_size;
...@@ -366,6 +382,12 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) ...@@ -366,6 +382,12 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} }
if (!s->macroblocks_base || /* first frame */
width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
if ((ret = update_dimensions(s, width, height)) < 0)
return ret;
}
get_quants(s); get_quants(s);
if (!s->keyframe) { if (!s->keyframe) {
...@@ -468,19 +490,26 @@ const uint8_t *get_submv_prob(uint32_t left, uint32_t top) ...@@ -468,19 +490,26 @@ const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
* @returns the number of motion vectors parsed (2, 4 or 16) * @returns the number of motion vectors parsed (2, 4 or 16)
*/ */
static av_always_inline static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
{ {
int part_idx; int part_idx;
int n, num; int n, num;
VP8Macroblock *top_mb = &mb[2]; VP8Macroblock *top_mb;
VP8Macroblock *left_mb = &mb[-1]; VP8Macroblock *left_mb = &mb[-1];
const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning], const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
*mbsplits_top = vp8_mbsplits[top_mb->partitioning], *mbsplits_top,
*mbsplits_cur, *firstidx; *mbsplits_cur, *firstidx;
VP56mv *top_mv = top_mb->bmv; VP56mv *top_mv;
VP56mv *left_mv = left_mb->bmv; VP56mv *left_mv = left_mb->bmv;
VP56mv *cur_mv = mb->bmv; VP56mv *cur_mv = mb->bmv;
if (!layout) // layout is inlined, s->mb_layout is not
top_mb = &mb[2];
else
top_mb = &mb[-s->mb_width-1];
mbsplits_top = vp8_mbsplits[top_mb->partitioning];
top_mv = top_mb->bmv;
if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) { if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]); part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
...@@ -532,11 +561,11 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) ...@@ -532,11 +561,11 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
} }
static av_always_inline static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y) void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
{ {
VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, VP8Macroblock *mb_edge[3] = { 0 /* top */,
mb - 1 /* left */, mb - 1 /* left */,
mb + 1 /* top-left */ }; 0 /* top-left */ };
enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT }; enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
int idx = CNT_ZERO; int idx = CNT_ZERO;
...@@ -546,6 +575,15 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y) ...@@ -546,6 +575,15 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
uint8_t cnt[4] = { 0 }; uint8_t cnt[4] = { 0 };
VP56RangeCoder *c = &s->c; VP56RangeCoder *c = &s->c;
if (!layout) { // layout is inlined (s->mb_layout is not)
mb_edge[0] = mb + 2;
mb_edge[2] = mb + 1;
}
else {
mb_edge[0] = mb - s->mb_width-1;
mb_edge[2] = mb - s->mb_width-2;
}
AV_ZERO32(&near_mv[0]); AV_ZERO32(&near_mv[0]);
AV_ZERO32(&near_mv[1]); AV_ZERO32(&near_mv[1]);
AV_ZERO32(&near_mv[2]); AV_ZERO32(&near_mv[2]);
...@@ -600,7 +638,7 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y) ...@@ -600,7 +638,7 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) { if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
mb->mode = VP8_MVMODE_SPLIT; mb->mode = VP8_MVMODE_SPLIT;
mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1]; mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
} else { } else {
mb->mv.y += read_mv_component(c, s->prob->mvc[0]); mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
mb->mv.x += read_mv_component(c, s->prob->mvc[1]); mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
...@@ -622,14 +660,23 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y) ...@@ -622,14 +660,23 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
} }
static av_always_inline static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
int mb_x, int keyframe) int mb_x, int keyframe, int layout)
{ {
uint8_t *intra4x4 = s->intra4x4_pred_mode_mb; uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
if (layout == 1) {
VP8Macroblock *mb_top = mb - s->mb_width - 1;
memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
}
if (keyframe) { if (keyframe) {
int x, y; int x, y;
uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x; uint8_t* top;
uint8_t* const left = s->intra4x4_pred_mode_left; uint8_t* const left = s->intra4x4_pred_mode_left;
if (layout == 1)
top = mb->intra4x4_pred_mode_top;
else
top = s->intra4x4_pred_mode_top + 4 * mb_x;
for (y = 0; y < 4; y++) { for (y = 0; y < 4; y++) {
for (x = 0; x < 4; x++) { for (x = 0; x < 4; x++) {
const uint8_t *ctx; const uint8_t *ctx;
...@@ -647,7 +694,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, ...@@ -647,7 +694,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
} }
static av_always_inline static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref) void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
uint8_t *segment, uint8_t *ref, int layout)
{ {
VP56RangeCoder *c = &s->c; VP56RangeCoder *c = &s->c;
...@@ -656,7 +704,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_ ...@@ -656,7 +704,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_
*segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit; *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
} else if (s->segmentation.enabled) } else if (s->segmentation.enabled)
*segment = ref ? *ref : *segment; *segment = ref ? *ref : *segment;
s->segment = *segment; mb->segment = *segment;
mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
...@@ -664,14 +712,17 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_ ...@@ -664,14 +712,17 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
if (mb->mode == MODE_I4x4) { if (mb->mode == MODE_I4x4) {
decode_intra4x4_modes(s, c, mb_x, 1); decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
} else { } else {
const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u; const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
if (s->mb_layout == 1)
AV_WN32A(mb->intra4x4_pred_mode_top, modes);
else
AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes); AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
AV_WN32A(s->intra4x4_pred_mode_left, modes); AV_WN32A( s->intra4x4_pred_mode_left, modes);
} }
s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
mb->ref_frame = VP56_FRAME_CURRENT; mb->ref_frame = VP56_FRAME_CURRENT;
} else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
// inter MB, 16.2 // inter MB, 16.2
...@@ -683,15 +734,15 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_ ...@@ -683,15 +734,15 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_
s->ref_count[mb->ref_frame-1]++; s->ref_count[mb->ref_frame-1]++;
// motion vectors, 16.3 // motion vectors, 16.3
decode_mvs(s, mb, mb_x, mb_y); decode_mvs(s, mb, mb_x, mb_y, layout);
} else { } else {
// intra MB, 16.1 // intra MB, 16.1
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
if (mb->mode == MODE_I4x4) if (mb->mode == MODE_I4x4)
decode_intra4x4_modes(s, c, mb_x, 0); decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
mb->ref_frame = VP56_FRAME_CURRENT; mb->ref_frame = VP56_FRAME_CURRENT;
mb->partitioning = VP8_SPLITMVMODE_NONE; mb->partitioning = VP8_SPLITMVMODE_NONE;
AV_ZERO32(&mb->bmv[0]); AV_ZERO32(&mb->bmv[0]);
...@@ -787,28 +838,28 @@ int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], ...@@ -787,28 +838,28 @@ int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
} }
static av_always_inline static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
uint8_t t_nnz[9], uint8_t l_nnz[9]) uint8_t t_nnz[9], uint8_t l_nnz[9])
{ {
int i, x, y, luma_start = 0, luma_ctx = 3; int i, x, y, luma_start = 0, luma_ctx = 3;
int nnz_pred, nnz, nnz_total = 0; int nnz_pred, nnz, nnz_total = 0;
int segment = s->segment; int segment = mb->segment;
int block_dc = 0; int block_dc = 0;
if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
nnz_pred = t_nnz[8] + l_nnz[8]; nnz_pred = t_nnz[8] + l_nnz[8];
// decode DC values and do hadamard // decode DC values and do hadamard
nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred, nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
s->qmat[segment].luma_dc_qmul); s->qmat[segment].luma_dc_qmul);
l_nnz[8] = t_nnz[8] = !!nnz; l_nnz[8] = t_nnz[8] = !!nnz;
if (nnz) { if (nnz) {
nnz_total += nnz; nnz_total += nnz;
block_dc = 1; block_dc = 1;
if (nnz == 1) if (nnz == 1)
s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc); s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
else else
s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc); s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
} }
luma_start = 1; luma_start = 1;
luma_ctx = 0; luma_ctx = 0;
...@@ -818,10 +869,10 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, ...@@ -818,10 +869,10 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
for (y = 0; y < 4; y++) for (y = 0; y < 4; y++)
for (x = 0; x < 4; x++) { for (x = 0; x < 4; x++) {
nnz_pred = l_nnz[y] + t_nnz[x]; nnz_pred = l_nnz[y] + t_nnz[x];
nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
nnz_pred, s->qmat[segment].luma_qmul); nnz_pred, s->qmat[segment].luma_qmul);
// nnz+block_dc may be one more than the actual last index, but we don't care // nnz+block_dc may be one more than the actual last index, but we don't care
s->non_zero_count_cache[y][x] = nnz + block_dc; td->non_zero_count_cache[y][x] = nnz + block_dc;
t_nnz[x] = l_nnz[y] = !!nnz; t_nnz[x] = l_nnz[y] = !!nnz;
nnz_total += nnz; nnz_total += nnz;
} }
...@@ -833,9 +884,9 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, ...@@ -833,9 +884,9 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
for (y = 0; y < 2; y++) for (y = 0; y < 2; y++)
for (x = 0; x < 2; x++) { for (x = 0; x < 2; x++) {
nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0, nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
nnz_pred, s->qmat[segment].chroma_qmul); nnz_pred, s->qmat[segment].chroma_qmul);
s->non_zero_count_cache[i][(y<<1)+x] = nnz; td->non_zero_count_cache[i][(y<<1)+x] = nnz;
t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
nnz_total += nnz; nnz_total += nnz;
} }
...@@ -980,8 +1031,8 @@ int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf ...@@ -980,8 +1031,8 @@ int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf
} }
static av_always_inline static av_always_inline
void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
int mb_x, int mb_y) VP8Macroblock *mb, int mb_x, int mb_y)
{ {
AVCodecContext *avctx = s->avctx; AVCodecContext *avctx = s->avctx;
int x, y, mode, nnz; int x, y, mode, nnz;
...@@ -989,7 +1040,7 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, ...@@ -989,7 +1040,7 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
// for the first row, we need to run xchg_mb_border to init the top edge to 127 // for the first row, we need to run xchg_mb_border to init the top edge to 127
// otherwise, skip it if we aren't going to deblock // otherwise, skip it if we aren't going to deblock
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y)) if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
s->filter.simple, 1); s->filter.simple, 1);
...@@ -1003,7 +1054,7 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, ...@@ -1003,7 +1054,7 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
s->hpc.pred16x16[mode](dst[0], s->linesize); s->hpc.pred16x16[mode](dst[0], s->linesize);
} else { } else {
uint8_t *ptr = dst[0]; uint8_t *ptr = dst[0];
uint8_t *intra4x4 = s->intra4x4_pred_mode_mb; uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
uint8_t tr_top[4] = { 127, 127, 127, 127 }; uint8_t tr_top[4] = { 127, 127, 127, 127 };
// all blocks on the right edge of the macroblock use bottom edge // all blocks on the right edge of the macroblock use bottom edge
...@@ -1019,7 +1070,7 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, ...@@ -1019,7 +1070,7 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
} }
if (mb->skip) if (mb->skip)
AV_ZERO128(s->non_zero_count_cache); AV_ZERO128(td->non_zero_count_cache);
for (y = 0; y < 4; y++) { for (y = 0; y < 4; y++) {
uint8_t *topright = ptr + 4 - s->linesize; uint8_t *topright = ptr + 4 - s->linesize;
...@@ -1072,12 +1123,12 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, ...@@ -1072,12 +1123,12 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36); AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
} }
nnz = s->non_zero_count_cache[y][x]; nnz = td->non_zero_count_cache[y][x];
if (nnz) { if (nnz) {
if (nnz == 1) if (nnz == 1)
s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize); s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
else else
s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize); s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
} }
topright += 4; topright += 4;
} }
...@@ -1088,14 +1139,14 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, ...@@ -1088,14 +1139,14 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
} }
if (avctx->flags & CODEC_FLAG_EMU_EDGE) { if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y); mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
} else { } else {
mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y); mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
} }
s->hpc.pred8x8[mode](dst[1], s->uvlinesize); s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
s->hpc.pred8x8[mode](dst[2], s->uvlinesize); s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y)) if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
s->filter.simple, 0); s->filter.simple, 0);
...@@ -1125,7 +1176,8 @@ static const uint8_t subpel_idx[3][8] = { ...@@ -1125,7 +1176,8 @@ static const uint8_t subpel_idx[3][8] = {
* @param mc_func motion compensation function pointers (bilinear or sixtap MC) * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
*/ */
static av_always_inline static av_always_inline
void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv, void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
AVFrame *ref, const VP56mv *mv,
int x_off, int y_off, int block_w, int block_h, int x_off, int y_off, int block_w, int block_h,
int width, int height, int linesize, int width, int height, int linesize,
vp8_mc_func mc_func[3][3]) vp8_mc_func mc_func[3][3])
...@@ -1145,10 +1197,10 @@ void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv, ...@@ -1145,10 +1197,10 @@ void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
src += y_off * linesize + x_off; src += y_off * linesize + x_off;
if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize, s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
x_off - mx_idx, y_off - my_idx, width, height); x_off - mx_idx, y_off - my_idx, width, height);
src = s->edge_emu_buffer + mx_idx + linesize * my_idx; src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
} }
mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
} else { } else {
...@@ -1175,8 +1227,8 @@ void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv, ...@@ -1175,8 +1227,8 @@ void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
* @param mc_func motion compensation function pointers (bilinear or sixtap MC) * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
*/ */
static av_always_inline static av_always_inline
void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref, void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
const VP56mv *mv, int x_off, int y_off, AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
int block_w, int block_h, int width, int height, int linesize, int block_w, int block_h, int width, int height, int linesize,
vp8_mc_func mc_func[3][3]) vp8_mc_func mc_func[3][3])
{ {
...@@ -1195,16 +1247,16 @@ void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref, ...@@ -1195,16 +1247,16 @@ void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0); ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize, s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
x_off - mx_idx, y_off - my_idx, width, height); x_off - mx_idx, y_off - my_idx, width, height);
src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx; src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize, s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
x_off - mx_idx, y_off - my_idx, width, height); x_off - mx_idx, y_off - my_idx, width, height);
src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx; src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
} else { } else {
mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
...@@ -1218,7 +1270,7 @@ void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref, ...@@ -1218,7 +1270,7 @@ void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
} }
static av_always_inline static av_always_inline
void vp8_mc_part(VP8Context *s, uint8_t *dst[3], void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
AVFrame *ref_frame, int x_off, int y_off, AVFrame *ref_frame, int x_off, int y_off,
int bx_off, int by_off, int bx_off, int by_off,
int block_w, int block_h, int block_w, int block_h,
...@@ -1227,7 +1279,7 @@ void vp8_mc_part(VP8Context *s, uint8_t *dst[3], ...@@ -1227,7 +1279,7 @@ void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
VP56mv uvmv = *mv; VP56mv uvmv = *mv;
/* Y */ /* Y */
vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off, vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
ref_frame, mv, x_off + bx_off, y_off + by_off, ref_frame, mv, x_off + bx_off, y_off + by_off,
block_w, block_h, width, height, s->linesize, block_w, block_h, width, height, s->linesize,
s->put_pixels_tab[block_w == 8]); s->put_pixels_tab[block_w == 8]);
...@@ -1241,7 +1293,7 @@ void vp8_mc_part(VP8Context *s, uint8_t *dst[3], ...@@ -1241,7 +1293,7 @@ void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
bx_off >>= 1; by_off >>= 1; bx_off >>= 1; by_off >>= 1;
width >>= 1; height >>= 1; width >>= 1; height >>= 1;
block_w >>= 1; block_h >>= 1; block_w >>= 1; block_h >>= 1;
vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off, vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
dst[2] + by_off * s->uvlinesize + bx_off, ref_frame, dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
&uvmv, x_off + bx_off, y_off + by_off, &uvmv, x_off + bx_off, y_off + by_off,
block_w, block_h, width, height, s->uvlinesize, block_w, block_h, width, height, s->uvlinesize,
...@@ -1272,8 +1324,8 @@ static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, i ...@@ -1272,8 +1324,8 @@ static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, i
* Apply motion vectors to prediction buffer, chapter 18. * Apply motion vectors to prediction buffer, chapter 18.
*/ */
static av_always_inline static av_always_inline
void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
int mb_x, int mb_y) VP8Macroblock *mb, int mb_x, int mb_y)
{ {
int x_off = mb_x << 4, y_off = mb_y << 4; int x_off = mb_x << 4, y_off = mb_y << 4;
int width = 16*s->mb_width, height = 16*s->mb_height; int width = 16*s->mb_width, height = 16*s->mb_height;
...@@ -1282,7 +1334,7 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, ...@@ -1282,7 +1334,7 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
switch (mb->partitioning) { switch (mb->partitioning) {
case VP8_SPLITMVMODE_NONE: case VP8_SPLITMVMODE_NONE:
vp8_mc_part(s, dst, ref, x_off, y_off, vp8_mc_part(s, td, dst, ref, x_off, y_off,
0, 0, 16, 16, width, height, &mb->mv); 0, 0, 16, 16, width, height, &mb->mv);
break; break;
case VP8_SPLITMVMODE_4x4: { case VP8_SPLITMVMODE_4x4: {
...@@ -1292,7 +1344,7 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, ...@@ -1292,7 +1344,7 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
/* Y */ /* Y */
for (y = 0; y < 4; y++) { for (y = 0; y < 4; y++) {
for (x = 0; x < 4; x++) { for (x = 0; x < 4; x++) {
vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4, vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
ref, &bmv[4*y + x], ref, &bmv[4*y + x],
4*x + x_off, 4*y + y_off, 4, 4, 4*x + x_off, 4*y + y_off, 4, 4,
width, height, s->linesize, width, height, s->linesize,
...@@ -1318,7 +1370,7 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, ...@@ -1318,7 +1370,7 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
uvmv.x &= ~7; uvmv.x &= ~7;
uvmv.y &= ~7; uvmv.y &= ~7;
} }
vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4, vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv, dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
4*x + x_off, 4*y + y_off, 4, 4, 4*x + x_off, 4*y + y_off, 4, 4,
width, height, s->uvlinesize, width, height, s->uvlinesize,
...@@ -1328,51 +1380,52 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, ...@@ -1328,51 +1380,52 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
break; break;
} }
case VP8_SPLITMVMODE_16x8: case VP8_SPLITMVMODE_16x8:
vp8_mc_part(s, dst, ref, x_off, y_off, vp8_mc_part(s, td, dst, ref, x_off, y_off,
0, 0, 16, 8, width, height, &bmv[0]); 0, 0, 16, 8, width, height, &bmv[0]);
vp8_mc_part(s, dst, ref, x_off, y_off, vp8_mc_part(s, td, dst, ref, x_off, y_off,
0, 8, 16, 8, width, height, &bmv[1]); 0, 8, 16, 8, width, height, &bmv[1]);
break; break;
case VP8_SPLITMVMODE_8x16: case VP8_SPLITMVMODE_8x16:
vp8_mc_part(s, dst, ref, x_off, y_off, vp8_mc_part(s, td, dst, ref, x_off, y_off,
0, 0, 8, 16, width, height, &bmv[0]); 0, 0, 8, 16, width, height, &bmv[0]);
vp8_mc_part(s, dst, ref, x_off, y_off, vp8_mc_part(s, td, dst, ref, x_off, y_off,
8, 0, 8, 16, width, height, &bmv[1]); 8, 0, 8, 16, width, height, &bmv[1]);
break; break;
case VP8_SPLITMVMODE_8x8: case VP8_SPLITMVMODE_8x8:
vp8_mc_part(s, dst, ref, x_off, y_off, vp8_mc_part(s, td, dst, ref, x_off, y_off,
0, 0, 8, 8, width, height, &bmv[0]); 0, 0, 8, 8, width, height, &bmv[0]);
vp8_mc_part(s, dst, ref, x_off, y_off, vp8_mc_part(s, td, dst, ref, x_off, y_off,
8, 0, 8, 8, width, height, &bmv[1]); 8, 0, 8, 8, width, height, &bmv[1]);
vp8_mc_part(s, dst, ref, x_off, y_off, vp8_mc_part(s, td, dst, ref, x_off, y_off,
0, 8, 8, 8, width, height, &bmv[2]); 0, 8, 8, 8, width, height, &bmv[2]);
vp8_mc_part(s, dst, ref, x_off, y_off, vp8_mc_part(s, td, dst, ref, x_off, y_off,
8, 8, 8, 8, width, height, &bmv[3]); 8, 8, 8, 8, width, height, &bmv[3]);
break; break;
} }
} }
static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
uint8_t *dst[3], VP8Macroblock *mb)
{ {
int x, y, ch; int x, y, ch;
if (mb->mode != MODE_I4x4) { if (mb->mode != MODE_I4x4) {
uint8_t *y_dst = dst[0]; uint8_t *y_dst = dst[0];
for (y = 0; y < 4; y++) { for (y = 0; y < 4; y++) {
uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]); uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
if (nnz4) { if (nnz4) {
if (nnz4&~0x01010101) { if (nnz4&~0x01010101) {
for (x = 0; x < 4; x++) { for (x = 0; x < 4; x++) {
if ((uint8_t)nnz4 == 1) if ((uint8_t)nnz4 == 1)
s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize); s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
else if((uint8_t)nnz4 > 1) else if((uint8_t)nnz4 > 1)
s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
nnz4 >>= 8; nnz4 >>= 8;
if (!nnz4) if (!nnz4)
break; break;
} }
} else { } else {
s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize); s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
} }
} }
y_dst += 4*s->linesize; y_dst += 4*s->linesize;
...@@ -1380,16 +1433,16 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo ...@@ -1380,16 +1433,16 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo
} }
for (ch = 0; ch < 2; ch++) { for (ch = 0; ch < 2; ch++) {
uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]); uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
if (nnz4) { if (nnz4) {
uint8_t *ch_dst = dst[1+ch]; uint8_t *ch_dst = dst[1+ch];
if (nnz4&~0x01010101) { if (nnz4&~0x01010101) {
for (y = 0; y < 2; y++) { for (y = 0; y < 2; y++) {
for (x = 0; x < 2; x++) { for (x = 0; x < 2; x++) {
if ((uint8_t)nnz4 == 1) if ((uint8_t)nnz4 == 1)
s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
else if((uint8_t)nnz4 > 1) else if((uint8_t)nnz4 > 1)
s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
nnz4 >>= 8; nnz4 >>= 8;
if (!nnz4) if (!nnz4)
goto chroma_idct_end; goto chroma_idct_end;
...@@ -1397,7 +1450,7 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo ...@@ -1397,7 +1450,7 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo
ch_dst += 4*s->uvlinesize; ch_dst += 4*s->uvlinesize;
} }
} else { } else {
s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize); s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
} }
} }
chroma_idct_end: ; chroma_idct_end: ;
...@@ -1409,7 +1462,7 @@ static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *m ...@@ -1409,7 +1462,7 @@ static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *m
int interior_limit, filter_level; int interior_limit, filter_level;
if (s->segmentation.enabled) { if (s->segmentation.enabled) {
filter_level = s->segmentation.filter_level[s->segment]; filter_level = s->segmentation.filter_level[mb->segment];
if (!s->segmentation.absolute_vals) if (!s->segmentation.absolute_vals)
filter_level += s->filter.level; filter_level += s->filter.level;
} else } else
...@@ -1535,51 +1588,277 @@ static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Fi ...@@ -1535,51 +1588,277 @@ static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Fi
} }
} }
static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y) static void release_queued_segmaps(VP8Context *s, int is_close)
{
int leave_behind = is_close ? 0 : !s->maps_are_invalid;
while (s->num_maps_to_be_freed > leave_behind)
av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
s->maps_are_invalid = 0;
}
/* Slack added on both sides of the motion vector limits. Its value (64)
 * equals the amount by which mv_min/mv_max are shifted per macroblock. */
#define MARGIN (16 << 2)

/**
 * Run decode_mb_mode() over every macroblock of the frame, row by row,
 * before any residual decoding takes place.
 *
 * The motion vector limits in s->mv_min / s->mv_max are re-derived as the
 * scan advances: the x limits restart on every row and move by 64 per
 * macroblock, the y limits move by 64 per row.
 *
 * @param avctx      codec context; avctx->priv_data is the VP8Context
 * @param curframe   frame being decoded; its ref_index[0] plane is handed
 *                   to decode_mb_mode() one entry per macroblock
 * @param prev_frame previous frame, may be NULL; when it has a
 *                   ref_index[0] plane the matching entry is passed along,
 *                   otherwise NULL is passed
 */
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
                                   AVFrame *prev_frame)
{
    VP8Context *s = avctx->priv_data;
    int row, col;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;

    for (row = 0; row < s->mb_height; row++) {
        /* Rows are (mb_width + 1) entries apart; skip one full row plus one
         * entry so that "above" and "left" neighbours always exist. */
        VP8Macroblock *row_mb = s->macroblocks_base + ((s->mb_width+1)*(row + 1) + 1);
        int row_start = row*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

        for (col = 0; col < s->mb_width; col++) {
            VP8Macroblock *mb = row_mb + col;
            int mb_xy = row_start + col;

            /* On the first row, seed the entry one row up with DC_PRED so
             * that top prediction modes are defined. */
            if (!row)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);

            /* NOTE(review): the trailing 1 presumably selects the
             * whole-frame macroblock layout -- confirm in decode_mb_mode(). */
            decode_mb_mode(s, mb, col, row, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);

            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}
#if HAVE_THREADS
/*
 * check_thread_pos(td, otd, mb_x_check, mb_y_check)
 *
 * Block until the thread described by otd has reached the position
 * (mb_y_check << 16) | (mb_x_check & 0xFFFF).
 *
 * If otd->thread_mb_pos is already at or past that position nothing is
 * locked. Otherwise otd->lock is taken, the target is published in
 * td->wait_mb_pos, and the caller sleeps on otd->cond until the position is
 * reached; td->wait_mb_pos is then reset to INT_MAX and the lock released.
 *
 * Expands to a single statement without a trailing semicolon, so it can be
 * used as the body of an un-braced if/else. All arguments are parenthesized
 * in the expansion.
 *
 * NOTE(review): thread_mb_pos is read here before the lock is taken and is
 * written by update_pos() outside the lock -- confirm this is acceptable on
 * all supported platforms.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            do {\
                if ((otd)->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            } while (1);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while (0)

/*
 * update_pos(td, mb_y, mb_x)
 *
 * Publish (mb_y << 16) | (mb_x & 0xFFFF) as td's current position in
 * td->thread_mb_pos and, when needed, wake threads sleeping on td->cond.
 *
 * The broadcast is issued only when slice threading is active
 * (avctx->active_thread_type == FF_THREAD_SLICE and num_jobs > 1) and
 * either one of next_td / prev_td is NULL, or a neighbour other than td
 * has a wait_mb_pos that the new position has reached.
 *
 * Besides its arguments, the expansion uses avctx, num_jobs, next_td and
 * prev_td from the calling scope.
 *
 * Expands to a single statement without a trailing semicolon.
 */
#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                               (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                               (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
        (td)->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&(td)->lock);\
            pthread_cond_broadcast(&(td)->cond);\
            pthread_mutex_unlock(&(td)->lock);\
        }\
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
int jobnr, int threadnr)
{ {
VP8FilterStrength *f = s->filter_strength; VP8Context *s = avctx->priv_data;
VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
int mb_y = td->thread_mb_pos>>16;
int i, y, mb_x, mb_xy = mb_y*s->mb_width;
int num_jobs = s->num_jobs;
AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
VP8Macroblock *mb;
uint8_t *dst[3] = { uint8_t *dst[3] = {
curframe->data[0] + 16*mb_y*s->linesize, curframe->data[0] + 16*mb_y*s->linesize,
curframe->data[1] + 8*mb_y*s->uvlinesize, curframe->data[1] + 8*mb_y*s->uvlinesize,
curframe->data[2] + 8*mb_y*s->uvlinesize curframe->data[2] + 8*mb_y*s->uvlinesize
}; };
int mb_x; if (mb_y == 0) prev_td = td;
else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
if (mb_y == s->mb_height-1) next_td = td;
else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
if (s->mb_layout == 1)
mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
else {
mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
}
memset(td->left_nnz, 0, sizeof(td->left_nnz));
// left edge of 129 for intra prediction
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
for (i = 0; i < 3; i++)
for (y = 0; y < 16>>!!i; y++)
dst[i][y*curframe->linesize[i]-1] = 129;
if (mb_y == 1) {
s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
}
}
s->mv_min.x = -MARGIN;
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
// Wait for previous thread to read mb_x+2, and reach mb_y-1.
if (prev_td != td) {
if (threadnr != 0) {
check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
} else {
check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
}
}
for (mb_x = 0; mb_x < s->mb_width; mb_x++) { s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
if (!s->mb_layout)
decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
if (!mb->skip)
decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
if (mb->mode <= MODE_I4x4)
intra_predict(s, td, dst, mb, mb_x, mb_y);
else
inter_predict(s, td, dst, mb, mb_x, mb_y);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
if (!mb->skip) {
idct_mb(s, td, dst, mb);
} else {
AV_ZERO64(td->left_nnz);
AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
// Reset DC block predictors if they would exist if the mb had coefficients
if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
td->left_nnz[8] = 0;
s->top_nnz[mb_x][8] = 0;
}
}
if (s->deblock_filter)
filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
if (s->filter.simple)
backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
else
backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
filter_mb(s, dst, f++, mb_x, mb_y); }
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
dst[0] += 16; dst[0] += 16;
dst[1] += 8; dst[1] += 8;
dst[2] += 8; dst[2] += 8;
s->mv_min.x -= 64;
s->mv_max.x -= 64;
if (mb_x == s->mb_width+1) {
update_pos(td, mb_y, s->mb_width+3);
} else {
update_pos(td, mb_y, mb_x);
}
} }
} }
static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y) static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
int jobnr, int threadnr)
{ {
VP8FilterStrength *f = s->filter_strength; VP8Context *s = avctx->priv_data;
uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize; VP8ThreadData *td = &s->thread_data[threadnr];
int mb_x; int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
AVFrame *curframe = s->curframe;
VP8Macroblock *mb;
VP8ThreadData *prev_td, *next_td;
uint8_t *dst[3] = {
curframe->data[0] + 16*mb_y*s->linesize,
curframe->data[1] + 8*mb_y*s->uvlinesize,
curframe->data[2] + 8*mb_y*s->uvlinesize
};
if (s->mb_layout == 1)
mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
else
mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
if (mb_y == 0) prev_td = td;
else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
if (mb_y == s->mb_height-1) next_td = td;
else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
VP8FilterStrength *f = &td->filter_strength[mb_x];
if (prev_td != td) {
check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
}
if (next_td != td)
if (next_td != &s->thread_data[0]) {
check_thread_pos(td, next_td, mb_x+1, mb_y+1);
}
if (num_jobs == 1) {
if (s->filter.simple)
backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
else
backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
}
if (s->filter.simple)
filter_mb_simple(s, dst[0], f, mb_x, mb_y);
else
filter_mb(s, dst, f, mb_x, mb_y);
dst[0] += 16;
dst[1] += 8;
dst[2] += 8;
for (mb_x = 0; mb_x < s->mb_width; mb_x++) { update_pos(td, mb_y, (s->mb_width+3) + mb_x);
backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
filter_mb_simple(s, dst, f++, mb_x, mb_y);
dst += 16;
} }
} }
static void release_queued_segmaps(VP8Context *s, int is_close) static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
int jobnr, int threadnr)
{ {
int leave_behind = is_close ? 0 : !s->maps_are_invalid; VP8Context *s = avctx->priv_data;
while (s->num_maps_to_be_freed > leave_behind) VP8ThreadData *td = &s->thread_data[jobnr];
av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]); VP8ThreadData *next_td = NULL, *prev_td = NULL;
s->maps_are_invalid = 0; AVFrame *curframe = s->curframe;
int mb_y, num_jobs = s->num_jobs;
td->thread_nr = threadnr;
for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
if (mb_y >= s->mb_height) break;
td->thread_mb_pos = mb_y<<16;
vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
if (s->deblock_filter)
vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
update_pos(td, mb_y, INT_MAX & 0xFFFF);
s->mv_min.y -= 64;
s->mv_max.y -= 64;
if (avctx->active_thread_type == FF_THREAD_FRAME)
ff_thread_report_progress(curframe, mb_y, 0);
}
return 0;
} }
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt) AVPacket *avpkt)
{ {
VP8Context *s = avctx->priv_data; VP8Context *s = avctx->priv_data;
int ret, mb_x, mb_y, i, y, referenced; int ret, i, referenced, num_jobs;
enum AVDiscard skip_thresh; enum AVDiscard skip_thresh;
AVFrame *av_uninit(curframe), *prev_frame; AVFrame *av_uninit(curframe), *prev_frame;
...@@ -1670,13 +1949,16 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -1670,13 +1949,16 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
s->linesize = curframe->linesize[0]; s->linesize = curframe->linesize[0];
s->uvlinesize = curframe->linesize[1]; s->uvlinesize = curframe->linesize[1];
if (!s->edge_emu_buffer) if (!s->thread_data[0].edge_emu_buffer)
s->edge_emu_buffer = av_malloc(21*s->linesize); for (i = 0; i < MAX_THREADS; i++)
s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
/* Zero macroblock structures for top/top-left prediction from outside the frame. */ /* Zero macroblock structures for top/top-left prediction from outside the frame. */
if (!s->mb_layout)
memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks)); memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
if (!s->mb_layout && s->keyframe)
memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
// top edge of 127 for intra prediction // top edge of 127 for intra prediction
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
...@@ -1684,96 +1966,30 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -1684,96 +1966,30 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1); memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
} }
memset(s->ref_count, 0, sizeof(s->ref_count)); memset(s->ref_count, 0, sizeof(s->ref_count));
if (s->keyframe)
memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
#define MARGIN (16 << 2)
s->mv_min.y = -MARGIN;
s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
int mb_xy = mb_y*s->mb_width;
uint8_t *dst[3] = {
curframe->data[0] + 16*mb_y*s->linesize,
curframe->data[1] + 8*mb_y*s->uvlinesize,
curframe->data[2] + 8*mb_y*s->uvlinesize
};
memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
memset(s->left_nnz, 0, sizeof(s->left_nnz));
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
// left edge of 129 for intra prediction // Make sure the previous frame has read its segmentation map,
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { // if we re-use the same map.
for (i = 0; i < 3; i++)
for (y = 0; y < 16>>!!i; y++)
dst[i][y*curframe->linesize[i]-1] = 129;
if (mb_y == 1) // top left edge is also 129
s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
}
s->mv_min.x = -MARGIN;
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map) if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
ff_thread_await_progress(prev_frame, mb_y, 0); ff_thread_await_progress(prev_frame, 1, 0);
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
/* Prefetch the current frame, 4 MBs ahead */
s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
if (!mb->skip)
decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
if (mb->mode <= MODE_I4x4)
intra_predict(s, dst, mb, mb_x, mb_y);
else
inter_predict(s, dst, mb, mb_x, mb_y);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); if (s->mb_layout == 1)
vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
if (!mb->skip) {
idct_mb(s, dst, mb);
} else {
AV_ZERO64(s->left_nnz);
AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
// Reset DC block predictors if they would exist if the mb had coefficients
if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
s->left_nnz[8] = 0;
s->top_nnz[mb_x][8] = 0;
}
}
if (s->deblock_filter)
filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
dst[0] += 16; if (avctx->active_thread_type == FF_THREAD_FRAME)
dst[1] += 8; num_jobs = 1;
dst[2] += 8;
s->mv_min.x -= 64;
s->mv_max.x -= 64;
}
if (s->deblock_filter) {
if (s->filter.simple)
filter_mb_row_simple(s, curframe, mb_y);
else else
filter_mb_row(s, curframe, mb_y); num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
} s->num_jobs = num_jobs;
s->mv_min.y -= 64; s->curframe = curframe;
s->mv_max.y -= 64; s->prev_frame = prev_frame;
s->mv_min.y = -MARGIN;
ff_thread_report_progress(curframe, mb_y, 0); s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
for (i = 0; i < MAX_THREADS; i++) {
s->thread_data[i].thread_mb_pos = 0;
s->thread_data[i].wait_mb_pos = INT_MAX;
} }
avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
ff_thread_report_progress(curframe, INT_MAX, 0); ff_thread_report_progress(curframe, INT_MAX, 0);
memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4); memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
...@@ -1862,7 +2078,7 @@ AVCodec ff_vp8_decoder = { ...@@ -1862,7 +2078,7 @@ AVCodec ff_vp8_decoder = {
.init = vp8_decode_init, .init = vp8_decode_init,
.close = vp8_decode_free, .close = vp8_decode_free,
.decode = vp8_decode_frame, .decode = vp8_decode_frame,
.capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS, .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
.flush = vp8_decode_flush, .flush = vp8_decode_flush,
.long_name = NULL_IF_CONFIG_SMALL("On2 VP8"), .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
.init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy), .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
* Copyright (C) 2010 David Conrad * Copyright (C) 2010 David Conrad
* Copyright (C) 2010 Ronald S. Bultje * Copyright (C) 2010 Ronald S. Bultje
* Copyright (C) 2010 Jason Garrett-Glaser * Copyright (C) 2010 Jason Garrett-Glaser
* Copyright (C) 2012 Daniel Kang
* *
* This file is part of FFmpeg. * This file is part of FFmpeg.
* *
...@@ -29,6 +30,11 @@ ...@@ -29,6 +30,11 @@
#include "vp56data.h" #include "vp56data.h"
#include "vp8dsp.h" #include "vp8dsp.h"
#include "h264pred.h" #include "h264pred.h"
#if HAVE_PTHREADS
#include <pthread.h>
#elif HAVE_W32THREADS
#include "w32pthreads.h"
#endif
#define VP8_MAX_QUANT 127 #define VP8_MAX_QUANT 127
...@@ -79,15 +85,51 @@ typedef struct { ...@@ -79,15 +85,51 @@ typedef struct {
uint8_t mode; uint8_t mode;
uint8_t ref_frame; uint8_t ref_frame;
uint8_t partitioning; uint8_t partitioning;
uint8_t chroma_pred_mode;
uint8_t segment;
uint8_t intra4x4_pred_mode_mb[16];
uint8_t intra4x4_pred_mode_top[4];
VP56mv mv; VP56mv mv;
VP56mv bmv[16]; VP56mv bmv[16];
} VP8Macroblock; } VP8Macroblock;
typedef struct { typedef struct {
#if HAVE_THREADS
pthread_mutex_t lock;
pthread_cond_t cond;
#endif
int thread_nr;
int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF)
int wait_mb_pos; // What the current thread is waiting on.
uint8_t *edge_emu_buffer;
/**
* For coeff decode, we need to know whether the above block had non-zero
* coefficients. This means for each macroblock, we need data for 4 luma
* blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
* per macroblock. We keep the last row in top_nnz.
*/
DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
/**
* This is the index plus one of the last non-zero coeff
* for each of the blocks in the current macroblock.
* So, 0 -> no coeffs
* 1 -> dc-only (special transform)
* 2+-> full transform
*/
DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
VP8FilterStrength *filter_strength;
} VP8ThreadData;
#define MAX_THREADS 8
typedef struct {
VP8ThreadData *thread_data;
AVCodecContext *avctx; AVCodecContext *avctx;
AVFrame *framep[4]; AVFrame *framep[4];
AVFrame *next_framep[4]; AVFrame *next_framep[4];
uint8_t *edge_emu_buffer; AVFrame *curframe;
AVFrame *prev_frame;
uint16_t mb_width; /* number of horizontal MB */ uint16_t mb_width; /* number of horizontal MB */
uint16_t mb_height; /* number of vertical MB */ uint16_t mb_height; /* number of vertical MB */
...@@ -97,8 +139,6 @@ typedef struct { ...@@ -97,8 +139,6 @@ typedef struct {
uint8_t keyframe; uint8_t keyframe;
uint8_t deblock_filter; uint8_t deblock_filter;
uint8_t mbskip_enabled; uint8_t mbskip_enabled;
uint8_t segment; ///< segment of the current macroblock
uint8_t chroma_pred_mode; ///< 8x8c pred mode of the current macroblock
uint8_t profile; uint8_t profile;
VP56mv mv_min; VP56mv mv_min;
VP56mv mv_max; VP56mv mv_max;
...@@ -126,7 +166,6 @@ typedef struct { ...@@ -126,7 +166,6 @@ typedef struct {
} filter; } filter;
VP8Macroblock *macroblocks; VP8Macroblock *macroblocks;
VP8FilterStrength *filter_strength;
uint8_t *intra4x4_pred_mode_top; uint8_t *intra4x4_pred_mode_top;
uint8_t intra4x4_pred_mode_left[4]; uint8_t intra4x4_pred_mode_left[4];
...@@ -167,33 +206,10 @@ typedef struct { ...@@ -167,33 +206,10 @@ typedef struct {
int8_t ref[4]; int8_t ref[4];
} lf_delta; } lf_delta;
/**
* Cache of the top row needed for intra prediction
* 16 for luma, 8 for each chroma plane
*/
uint8_t (*top_border)[16+8+8]; uint8_t (*top_border)[16+8+8];
/**
* For coeff decode, we need to know whether the above block had non-zero
* coefficients. This means for each macroblock, we need data for 4 luma
* blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
* per macroblock. We keep the last row in top_nnz.
*/
uint8_t (*top_nnz)[9]; uint8_t (*top_nnz)[9];
DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
/**
* This is the index plus one of the last non-zero coeff
* for each of the blocks in the current macroblock.
* So, 0 -> no coeffs
* 1 -> dc-only (special transform)
* 2+-> full transform
*/
DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
VP56RangeCoder c; ///< header context, includes mb modes and motion vectors VP56RangeCoder c; ///< header context, includes mb modes and motion vectors
DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
uint8_t intra4x4_pred_mode_mb[16];
/** /**
* These are all of the updatable probabilities for binary decisions. * These are all of the updatable probabilities for binary decisions.
...@@ -246,6 +262,13 @@ typedef struct { ...@@ -246,6 +262,13 @@ typedef struct {
uint8_t *segmentation_maps[5]; uint8_t *segmentation_maps[5];
int num_maps_to_be_freed; int num_maps_to_be_freed;
int maps_are_invalid; int maps_are_invalid;
int num_jobs;
/**
* This describes the macroblock memory layout.
* 0 -> Only width+height*2+1 macroblocks allocated (frame/single thread).
* 1 -> Macroblocks for the entire frame allocated (sliced thread).
*/
int mb_layout;
} VP8Context; } VP8Context;
#endif /* AVCODEC_VP8_H */ #endif /* AVCODEC_VP8_H */
...@@ -64,9 +64,9 @@ ...@@ -64,9 +64,9 @@
#endif #endif
#if AV_GCC_VERSION_AT_LEAST(4,3) #if AV_GCC_VERSION_AT_LEAST(4,3)
#define av_alloc_size(n) __attribute__((alloc_size(n))) #define av_alloc_size(...) __attribute__((alloc_size(__VA_ARGS__)))
#else #else
#define av_alloc_size(n) #define av_alloc_size(...)
#endif #endif
/** /**
...@@ -79,6 +79,22 @@ ...@@ -79,6 +79,22 @@
*/ */
void *av_malloc(size_t size) av_malloc_attrib av_alloc_size(1); void *av_malloc(size_t size) av_malloc_attrib av_alloc_size(1);
/**
 * Allocate an array of nmemb elements, each of size bytes, by forwarding
 * the total byte count (nmemb * size) to av_malloc().
 *
 * The request is refused without calling av_malloc() when size is zero or
 * when nmemb is not strictly below INT_MAX / size.
 * @param nmemb Number of elements
 * @param size  Size of a single element
 * @return Pointer to the allocated block, or NULL if the request was
 *         refused or the block cannot be allocated.
 * @see av_malloc()
 */
av_alloc_size(1,2) static inline void *av_malloc_array(size_t nmemb, size_t size)
{
    /* size is unsigned, so zero is its only non-positive value; rejecting it
     * first also keeps the division below well defined. */
    if (size == 0)
        return NULL;

    /* Bound the element count so that the product stays below INT_MAX. */
    if (nmemb >= INT_MAX / size)
        return NULL;

    return av_malloc(nmemb * size);
}
/** /**
* Allocate or reallocate a block of memory. * Allocate or reallocate a block of memory.
* If ptr is NULL and size > 0, allocate a new block. If * If ptr is NULL and size > 0, allocate a new block. If
...@@ -135,6 +151,23 @@ void *av_mallocz(size_t size) av_malloc_attrib av_alloc_size(1); ...@@ -135,6 +151,23 @@ void *av_mallocz(size_t size) av_malloc_attrib av_alloc_size(1);
*/ */
void *av_calloc(size_t nmemb, size_t size) av_malloc_attrib; void *av_calloc(size_t nmemb, size_t size) av_malloc_attrib;
/**
 * Allocate an array of nmemb elements, each of size bytes, by forwarding
 * the total byte count (nmemb * size) to av_mallocz().
 *
 * The request is refused without calling av_mallocz() when size is zero or
 * when nmemb is not strictly below INT_MAX / size.
 * @param nmemb Number of elements
 * @param size  Size of a single element
 * @return Pointer to the allocated block, or NULL if the request was
 *         refused or the block cannot be allocated.
 * @see av_mallocz()
 * @see av_malloc_array()
 */
av_alloc_size(1,2) static inline void *av_mallocz_array(size_t nmemb, size_t size)
{
    /* size is unsigned, so zero is its only non-positive value; rejecting it
     * first also keeps the division below well defined. */
    if (size == 0)
        return NULL;

    /* Bound the element count so that the product stays below INT_MAX. */
    if (nmemb >= INT_MAX / size)
        return NULL;

    return av_mallocz(nmemb * size);
}
/** /**
* Duplicate the string s. * Duplicate the string s.
* @param s string to be duplicated * @param s string to be duplicated
......
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
*/ */
#define LIBAVUTIL_VERSION_MAJOR 51 #define LIBAVUTIL_VERSION_MAJOR 51
#define LIBAVUTIL_VERSION_MINOR 64 #define LIBAVUTIL_VERSION_MINOR 65
#define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment