Commit c3d470b4 authored by Roman Shaposhnik

implementing more efficient (and direct) allocation of work for DV codec workers

Originally committed as revision 15788 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 939e4e3a
...@@ -62,15 +62,6 @@ typedef struct DVVideoContext { ...@@ -62,15 +62,6 @@ typedef struct DVVideoContext {
void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
} DVVideoContext; } DVVideoContext;
/**
* MultiThreading - dv_anchor applies to entire DV codec, not just the avcontext
* one element is needed for each video segment in a DV frame
* at most there are 4 DIF channels * 12 DIF sequences * 27 video segments (1080i50)
*/
#define DV_ANCHOR_SIZE (4*12*27)
static void* dv_anchor[DV_ANCHOR_SIZE];
#define TEX_VLC_BITS 9 #define TEX_VLC_BITS 9
#if ENABLE_SMALL #if ENABLE_SMALL
...@@ -89,6 +80,40 @@ static struct dv_vlc_pair { ...@@ -89,6 +80,40 @@ static struct dv_vlc_pair {
uint8_t size; uint8_t size;
} dv_vlc_map[DV_VLC_MAP_RUN_SIZE][DV_VLC_MAP_LEV_SIZE]; } dv_vlc_map[DV_VLC_MAP_RUN_SIZE][DV_VLC_MAP_LEV_SIZE];
/**
 * Number of work chunks (one per video segment) in a frame of profile d.
 *
 * The full grid has 27 video segments for every DIF sequence of every DIF
 * channel. 1080i50 and 720p50 leave some sequences unused, so their segments
 * are taken out of the count; this matches the entries that
 * dv_init_dynamic_tables() skips when it fills d->work_chunks.
 *
 * @param d DV profile to size the pool for
 * @return  number of valid entries in d->work_chunks
 */
static inline int dv_work_pool_size(const DVprofile *d)
{
    int unused_seqs = 0;

    if (DV_PROFILE_IS_1080i50(d))
        unused_seqs += 3; /* sequence 11 of every channel except channel 0 */
    if (DV_PROFILE_IS_720p50(d))
        unused_seqs += 4; /* sequences 10 and 11 */

    return (d->n_difchan * d->difseg_size - unused_seqs) * 27;
}
/**
 * Fill d->work_chunks with one packed descriptor per video segment.
 *
 * Each descriptor is an integer stored in a pointer-sized slot:
 *   bits 18 and up : index of the segment's first 80-byte block in the frame
 *   bits 16..17    : DIF channel
 *   bits  8..15    : DIF sequence within the channel
 *   bits  0..7     : video segment slot (0..26) within the sequence
 *
 * The table is built only once per profile: a non-NULL last entry is taken
 * to mean it has already been filled in.
 *
 * @param d DV profile whose work_chunks table is initialized
 * @return  always 0
 */
static int dv_init_dynamic_tables(const DVprofile *d)
{
    int chan, seq, slot;
    int blk      = 0; /* running 80-byte block index into the frame */
    int n_chunks = 0; /* next free entry of d->work_chunks          */

    if (d->work_chunks[dv_work_pool_size(d) - 1])
        return 0;

    for (chan = 0; chan < d->n_difchan; chan++) {
        for (seq = 0; seq < d->difseg_size; seq++) {
            /* every sequence starts with 6 blocks that carry no video */
            blk += 6;
            for (slot = 0; slot < 27; slot++) {
                /* one extra non-video block precedes each group of 3 segments */
                if (slot % 3 == 0)
                    blk++;

                /* in 1080i50 and 720p50 some sequences are unused: the
                 * block index still advances but no chunk is emitted */
                if (!((DV_PROFILE_IS_1080i50(d) && chan != 0 && seq == 11) ||
                      (DV_PROFILE_IS_720p50(d) && seq > 9))) {
                    d->work_chunks[n_chunks++] =
                        (void*)(size_t)((blk << 18) | (chan << 16) | (seq << 8) | slot);
                }

                /* a video segment occupies 5 blocks */
                blk += 5;
            }
        }
    }

    return 0;
}
static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm) static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
{ {
int i, q, a; int i, q, a;
...@@ -138,10 +163,6 @@ static av_cold int dvvideo_init(AVCodecContext *avctx) ...@@ -138,10 +163,6 @@ static av_cold int dvvideo_init(AVCodecContext *avctx)
done = 1; done = 1;
/* dv_anchor lets each thread know its ID */
for (i = 0; i < DV_ANCHOR_SIZE; i++)
dv_anchor[i] = (void*)(size_t)i;
/* it's faster to include sign bit in a generic VLC parsing scheme */ /* it's faster to include sign bit in a generic VLC parsing scheme */
for (i = 0, j = 0; i < NB_DV_VLC; i++, j++) { for (i = 0, j = 0; i < NB_DV_VLC; i++, j++) {
new_dv_vlc_bits[j] = dv_vlc_bits[i]; new_dv_vlc_bits[j] = dv_vlc_bits[i];
...@@ -360,13 +381,29 @@ static inline void bit_copy(PutBitContext *pb, GetBitContext *gb) ...@@ -360,13 +381,29 @@ static inline void bit_copy(PutBitContext *pb, GetBitContext *gb)
} }
} }
/**
 * Look up the position of macroblock m of a work chunk.
 *
 * Channel, sequence and slot are unpacked from work_chunk (the layout written
 * by dv_init_dynamic_tables()) and used to index s->sys->video_place, which
 * stores 5 entries per video segment with x in the low byte and y above it.
 *
 * @param s          codec context; s->sys and s->buf must be set
 * @param work_chunk packed chunk descriptor
 * @param m          macroblock index within the segment (callers pass 0..4)
 * @param mb_x       receives the macroblock column
 * @param mb_y       receives the macroblock row
 */
static inline void dv_calculate_mb_xy(DVVideoContext *s, int work_chunk, int m, int *mb_x, int *mb_y)
{
    const int chan = (work_chunk >> 16) & 0x03;
    const int seg  = (work_chunk >>  8) & 0xff;
    const int slot =  work_chunk        & 0xff;
    const int idx  = (chan * s->sys->difseg_size + seg) * 27 * 5 + slot * 5 + m;
    const int pos  = s->sys->video_place[idx];
    int x = pos & 0xff;
    int y = pos >> 8;

    /* We work with 720p frames split in half. The odd half-frame (chan==2,3) is displaced :-( */
    if (s->sys->height == 720 && !(s->buf[1] & 0x0C)) {
        /* shifting the Y coordinate down by 72/2 macro blocks */
        if (y > 17)
            y -= 18;
        else
            y += 72;
    }

    *mb_x = x;
    *mb_y = y;
}
/* mb_x and mb_y are in units of 8 pixels */ /* mb_x and mb_y are in units of 8 pixels */
static inline void dv_decode_video_segment(DVVideoContext *s, static inline void dv_decode_video_segment(DVVideoContext *s, int work_chunk)
const uint8_t *buf_ptr1,
const uint16_t *mb_pos_ptr)
{ {
int quant, dc, dct_mode, class1, j; int quant, dc, dct_mode, class1, j;
int mb_index, mb_x, mb_y, v, last_index; int mb_index, mb_x, mb_y, last_index;
int y_stride, linesize; int y_stride, linesize;
DCTELEM *block, *block1; DCTELEM *block, *block1;
int c_offset; int c_offset;
...@@ -387,7 +424,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, ...@@ -387,7 +424,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
memset(sblock, 0, sizeof(sblock)); memset(sblock, 0, sizeof(sblock));
/* pass 1 : read DC and AC coefficients in blocks */ /* pass 1 : read DC and AC coefficients in blocks */
buf_ptr = buf_ptr1; buf_ptr = &s->buf[(work_chunk>>18)*80];
block1 = &sblock[0][0]; block1 = &sblock[0][0];
mb1 = mb_data; mb1 = mb_data;
init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80); init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80);
...@@ -490,13 +527,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, ...@@ -490,13 +527,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
block = &sblock[0][0]; block = &sblock[0][0];
mb = mb_data; mb = mb_data;
for (mb_index = 0; mb_index < 5; mb_index++) { for (mb_index = 0; mb_index < 5; mb_index++) {
v = *mb_pos_ptr++; dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y);
mb_x = v & 0xff;
mb_y = v >> 8;
/* We work with 720p frames split in half. The odd half-frame (chan==2,3) is displaced :-( */
if (s->sys->height == 720 && !(s->buf[1] & 0x0C)) {
mb_y -= (mb_y > 17) ? 18 : -72; /* shifting the Y coordinate down by 72/2 macroblocks */
}
/* idct_put'ting luminance */ /* idct_put'ting luminance */
if ((s->sys->pix_fmt == PIX_FMT_YUV420P) || if ((s->sys->pix_fmt == PIX_FMT_YUV420P) ||
...@@ -831,15 +862,14 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) ...@@ -831,15 +862,14 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
} }
} }
static inline void dv_encode_video_segment(DVVideoContext *s, static inline void dv_encode_video_segment(DVVideoContext *s, int work_chunk)
uint8_t *dif,
const uint16_t *mb_pos_ptr)
{ {
int mb_index, i, j, v; int mb_index, i, j;
int mb_x, mb_y, c_offset, linesize; int mb_x, mb_y, c_offset, linesize;
uint8_t* y_ptr; uint8_t* y_ptr;
uint8_t* data; uint8_t* data;
uint8_t* ptr; uint8_t* ptr;
uint8_t* dif;
int do_edge_wrap; int do_edge_wrap;
DECLARE_ALIGNED_16(DCTELEM, block[64]); DECLARE_ALIGNED_16(DCTELEM, block[64]);
EncBlockInfo enc_blks[5*6]; EncBlockInfo enc_blks[5*6];
...@@ -851,12 +881,11 @@ static inline void dv_encode_video_segment(DVVideoContext *s, ...@@ -851,12 +881,11 @@ static inline void dv_encode_video_segment(DVVideoContext *s,
assert((((int)block) & 15) == 0); assert((((int)block) & 15) == 0);
dif = &s->buf[(work_chunk>>18)*80];
enc_blk = &enc_blks[0]; enc_blk = &enc_blks[0];
pb = &pbs[0]; pb = &pbs[0];
for (mb_index = 0; mb_index < 5; mb_index++) { for (mb_index = 0; mb_index < 5; mb_index++) {
v = *mb_pos_ptr++; dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y);
mb_x = v & 0xff;
mb_y = v >> 8;
y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << 3); y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << 3);
c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] + c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] +
(mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << 3); (mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << 3);
...@@ -984,52 +1013,14 @@ static inline void dv_encode_video_segment(DVVideoContext *s, ...@@ -984,52 +1013,14 @@ static inline void dv_encode_video_segment(DVVideoContext *s,
static int dv_decode_mt(AVCodecContext *avctx, void* sl) static int dv_decode_mt(AVCodecContext *avctx, void* sl)
{ {
DVVideoContext *s = avctx->priv_data; dv_decode_video_segment((DVVideoContext *)avctx->priv_data, (size_t)sl);
int slice = (size_t)sl;
/* which DIF channel is this? */
int chan = slice / (s->sys->difseg_size * 27);
/* slice within the DIF channel */
int chan_slice = slice % (s->sys->difseg_size * 27);
/* byte offset of this channel's data */
int chan_offset = chan * s->sys->difseg_size * 150 * 80;
/* DIF sequence */
int seq = chan_slice / 27;
/* in 1080i50 and 720p50 some seq are unused */
if ((DV_PROFILE_IS_1080i50(s->sys) && chan != 0 && seq == 11) ||
(DV_PROFILE_IS_720p50(s->sys) && seq > 9))
return 0;
dv_decode_video_segment(s, &s->buf[(seq * 6 + (chan_slice / 3)
+ chan_slice * 5 + 7)
* 80 + chan_offset],
&s->sys->video_place[slice * 5]);
return 0; return 0;
} }
#ifdef CONFIG_DVVIDEO_ENCODER #ifdef CONFIG_DVVIDEO_ENCODER
static int dv_encode_mt(AVCodecContext *avctx, void* sl) static int dv_encode_mt(AVCodecContext *avctx, void* sl)
{ {
DVVideoContext *s = avctx->priv_data; dv_encode_video_segment((DVVideoContext *)avctx->priv_data, (size_t)sl);
int slice = (size_t)sl;
/* which DIF channel is this? */
int chan = slice / (s->sys->difseg_size * 27);
/* slice within the DIF channel */
int chan_slice = slice % (s->sys->difseg_size * 27);
/* byte offset of this channel's data */
int chan_offset = chan * s->sys->difseg_size * 150 * 80;
dv_encode_video_segment(s, &s->buf[((chan_slice / 27) * 6 + (chan_slice / 3)
+ chan_slice * 5 + 7)
* 80 + chan_offset],
&s->sys->video_place[slice * 5]);
return 0; return 0;
} }
#endif #endif
...@@ -1044,7 +1035,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, ...@@ -1044,7 +1035,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx,
DVVideoContext *s = avctx->priv_data; DVVideoContext *s = avctx->priv_data;
s->sys = dv_frame_profile(buf); s->sys = dv_frame_profile(buf);
if (!s->sys || buf_size < s->sys->frame_size) if (!s->sys || buf_size < s->sys->frame_size || dv_init_dynamic_tables(s->sys))
return -1; /* NOTE: we only accept several full frames */ return -1; /* NOTE: we only accept several full frames */
if (s->picture.data[0]) if (s->picture.data[0])
...@@ -1064,8 +1055,8 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, ...@@ -1064,8 +1055,8 @@ static int dvvideo_decode_frame(AVCodecContext *avctx,
s->picture.top_field_first = 0; s->picture.top_field_first = 0;
s->buf = buf; s->buf = buf;
avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL, avctx->execute(avctx, dv_decode_mt, s->sys->work_chunks, NULL,
s->sys->n_difchan * s->sys->difseg_size * 27); dv_work_pool_size(s->sys));
emms_c(); emms_c();
...@@ -1208,9 +1199,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, ...@@ -1208,9 +1199,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
DVVideoContext *s = c->priv_data; DVVideoContext *s = c->priv_data;
s->sys = dv_codec_profile(c); s->sys = dv_codec_profile(c);
if (!s->sys) if (!s->sys || buf_size < s->sys->frame_size || dv_init_dynamic_tables(s->sys))
return -1;
if (buf_size < s->sys->frame_size)
return -1; return -1;
c->pix_fmt = s->sys->pix_fmt; c->pix_fmt = s->sys->pix_fmt;
...@@ -1219,8 +1208,8 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, ...@@ -1219,8 +1208,8 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
s->picture.pict_type = FF_I_TYPE; s->picture.pict_type = FF_I_TYPE;
s->buf = buf; s->buf = buf;
c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL, c->execute(c, dv_encode_mt, s->sys->work_chunks, NULL,
s->sys->n_difchan * s->sys->difseg_size * 27); dv_work_pool_size(s->sys));
emms_c(); emms_c();
......
...@@ -47,6 +47,7 @@ typedef struct DVprofile { ...@@ -47,6 +47,7 @@ typedef struct DVprofile {
int height; /* picture height in pixels */ int height; /* picture height in pixels */
int width; /* picture width in pixels */ int width; /* picture width in pixels */
AVRational sar[2]; /* sample aspect ratios for 4:3 and 16:9 */ AVRational sar[2]; /* sample aspect ratios for 4:3 and 16:9 */
void **work_chunks; /* each thread gets its own chunk of frame to work on */
const uint16_t *video_place; /* positions of all DV macroblocks */ const uint16_t *video_place; /* positions of all DV macroblocks */
enum PixelFormat pix_fmt; /* picture pixel format */ enum PixelFormat pix_fmt; /* picture pixel format */
int bpm; /* blocks per macroblock */ int bpm; /* blocks per macroblock */
...@@ -6159,6 +6160,16 @@ static const uint8_t block_sizes_dv100[8] = { ...@@ -6159,6 +6160,16 @@ static const uint8_t block_sizes_dv100[8] = {
80, 80, 80, 80, 80, 80, 64, 64, 80, 80, 80, 80, 80, 80, 64, 64,
}; };
static void *work_chunks_dv25pal [1*12*27];
static void *work_chunks_dv25pal411[1*12*27];
static void *work_chunks_dv25ntsc [1*10*27];
static void *work_chunks_dv50pal [2*12*27];
static void *work_chunks_dv50ntsc [2*10*27];
static void *work_chunks_dv100palp [2*12*27];
static void *work_chunks_dv100ntscp[2*10*27];
static void *work_chunks_dv100pali [4*12*27];
static void *work_chunks_dv100ntsci[4*10*27];
static const DVprofile dv_profiles[] = { static const DVprofile dv_profiles[] = {
{ .dsf = 0, { .dsf = 0,
.video_stype = 0x0, .video_stype = 0x0,
...@@ -6171,6 +6182,7 @@ static const DVprofile dv_profiles[] = { ...@@ -6171,6 +6182,7 @@ static const DVprofile dv_profiles[] = {
.width = 720, .width = 720,
.sar = {{10, 11}, {40, 33}}, .sar = {{10, 11}, {40, 33}},
.video_place = dv_place_411, .video_place = dv_place_411,
.work_chunks = &work_chunks_dv25ntsc[0],
.pix_fmt = PIX_FMT_YUV411P, .pix_fmt = PIX_FMT_YUV411P,
.bpm = 6, .bpm = 6,
.block_sizes = block_sizes_dv2550, .block_sizes = block_sizes_dv2550,
...@@ -6190,6 +6202,7 @@ static const DVprofile dv_profiles[] = { ...@@ -6190,6 +6202,7 @@ static const DVprofile dv_profiles[] = {
.width = 720, .width = 720,
.sar = {{59, 54}, {118, 81}}, .sar = {{59, 54}, {118, 81}},
.video_place = dv_place_420, .video_place = dv_place_420,
.work_chunks = &work_chunks_dv25pal[0],
.pix_fmt = PIX_FMT_YUV420P, .pix_fmt = PIX_FMT_YUV420P,
.bpm = 6, .bpm = 6,
.block_sizes = block_sizes_dv2550, .block_sizes = block_sizes_dv2550,
...@@ -6209,6 +6222,7 @@ static const DVprofile dv_profiles[] = { ...@@ -6209,6 +6222,7 @@ static const DVprofile dv_profiles[] = {
.width = 720, .width = 720,
.sar = {{59, 54}, {118, 81}}, .sar = {{59, 54}, {118, 81}},
.video_place = dv_place_411P, .video_place = dv_place_411P,
.work_chunks = &work_chunks_dv25pal411[0],
.pix_fmt = PIX_FMT_YUV411P, .pix_fmt = PIX_FMT_YUV411P,
.bpm = 6, .bpm = 6,
.block_sizes = block_sizes_dv2550, .block_sizes = block_sizes_dv2550,
...@@ -6228,6 +6242,7 @@ static const DVprofile dv_profiles[] = { ...@@ -6228,6 +6242,7 @@ static const DVprofile dv_profiles[] = {
.width = 720, .width = 720,
.sar = {{10, 11}, {40, 33}}, .sar = {{10, 11}, {40, 33}},
.video_place = dv_place_422_525, .video_place = dv_place_422_525,
.work_chunks = &work_chunks_dv50ntsc[0],
.pix_fmt = PIX_FMT_YUV422P, .pix_fmt = PIX_FMT_YUV422P,
.bpm = 6, .bpm = 6,
.block_sizes = block_sizes_dv2550, .block_sizes = block_sizes_dv2550,
...@@ -6247,6 +6262,7 @@ static const DVprofile dv_profiles[] = { ...@@ -6247,6 +6262,7 @@ static const DVprofile dv_profiles[] = {
.width = 720, .width = 720,
.sar = {{59, 54}, {118, 81}}, .sar = {{59, 54}, {118, 81}},
.video_place = dv_place_422_625, .video_place = dv_place_422_625,
.work_chunks = &work_chunks_dv50pal[0],
.pix_fmt = PIX_FMT_YUV422P, .pix_fmt = PIX_FMT_YUV422P,
.bpm = 6, .bpm = 6,
.block_sizes = block_sizes_dv2550, .block_sizes = block_sizes_dv2550,
...@@ -6266,6 +6282,7 @@ static const DVprofile dv_profiles[] = { ...@@ -6266,6 +6282,7 @@ static const DVprofile dv_profiles[] = {
.width = 1280, .width = 1280,
.sar = {{1, 1}, {1, 1}}, .sar = {{1, 1}, {1, 1}},
.video_place = dv_place_1080i60, .video_place = dv_place_1080i60,
.work_chunks = &work_chunks_dv100ntsci[0],
.pix_fmt = PIX_FMT_YUV422P, .pix_fmt = PIX_FMT_YUV422P,
.bpm = 8, .bpm = 8,
.block_sizes = block_sizes_dv100, .block_sizes = block_sizes_dv100,
...@@ -6285,6 +6302,7 @@ static const DVprofile dv_profiles[] = { ...@@ -6285,6 +6302,7 @@ static const DVprofile dv_profiles[] = {
.width = 1440, .width = 1440,
.sar = {{1, 1}, {1, 1}}, .sar = {{1, 1}, {1, 1}},
.video_place = dv_place_1080i50, .video_place = dv_place_1080i50,
.work_chunks = &work_chunks_dv100pali[0],
.pix_fmt = PIX_FMT_YUV422P, .pix_fmt = PIX_FMT_YUV422P,
.bpm = 8, .bpm = 8,
.block_sizes = block_sizes_dv100, .block_sizes = block_sizes_dv100,
...@@ -6304,6 +6322,7 @@ static const DVprofile dv_profiles[] = { ...@@ -6304,6 +6322,7 @@ static const DVprofile dv_profiles[] = {
.width = 960, .width = 960,
.sar = {{1, 1}, {1, 1}}, .sar = {{1, 1}, {1, 1}},
.video_place = dv_place_720p60, .video_place = dv_place_720p60,
.work_chunks = &work_chunks_dv100ntscp[0],
.pix_fmt = PIX_FMT_YUV422P, .pix_fmt = PIX_FMT_YUV422P,
.bpm = 8, .bpm = 8,
.block_sizes = block_sizes_dv100, .block_sizes = block_sizes_dv100,
...@@ -6323,6 +6342,7 @@ static const DVprofile dv_profiles[] = { ...@@ -6323,6 +6342,7 @@ static const DVprofile dv_profiles[] = {
.width = 960, .width = 960,
.sar = {{1, 1}, {1, 1}}, .sar = {{1, 1}, {1, 1}},
.video_place = dv_place_720p50, .video_place = dv_place_720p50,
.work_chunks = &work_chunks_dv100palp[0],
.pix_fmt = PIX_FMT_YUV422P, .pix_fmt = PIX_FMT_YUV422P,
.bpm = 8, .bpm = 8,
.block_sizes = block_sizes_dv100, .block_sizes = block_sizes_dv100,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment