Commit 9c3d33d6 authored by Michael Niedermayer

multithreaded/SMP motion estimation

Multithreaded/SMP encoding for MPEG1/MPEG2/MPEG4/H263.
All pthread-specific code is in pthread.c.
To try it, run configure --enable-pthreads and then ffmpeg ... -threads <num>.
The internal thread API is a single AVCodecContext.execute() callback, which runs a given function pointer once per argument and returns only after all calls have finished; that way no mutexes or other threading mess are needed outside pthread.c.

Originally committed as revision 2772 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 7984082a
......@@ -29,6 +29,7 @@ echo " --enable-amr_nb enable amr_nb float audio codec"
echo " --enable-amr_nb-fixed use fixed point for amr-nb codec"
echo " --enable-amr_wb enable amr_wb float audio codec"
echo " --enable-sunmlib use Sun medialib [default=no]"
echo " --enable-pthreads use pthreads [default=no]"
echo ""
echo "Advanced options (experts only):"
echo " --source-path=PATH path of source code [$source_path]"
......@@ -171,6 +172,7 @@ amr_nb="no"
amr_wb="no"
amr_nb_fixed="no"
sunmlib="no"
pthreads="no"
# OS specific
targetos=`uname -s`
......@@ -415,6 +417,8 @@ for opt do
;;
--enable-sunmlib) sunmlib="yes"
;;
--enable-pthreads) pthreads="yes"
;;
esac
done
......@@ -919,6 +923,7 @@ echo "Imlib2 support $imlib2"
echo "freetype support $freetype2"
fi
echo "Sun medialib support" $sunmlib
echo "pthreads support" $pthreads
echo "AMR-NB float support" $amr_nb
echo "AMR-NB fixed support" $amr_nb_fixed
echo "AMR-WB float support" $amr_wb
......@@ -1033,6 +1038,11 @@ if test "$sunmlib" = "yes" ; then
echo "#define HAVE_MLIB 1" >> $TMPH
extralibs="$extralibs -lmlib"
fi
if test "$pthreads" = "yes" ; then
echo "HAVE_PTHREADS=yes" >> config.mak
echo "#define HAVE_PTHREADS 1" >> $TMPH
extralibs="$extralibs -lpthread"
fi
if test "$sdl" = "yes" ; then
echo "CONFIG_SDL=yes" >> config.mak
echo "SDL_LIBS=`sdl-config --libs`" >> config.mak
......
......@@ -117,6 +117,13 @@ static int ildct_cmp = FF_CMP_VSAD;
static int mb_cmp = FF_CMP_SAD;
static int sub_cmp = FF_CMP_SAD;
static int cmp = FF_CMP_SAD;
static int pre_cmp = FF_CMP_SAD;
static int pre_me = 0;
static float lumi_mask = 0;
static float dark_mask = 0;
static float scplx_mask = 0;
static float tcplx_mask = 0;
static float p_mask = 0;
static int use_4mv = 0;
static int use_obmc = 0;
static int use_aic = 0;
......@@ -185,6 +192,7 @@ static char *audio_device = NULL;
static int using_stdin = 0;
static int using_vhook = 0;
static int verbose = 1;
static int thread_count= 1;
#define DEFAULT_PASS_LOGFILENAME "ffmpeg2pass"
......@@ -1855,6 +1863,41 @@ static void opt_cmp(const char *arg)
cmp = atoi(arg);
}
/* Handler for -precmp: stores the pre motion estimation compare function id. */
static void opt_pre_cmp(const char *arg)
{
    const int cmp_id = atoi(arg);

    pre_cmp = cmp_id;
}
/* Handler for -preme: stores the pre motion estimation setting. */
static void opt_pre_me(const char *arg)
{
    const int setting = atoi(arg);

    pre_me = setting;
}
/* Handler for -lumi_mask: stores the luminance masking strength. */
static void opt_lumi_mask(const char *arg)
{
    const double strength = atof(arg);

    lumi_mask = strength;
}
/* Handler for -dark_mask: stores the darkness masking strength. */
static void opt_dark_mask(const char *arg)
{
    const double strength = atof(arg);

    dark_mask = strength;
}
/* Handler for -scplx_mask: stores the spatial complexity masking strength. */
static void opt_scplx_mask(const char *arg)
{
    const double strength = atof(arg);

    scplx_mask = strength;
}
/* Handler for -tcplx_mask: stores the temporal complexity masking strength. */
static void opt_tcplx_mask(const char *arg)
{
    const double strength = atof(arg);

    tcplx_mask = strength;
}
/* Handler for -p_mask: stores the inter masking strength. */
static void opt_p_mask(const char *arg)
{
    const double strength = atof(arg);

    p_mask = strength;
}
static void opt_qscale(const char *arg)
{
video_qscale = atof(arg);
......@@ -1990,6 +2033,11 @@ static void opt_sc_threshold(const char *arg)
sc_threshold= atoi(arg);
}
/*
 * Handler for -threads: stores the requested thread count.
 *
 * atoi() returns 0 for non-numeric input and passes negative numbers
 * through; such values would later be copied verbatim into the codec
 * contexts' thread_count. Anything below 1 is therefore clamped to 1,
 * which is also the initial value of thread_count.
 */
static void opt_thread_count(const char *arg)
{
    thread_count = atoi(arg);
    if (thread_count < 1)
        thread_count = 1;
}
static void opt_audio_bitrate(const char *arg)
{
audio_bit_rate = atoi(arg) * 1000;
......@@ -2348,6 +2396,10 @@ static void opt_output_file(const char *filename)
exit(1);
}
avcodec_get_context_defaults(&st->codec);
#ifdef HAVE_PTHREADS
if(thread_count>1)
avcodec_pthread_init(&st->codec, thread_count);
#endif
video_enc = &st->codec;
......@@ -2398,6 +2450,13 @@ static void opt_output_file(const char *filename)
video_enc->ildct_cmp = ildct_cmp;
video_enc->me_sub_cmp = sub_cmp;
video_enc->me_cmp = cmp;
video_enc->me_pre_cmp = pre_cmp;
video_enc->pre_me = pre_me;
video_enc->lumi_masking = lumi_mask;
video_enc->dark_masking = dark_mask;
video_enc->spatial_cplx_masking = scplx_mask;
video_enc->temporal_cplx_masking = tcplx_mask;
video_enc->p_masking = p_mask;
video_enc->quantizer_noise_shaping= qns;
if (use_umv) {
......@@ -2451,7 +2510,8 @@ static void opt_output_file(const char *filename)
video_enc->qcompress = video_qcomp;
video_enc->rc_eq = video_rc_eq;
video_enc->debug = debug;
video_enc->debug_mv = debug_mv;
video_enc->debug_mv = debug_mv;
video_enc->thread_count = thread_count;
p= video_rc_override_string;
for(i=0; p; i++){
int start, end, q;
......@@ -2527,6 +2587,10 @@ static void opt_output_file(const char *filename)
exit(1);
}
avcodec_get_context_defaults(&st->codec);
#ifdef HAVE_PTHREADS
if(thread_count>1)
avcodec_pthread_init(&st->codec, thread_count);
#endif
audio_enc = &st->codec;
audio_enc->codec_type = CODEC_TYPE_AUDIO;
......@@ -2544,6 +2608,7 @@ static void opt_output_file(const char *filename)
audio_enc->bit_rate = audio_bit_rate;
audio_enc->sample_rate = audio_sample_rate;
audio_enc->strict_std_compliance = strict;
audio_enc->thread_count = thread_count;
/* For audio codecs other than AC3 we limit */
/* the number of coded channels to stereo */
if (audio_channels > 2 && codec_id != CODEC_ID_AC3) {
......@@ -2999,6 +3064,7 @@ const OptionDef options[] = {
{ "loop", OPT_BOOL | OPT_EXPERT, {(void*)&loop_input}, "loop (current only works with images)" },
{ "v", HAS_ARG, {(void*)opt_verbose}, "control amount of logging", "verbose" },
{ "target", HAS_ARG, {(void*)opt_target}, "specify target file type (\"vcd\", \"svcd\" or \"dvd\")", "type" },
{ "threads", HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" },
/* video options */
{ "b", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate}, "set video bitrate (in kbit/s)", "bitrate" },
......@@ -3049,6 +3115,13 @@ const OptionDef options[] = {
{ "ildctcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_ildct_cmp}, "ildct compare function", "cmp function" },
{ "subcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_sub_cmp}, "subpel compare function", "cmp function" },
{ "cmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_cmp}, "fullpel compare function", "cmp function" },
{ "precmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_pre_cmp}, "pre motion estimation compare function", "cmp function" },
{ "preme", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_pre_me}, "pre motion estimation", "" },
{ "lumi_mask", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_lumi_mask}, "luminance masking", "" },
{ "dark_mask", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_dark_mask}, "darkness masking", "" },
{ "scplx_mask", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_scplx_mask}, "spatial complexity masking", "" },
{ "tcplx_mask", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_tcplx_mask}, "teporal complexity masking", "" },
{ "p_mask", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_p_mask}, "inter masking", "" },
{ "4mv", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_4mv}, "use four motion vector by macroblock (MPEG4)" },
{ "obmc", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_obmc}, "use overlapped block motion compensation (h263+)" },
{ "part", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_part}, "use data partitioning (MPEG4)" },
......
......@@ -33,6 +33,10 @@ CLEANAMR=cleanamrfloat
endif
endif
ifeq ($(HAVE_PTHREADS),yes)
OBJS+= pthread.o
endif
ifeq ($(AMR_WB),yes)
OBJS+= amr.o amrwb_float/dec_acelp.o amrwb_float/dec_dtx.o amrwb_float/dec_gain.o \
amrwb_float/dec_if.o amrwb_float/dec_lpc.o amrwb_float/dec_main.o \
......
......@@ -17,7 +17,7 @@ extern "C" {
#define FFMPEG_VERSION_INT 0x000408
#define FFMPEG_VERSION "0.4.8"
#define LIBAVCODEC_BUILD 4701
#define LIBAVCODEC_BUILD 4702
#define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT
#define LIBAVCODEC_VERSION FFMPEG_VERSION
......@@ -1508,6 +1508,32 @@ typedef struct AVCodecContext {
* - decoding: unused
*/
int quantizer_noise_shaping;
/**
* Thread count.
* Is used to decide how many independent tasks should be passed to execute().
* - encoding: set by user
* - decoding: set by user
*/
int thread_count;
/**
* The codec may call this to execute several independent things. It will return only after
* finishing all tasks. The user may replace this with some multithreaded implementation; the
* default implementation will execute the parts serially.
* @param count the number of things to execute; this will be identical to thread_count if possible
* - encoding: set by lavc, user can override
* - decoding: set by lavc, user can override
*/
int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void **arg2, int *ret, int count);
/**
* Thread opaque.
* can be used by execute() to store some per AVCodecContext stuff.
* - encoding: set by execute()
* - decoding: set by execute()
*/
void *thread_opaque;
} AVCodecContext;
......@@ -1846,6 +1872,11 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic);
void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic);
void avcodec_default_free_buffers(AVCodecContext *s);
int avcodec_pthread_init(AVCodecContext *s, int thread_count);
void avcodec_pthread_free(AVCodecContext *s);
int avcodec_pthread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count);
//FIXME func typedef
/**
* opens / inits the AVCodecContext.
* not thread safe!
......
......@@ -475,6 +475,28 @@ static inline uint8_t* pbBufPtr(PutBitContext *s)
#endif
}
/**
 * Advances the write position of the PutBitContext by n bytes without writing anything.
 * PutBitContext must be flushed & aligned to a byte boundary before calling this.
 * @param s the bit writer whose position is moved
 * @param n number of bytes to skip
 */
static inline void skip_put_bytes(PutBitContext *s, int n){
assert((put_bits_count(s)&7)==0);
#ifdef ALT_BITSTREAM_WRITER
// NOTE(review): the next line is bare text rather than a comment, so this branch cannot compile as written;
// presumably a deliberate build breaker until the skipped buffer region is cleaned -- confirm before changing.
FIXME may need some cleaning of the buffer
s->index += n<<3;
#else
// nothing may be pending in the bit accumulator, otherwise moving buf_ptr would misplace it
assert(s->bit_left==32);
s->buf_ptr += n;
#endif
}
/**
 * Changes the end of the buffer.
 * The end pointer is placed size elements past the start of the buffer;
 * the start pointer and the current write position are left untouched.
 */
static inline void set_put_bits_buffer_size(PutBitContext *s, int size)
{
    s->buf_end = &s->buf[size];
}
/* Bitstream reader API docs:
name
arbitrary name which is used as prefix for the internal variables
......
......@@ -1961,7 +1961,9 @@ void h263_encode_init(MpegEncContext *s)
s->luma_dc_vlc_length= uni_DCtab_lum_len;
s->chroma_dc_vlc_length= uni_DCtab_chrom_len;
s->ac_esc_length= 7+2+1+6+1+12+1;
s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table;
s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table;
if(s->flags & CODEC_FLAG_GLOBAL_HEADER){
s->avctx->extradata= av_malloc(1024);
......@@ -2290,12 +2292,11 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1);
put_bits(&s->pb, 1, 1); /* obmc disable */
if (vo_ver_id == 1) {
put_bits(&s->pb, 1, s->vol_sprite_usage=0); /* sprite enable */
put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */
}else{
put_bits(&s->pb, 2, s->vol_sprite_usage=0); /* sprite enable */
put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */
}
s->quant_precision=5;
put_bits(&s->pb, 1, 0); /* not 8 bit == false */
put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/
......@@ -2384,9 +2385,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
if (s->pict_type == B_TYPE)
put_bits(&s->pb, 3, s->b_code); /* fcode_back */
// printf("****frame %d\n", picture_number);
s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support
s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table;
}
#endif //CONFIG_ENCODERS
......@@ -2965,8 +2963,16 @@ static inline void memsetw(short *tab, int val, int n)
void ff_mpeg4_init_partitions(MpegEncContext *s)
{
init_put_bits(&s->tex_pb, s->tex_pb_buffer, PB_BUFFER_SIZE);
init_put_bits(&s->pb2 , s->pb2_buffer , PB_BUFFER_SIZE);
uint8_t *start= pbBufPtr(&s->pb);
uint8_t *end= s->pb.buf_end;
int size= end - start;
int pb_size = size/3;
int pb2_size= size/3;
int tex_size= size - pb_size - pb2_size;
set_put_bits_buffer_size(&s->pb, pb_size);
init_put_bits(&s->tex_pb, start + pb_size , tex_size);
init_put_bits(&s->pb2 , start + pb_size + tex_size, pb2_size);
}
void ff_mpeg4_merge_partitions(MpegEncContext *s)
......@@ -2989,8 +2995,9 @@ void ff_mpeg4_merge_partitions(MpegEncContext *s)
flush_put_bits(&s->pb2);
flush_put_bits(&s->tex_pb);
ff_copy_bits(&s->pb, s->pb2_buffer , pb2_len);
ff_copy_bits(&s->pb, s->tex_pb_buffer, tex_pb_len);
set_put_bits_buffer_size(&s->pb, s->pb2.buf_end - s->pb.buf);
ff_copy_bits(&s->pb, s->pb2.buf , pb2_len);
ff_copy_bits(&s->pb, s->tex_pb.buf, tex_pb_len);
s->last_bits= put_bits_count(&s->pb);
}
......
......@@ -531,11 +531,16 @@ static void escape_FF(MpegEncContext *s, int start)
}
}
/**
 * Pads the bitstream with 1-bits up to the next byte boundary.
 * Nothing is written when the bit count is already a multiple of 8.
 */
void ff_mjpeg_stuffing(PutBitContext * pbc)
{
    const int pad_bits = (-put_bits_count(pbc)) & 7;

    if (pad_bits != 0)
        put_bits(pbc, pad_bits, (1 << pad_bits) - 1);
}
void mjpeg_picture_trailer(MpegEncContext *s)
{
int pad= (-put_bits_count(&s->pb))&7;
put_bits(&s->pb, pad,0xFF>>(8-pad));
ff_mjpeg_stuffing(&s->pb);
flush_put_bits(&s->pb);
assert((s->header_bits&7)==0);
......
......@@ -805,7 +805,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
/* special case for first line */
if (s->mb_y == 0 && block<2) {
if (s->first_slice_line && block<2) {
pred_x4= P_LEFT[0];
pred_y4= P_LEFT[1];
} else {
......@@ -845,13 +845,12 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
int dxy;
const int offset= ((block&1) + (block>>1)*stride)*8;
uint8_t *dest_y = s->me.scratchpad + offset;
if(s->quarter_sample){
uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride;
dxy = ((my4 & 3) << 2) | (mx4 & 3);
if(s->no_rounding)
s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , s->linesize);
s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , stride);
else
s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride);
}else{
......@@ -966,7 +965,7 @@ static int interlaced_search(MpegEncContext *s, uint8_t *frame_src_data[3], uint
pred_x= P_LEFT[0];
pred_y= P_LEFT[1];
if(s->mb_y){
if(!s->first_slice_line){
P_TOP[0] = mv_table[xy - mot_stride][0];
P_TOP[1] = mv_table[xy - mot_stride][1];
P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
......@@ -1115,7 +1114,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
if(mb_y) {
if(!s->first_slice_line) {
P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0];
P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1];
P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
......@@ -1164,8 +1163,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = vard;
pic->mb_mean [s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
// pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
pic->mb_var_sum += varc;
pic->mc_mb_var_sum += vard;
s->mb_var_sum_temp += varc;
s->mc_mb_var_sum_temp += vard;
//printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
#if 0
......@@ -1326,7 +1325,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
if(P_LEFT[0] < (s->me.xmin<<shift)) P_LEFT[0] = (s->me.xmin<<shift);
/* special case for first line */
if (mb_y == s->mb_height-1) {
if (s->first_slice_line) {
pred_x= P_LEFT[0];
pred_y= P_LEFT[1];
P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]=
......@@ -1409,7 +1408,7 @@ static int ff_estimate_motion_b(MpegEncContext * s,
if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
/* special case for first line */
if (mb_y) {
if (!s->first_slice_line) {
P_TOP[0] = mv_table[mot_xy - mot_stride ][0];
P_TOP[1] = mv_table[mot_xy - mot_stride ][1];
P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1 ][0];
......@@ -1610,7 +1609,7 @@ static inline int direct_search(MpegEncContext * s, uint8_t *src_data[3], uint8_
P_LEFT[1] = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift);
/* special case for first line */
if (mb_y) {
if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as its cliped
P_TOP[0] = clip(mv_table[mot_xy - mot_stride ][0], xmin<<shift, xmax<<shift);
P_TOP[1] = clip(mv_table[mot_xy - mot_stride ][1], ymin<<shift, ymax<<shift);
P_TOPRIGHT[0] = clip(mv_table[mot_xy - mot_stride + 1 ][0], xmin<<shift, xmax<<shift);
......@@ -1727,7 +1726,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
}
score= ((unsigned)(score*score + 128*256))>>16;
s->current_picture.mc_mb_var_sum += score;
s->mc_mb_var_sum_temp += score;
s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
}
......
......@@ -557,9 +557,11 @@ static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pre
#define CHECK_CLIPED_MV(ax,ay)\
{\
const int x= FFMAX(xmin, FFMIN(ax, xmax));\
const int y= FFMAX(ymin, FFMIN(ay, ymax));\
CHECK_MV(x, y)\
const int x= ax;\
const int y= ay;\
const int x2= FFMAX(xmin, FFMIN(x, xmax));\
const int y2= FFMAX(ymin, FFMIN(y, ymax));\
CHECK_MV(x2, y2)\
}
#define CHECK_MV_DIR(x,y,new_dir)\
......@@ -912,7 +914,7 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s,
score_map[0]= dmin;
/* first line */
if (s->mb_y == 0) {
if (s->first_slice_line) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
......@@ -938,13 +940,15 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s,
if(s->me.pre_pass){
CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
if(!s->first_slice_line)
CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
}else{
CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
if(s->end_mb_y == s->mb_height || s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
}
}
......@@ -1024,7 +1028,7 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s,
dmin = 1000000;
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
/* first line */
if (s->mb_y == 0/* && block<2*/) {
if (s->first_slice_line) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
......@@ -1044,8 +1048,9 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s,
if(dmin>64*4){
CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
if(s->end_mb_y == s->mb_height || s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
}
if(s->me.dia_size==-1)
......@@ -1102,7 +1107,7 @@ static int RENAME(epzs_motion_search2)(MpegEncContext * s,
dmin = 1000000;
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
/* first line */
if (s->mb_y == 0) {
if (s->first_slice_line) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
......@@ -1122,8 +1127,9 @@ static int RENAME(epzs_motion_search2)(MpegEncContext * s,
if(dmin>64*4){
CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
if(s->end_mb_y == s->mb_height || s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
}
if(s->me.dia_size==-1)
......
......@@ -419,7 +419,9 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number)
put_bits(&s->pb, 8, 255);
}
put_bits(&s->pb, 2, s->intra_dc_precision);
put_bits(&s->pb, 2, s->picture_structure= PICT_FRAME);
assert(s->picture_structure == PICT_FRAME);
put_bits(&s->pb, 2, s->picture_structure);
if (s->progressive_sequence) {
put_bits(&s->pb, 1, 0); /* no repeat */
} else {
......
This diff is collapsed.
......@@ -47,6 +47,8 @@ enum OutputFormat {
#define MAX_FCODE 7
#define MAX_MV 2048
#define MAX_THREADS 8
#define MAX_PICTURE_COUNT 15
#define ME_MAP_SIZE 64
......@@ -285,6 +287,10 @@ typedef struct MpegEncContext {
Picture **input_picture; ///< next pictures on display order for encoding
Picture **reordered_input_picture; ///< pointer to the next pictures in codedorder for encoding
int start_mb_y; ///< start mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
int end_mb_y; ///< end mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
struct MpegEncContext *thread_context[MAX_THREADS];
/**
* copy of the previous picture structure.
* note, linesize & data, might not match the previous picture (for field pictures)
......@@ -332,7 +338,10 @@ typedef struct MpegEncContext {
uint8_t *cbp_table; ///< used to store cbp, ac_pred for partitioned decoding
uint8_t *pred_dir_table; ///< used to store pred_dir for partitioned decoding
uint8_t *allocated_edge_emu_buffer;
uint8_t *edge_emu_buffer; ///< points into the middle of allocated_edge_emu_buffer
uint8_t *edge_emu_buffer; ///< points into the middle of allocated_edge_emu_buffer
uint8_t *rd_scratchpad; ///< scartchpad for rate distortion mb decission
uint8_t *obmc_scratchpad;
uint8_t *b_scratchpad; ///< scratchpad used for writing into write only buffers
int qscale; ///< QP
int chroma_qscale; ///< chroma QP
......@@ -487,6 +496,10 @@ typedef struct MpegEncContext {
int misc_bits; ///< cbp, mb_type
int last_bits; ///< temp var used for calculating the above vars
/* temp variables for picture complexity calculation */
int mc_mb_var_sum_temp;
int mb_var_sum_temp;
/* error concealment / resync */
int error_count;
uint8_t *error_status_table; ///< table of the error status of each MB
......@@ -565,9 +578,6 @@ typedef struct MpegEncContext {
int intra_dc_threshold; ///< QP above whch the ac VLC should be used for intra dc
PutBitContext tex_pb; ///< used for data partitioned VOPs
PutBitContext pb2; ///< used for data partitioned VOPs
#define PB_BUFFER_SIZE 1024*256
uint8_t *tex_pb_buffer;
uint8_t *pb2_buffer;
int mpeg_quant;
int t_frame; ///< time distance of first I -> B, used for interlaced b frames
int padding_bug_score; ///< used to detect the VERY common padding bug in MPEG4
......@@ -908,6 +918,7 @@ void mjpeg_encode_mb(MpegEncContext *s,
DCTELEM block[6][64]);
void mjpeg_picture_header(MpegEncContext *s);
void mjpeg_picture_trailer(MpegEncContext *s);
void ff_mjpeg_stuffing(PutBitContext * pbc);
/* rate control */
......
/*
* Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <semaphore.h>
#include <pthread.h>
//#define DEBUG
#include "avcodec.h"
#include "common.h"
/**
 * State of one worker thread; avcodec_pthread_init() allocates an array
 * with one entry per thread and stores it in AVCodecContext.thread_opaque.
 */
typedef struct ThreadContext{
AVCodecContext *avctx; ///< codec context handed to func as its first argument
pthread_t thread; ///< handle of the worker running thread_func()
sem_t work_sem; ///< posted to wake the worker: either a job is ready or func==NULL asks it to exit
sem_t done_sem; ///< posted by the worker once func has returned
int (*func)(AVCodecContext *c, void *arg); ///< job to run; NULL means terminate
void *arg; ///< second argument handed to func
int ret; ///< value returned by the last call of func
}ThreadContext;
/**
 * Main loop of a worker thread.
 * Waits on work_sem, runs the posted job, stores its result in ret and
 * signals done_sem. A wake-up with func == NULL ends the thread.
 * @param v the ThreadContext of this worker
 * @return always NULL
 */
static void *thread_func(void *v)
{
    ThreadContext *worker = v;

    while (1) {
        /* sleep until a job or an exit request is posted */
        sem_wait(&worker->work_sem);

        /* a NULL job is the request to terminate */
        if (!worker->func)
            break;

        worker->ret = worker->func(worker->avctx, worker->arg);

        /* tell the dispatcher that this job is finished */
        sem_post(&worker->done_sem);
    }

    return NULL;
}
/**
 * Frees what has been allocated by avcodec_pthread_init().
 * Must be called after decoding/encoding has finished; in particular it must
 * not be called while avcodec_pthread_execute() is running.
 * Does nothing if no thread state is attached to the context, so it is safe
 * to call on a context for which avcodec_pthread_init() was never run.
 *
 * NOTE(review): s->execute is not reset here and still points at
 * avcodec_pthread_execute() afterwards; callers must not use execute() on
 * this context again unless they reinstall a valid callback.
 */
void avcodec_pthread_free(AVCodecContext *s){
    ThreadContext *c= s->thread_opaque;
    int i;

    /* thread_count defaults to 1 even without threads, so guard against a missing array */
    if(!c)
        return;

    for(i=0; i<s->thread_count; i++){
        int val;

        /* every worker has to be idle: no pending job, no unconsumed completion */
        sem_getvalue(&c[i].work_sem, &val); assert(val == 0);
        sem_getvalue(&c[i].done_sem, &val); assert(val == 0);

        /* waking a worker with func == NULL makes thread_func() return */
        c[i].func= NULL;
        sem_post(&c[i].work_sem);
        pthread_join(c[i].thread, NULL);

        sem_destroy(&c[i].work_sem);
        sem_destroy(&c[i].done_sem);
    }

    av_freep(&s->thread_opaque);
}
/**
 * execute() implementation that hands each task to one of the worker
 * threads created by avcodec_pthread_init() and waits for all of them.
 * @param s     codec context owning the workers
 * @param func  function to run once per task
 * @param arg   array of count task arguments
 * @param ret   if not NULL, receives the count return values of func
 * @param count number of tasks, must not exceed s->thread_count
 * @return always 0
 */
int avcodec_pthread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count){
    ThreadContext *workers = s->thread_opaque;
    int sem_value;
    int n;

    assert(s == workers->avctx);
    assert(count <= s->thread_count);

    /* note, we can be certain that this is not called with the same AVCodecContext by different threads at the same time */

    /* hand out one task per worker */
    for (n = 0; n < count; n++) {
        ThreadContext *w = &workers[n];

        sem_getvalue(&w->work_sem, &sem_value); assert(sem_value == 0);
        sem_getvalue(&w->done_sem, &sem_value); assert(sem_value == 0);

        w->arg  = arg[n];
        w->func = func;
        w->ret  = 12345; /* placeholder, overwritten by the worker with the result of func */

        sem_post(&w->work_sem);
    }

    /* wait for every worker and collect the results */
    for (n = 0; n < count; n++) {
        ThreadContext *w = &workers[n];

        sem_wait(&w->done_sem);

        sem_getvalue(&w->work_sem, &sem_value); assert(sem_value == 0);
        sem_getvalue(&w->done_sem, &sem_value); assert(sem_value == 0);

        w->func = NULL;
        if (ret)
            ret[n] = w->ret;
    }

    return 0;
}
/**
 * Creates thread_count worker threads for the given context and installs
 * avcodec_pthread_execute() as its execute() callback.
 * The per-thread state is stored in s->thread_opaque and must be released
 * with avcodec_pthread_free().
 * @param s            codec context; s->thread_opaque must be NULL on entry
 * @param thread_count number of worker threads to create, must be >= 1
 * @return 0 on success, -1 on failure (nothing stays allocated and
 *         s->execute is left unchanged)
 */
int avcodec_pthread_init(AVCodecContext *s, int thread_count){
    int i;
    ThreadContext *c;

    /* a non-positive count cannot be served; leave the context untouched */
    if(thread_count < 1)
        return -1;

    s->thread_count= thread_count;

    assert(!s->thread_opaque);
    c= av_mallocz(sizeof(ThreadContext)*thread_count);
    if(!c)
        return -1;
    s->thread_opaque= c;

    for(i=0; i<thread_count; i++){
        c[i].avctx= s;

        /* on any failure, undo the partial setup of entry i before bailing out */
        if(sem_init(&c[i].work_sem, 0, 0))
            goto fail;
        if(sem_init(&c[i].done_sem, 0, 0)){
            sem_destroy(&c[i].work_sem);
            goto fail;
        }
        if(pthread_create(&c[i].thread, NULL, thread_func, &c[i])){
            sem_destroy(&c[i].done_sem);
            sem_destroy(&c[i].work_sem);
            goto fail;
        }
    }

    s->execute= avcodec_pthread_execute;

    return 0;
fail:
    /*
     * Only entries [0, i) own initialised semaphores and a running thread.
     * Temporarily shrink thread_count so that avcodec_pthread_free() stops,
     * joins and destroys exactly those and then frees the array.
     */
    s->thread_count= i;
    avcodec_pthread_free(s);
    s->thread_count= thread_count;
    return -1;
}
......@@ -324,6 +324,16 @@ int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic){
return 0;
}
/**
 * Default execute() implementation: runs the tasks one after another in the
 * calling thread.
 * @param c     codec context passed through to func
 * @param func  function to call once per task
 * @param arg   array of count task arguments
 * @param ret   if not NULL, receives the count return values of func
 * @param count number of tasks
 * @return always 0
 */
int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count){
    int task = 0;

    while (task < count) {
        const int result = func(c, arg[task]);

        if (ret)
            ret[task] = result;
        task++;
    }

    return 0;
}
enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, enum PixelFormat * fmt){
return fmt[0];
}
......@@ -352,6 +362,8 @@ void avcodec_get_context_defaults(AVCodecContext *s){
s->get_buffer= avcodec_default_get_buffer;
s->release_buffer= avcodec_default_release_buffer;
s->get_format= avcodec_default_get_format;
s->execute= avcodec_default_execute;
s->thread_count=1;
s->me_subpel_quality=8;
s->lmin= FF_QP2LAMBDA * s->qmin;
s->lmax= FF_QP2LAMBDA * s->qmax;
......
......@@ -8,6 +8,9 @@ stddev: 7.65 PSNR:30.44 bytes:7602176
13336cffcba456ff4a7607b2a7e57b33 *./data/a-mpeg2i.mpg
4c9701eb83ed81dd9a328af83d7d7c8a *./data/out.yuv
stddev: 7.66 PSNR:30.43 bytes:7602176
8c4a7744f40a1e7aa16b985ecaad176a *./data/a-mpeg2thread.mpg
12ab090b699c130e5aef8e050965f092 *./data/out.yuv
stddev: 9.44 PSNR:28.62 bytes:7299072
d0dc46dd831398237a690ebbeff18b64 *./data/a-msmpeg4v2.avi
712aa6c959d1d90a78fe98657cbff19c *./data/out.yuv
stddev: 8.11 PSNR:29.94 bytes:7602176
......@@ -38,10 +41,13 @@ stddev: 10.18 PSNR:27.96 bytes:7145472
64b4b917014169294d59fe43ad6b3da9 *./data/a-mpeg4-adv.avi
8069deacba9756fd25ad37b467eb6365 *./data/out.yuv
stddev: 10.23 PSNR:27.92 bytes:7602176
96453d489d5418e382824cfb2673ac58 *./data/a-mpeg4-thread.avi
17ec2d72186dbb72d8a79cd448796cef *./data/out.yuv
stddev: 12.09 PSNR:26.47 bytes:7145472
f863f4198521bd76930ea33991b47273 *./data/a-error-mpeg4-adv.avi
ba7fcd126c7c9fead5a5de71aaaf0624 *./data/out.yuv
stddev: 16.80 PSNR:23.61 bytes:7602176
198ad515da4f330d780c54fd8d6186ab *./data/a-error-mpeg4-nr.avi
198ad515da4f330d780c54fd8d6186ab *./data/a-mpeg4-nr.avi
ebdb326e19aeab8e3c70d7050dc3b240 *./data/out.yuv
stddev: 7.02 PSNR:31.19 bytes:7602176
328ebd044362116e274739e23c482ee7 *./data/a-mpeg1b.mpg
......
......@@ -33,6 +33,7 @@ elif [ "$1" = "libavtest" ] ; then
else
do_mpeg=y
do_mpeg2=y
do_mpeg2thread=y
do_msmpeg4v2=y
do_msmpeg4=y
do_wmv1=y
......@@ -51,6 +52,7 @@ else
do_adpcm_ms=y
do_rc=y
do_mpeg4adv=y
do_mpeg4thread=y
do_mpeg4nr=y
do_mpeg1b=y
do_asv1=y
......@@ -148,6 +150,16 @@ do_ffmpeg $file -y -qscale 10 -f pgmyuv -i $raw_src -vcodec mpeg2video -f mpeg1v
do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst
fi
###################################
if [ -n "$do_mpeg2thread" ] ; then
# mpeg2 encoding interlaced
file=${outfile}mpeg2thread.mpg
do_ffmpeg $file -y -qscale 10 -f pgmyuv -i $raw_src -vcodec mpeg2video -f mpeg1video -bf 2 -ildct -ilme -threads 2 $file
# mpeg2 decoding
do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst
fi
###################################
if [ -n "$do_msmpeg4v2" ] ; then
# msmpeg4 encoding
......@@ -248,6 +260,16 @@ do_ffmpeg $file -y -qscale 9 -4mv -hq -part -ps 200 -aic -trell -f pgmyuv -i $ra
do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst
fi
###################################
if [ -n "$do_mpeg4thread" ] ; then
# mpeg4
file=${outfile}mpeg4-thread.avi
do_ffmpeg $file -y -b 500 -4mv -hq -part -ps 200 -aic -trell -bf 2 -f pgmyuv -i $raw_src -an -vcodec mpeg4 -threads 2 $file
# mpeg4 decoding
do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst
fi
###################################
if [ -n "$do_error" ] ; then
# damaged mpeg4
......@@ -261,7 +283,7 @@ fi
###################################
if [ -n "$do_mpeg4nr" ] ; then
# noise reduction
file=${outfile}error-mpeg4-nr.avi
file=${outfile}mpeg4-nr.avi
do_ffmpeg $file -y -qscale 8 -4mv -mbd 2 -nr 200 -f pgmyuv -i $raw_src -an -vcodec mpeg4 $file
# mpeg4 decoding
......
......@@ -8,6 +8,9 @@ stddev: 4.95 PSNR:34.22 bytes:7602176
6da01fd0d910fbfcdc5b212ef3dd65cb *./data/a-mpeg2i.mpg
1e21fd7ed53abf352f9ea8548afa80a3 *./data/out.yuv
stddev: 4.96 PSNR:34.20 bytes:7602176
a3dd9c2911c9556d377ab1465f7365b4 *./data/a-mpeg2thread.mpg
8cf98fa5c59c959e35389a1a7180b379 *./data/out.yuv
stddev: 5.55 PSNR:33.22 bytes:7299072
14db391f167b52b21a983157b410affc *./data/a-msmpeg4v2.avi
fc8881e0904af9491d5fa0163183954b *./data/out.yuv
stddev: 5.29 PSNR:33.64 bytes:7602176
......@@ -38,10 +41,13 @@ stddev: 4.20 PSNR:35.64 bytes:7145472
accf60d11aceecabb3c1997aec6e18b5 *./data/a-mpeg4-adv.avi
a287b07b812fbeeb5364517303178ac7 *./data/out.yuv
stddev: 4.77 PSNR:34.54 bytes:7602176
8750b3935266211fea6b062f445bb305 *./data/a-mpeg4-thread.avi
aee1867b77490b3f8d58fcc9b7c5b535 *./data/out.yuv
stddev: 3.92 PSNR:36.25 bytes:7145472
03ff35856faefb4882eaf4d86d95bea7 *./data/a-error-mpeg4-adv.avi
8550acff0851ee915bd5800f1e20f37c *./data/out.yuv
stddev: 9.66 PSNR:28.42 bytes:7602176
74dbbba19d250a712702b1893c003461 *./data/a-error-mpeg4-nr.avi
74dbbba19d250a712702b1893c003461 *./data/a-mpeg4-nr.avi
81b985840c03bf101302abde131e3900 *./data/out.yuv
stddev: 4.67 PSNR:34.73 bytes:7602176
671802a2c5078e69f7f422765ea87f2a *./data/a-mpeg1b.mpg
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment