Commit 1457ab52 authored by Michael Niedermayer

qpel encoding

4mv+b frames encoding finally fixed
chroma ME
5 comparison functions for ME
b frame encoding speedup
wmv2 codec (unfinished)
user specified diamond size for EPZS

Originally committed as revision 1365 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent ac977341
...@@ -53,7 +53,7 @@ void avcodec_register_all(void) ...@@ -53,7 +53,7 @@ void avcodec_register_all(void)
register_avcodec(&msmpeg4v2_encoder); register_avcodec(&msmpeg4v2_encoder);
register_avcodec(&msmpeg4v3_encoder); register_avcodec(&msmpeg4v3_encoder);
register_avcodec(&wmv1_encoder); register_avcodec(&wmv1_encoder);
// register_avcodec(&wmv2_encoder); register_avcodec(&wmv2_encoder);
register_avcodec(&huffyuv_encoder); register_avcodec(&huffyuv_encoder);
#endif /* CONFIG_ENCODERS */ #endif /* CONFIG_ENCODERS */
register_avcodec(&rawvideo_codec); register_avcodec(&rawvideo_codec);
...@@ -66,7 +66,7 @@ void avcodec_register_all(void) ...@@ -66,7 +66,7 @@ void avcodec_register_all(void)
register_avcodec(&msmpeg4v2_decoder); register_avcodec(&msmpeg4v2_decoder);
register_avcodec(&msmpeg4v3_decoder); register_avcodec(&msmpeg4v3_decoder);
register_avcodec(&wmv1_decoder); register_avcodec(&wmv1_decoder);
// register_avcodec(&wmv2_decoder); register_avcodec(&wmv2_decoder);
register_avcodec(&mpeg_decoder); register_avcodec(&mpeg_decoder);
register_avcodec(&h263i_decoder); register_avcodec(&h263i_decoder);
register_avcodec(&rv10_decoder); register_avcodec(&rv10_decoder);
......
...@@ -5,8 +5,8 @@ ...@@ -5,8 +5,8 @@
#define LIBAVCODEC_VERSION_INT 0x000406 #define LIBAVCODEC_VERSION_INT 0x000406
#define LIBAVCODEC_VERSION "0.4.6" #define LIBAVCODEC_VERSION "0.4.6"
#define LIBAVCODEC_BUILD 4646 #define LIBAVCODEC_BUILD 4647
#define LIBAVCODEC_BUILD_STR "4646" #define LIBAVCODEC_BUILD_STR "4647"
enum CodecID { enum CodecID {
CODEC_ID_NONE, CODEC_ID_NONE,
...@@ -850,6 +850,41 @@ typedef struct AVCodecContext { ...@@ -850,6 +850,41 @@ typedef struct AVCodecContext {
* decoding: unused * decoding: unused
*/ */
int mb_qmax; int mb_qmax;
/**
* motion estimation compare function
* encoding: set by user.
* decoding: unused
*/
int me_cmp;
/**
* subpixel motion estimation compare function
* encoding: set by user.
* decoding: unused
*/
int me_sub_cmp;
/**
* macroblock compare function (not supported yet)
* encoding: set by user.
* decoding: unused
*/
int mb_cmp;
#define FF_CMP_SAD 0
#define FF_CMP_SSE 1
#define FF_CMP_SATD 2
#define FF_CMP_DCT 3
#define FF_CMP_PSNR 4
#define FF_CMP_BIT 5
#define FF_CMP_RD 6
#define FF_CMP_ZERO 7
#define FF_CMP_CHROMA 256
/**
* ME diamond size
* encoding: set by user.
* decoding: unused
*/
int dia_size;
} AVCodecContext; } AVCodecContext;
typedef struct AVCodec { typedef struct AVCodec {
......
This diff is collapsed.
...@@ -79,13 +79,10 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ ...@@ -79,13 +79,10 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
/* motion estimation */ /* motion estimation */
typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size); typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
/*
int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
*/
typedef struct DSPContext { typedef struct DSPContext {
/* pixel ops : interface with DCT */ /* pixel ops : interface with DCT */
void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
...@@ -98,7 +95,16 @@ typedef struct DSPContext { ...@@ -98,7 +95,16 @@ typedef struct DSPContext {
void (*clear_blocks)(DCTELEM *blocks/*align 16*/); void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
int (*pix_sum)(UINT8 * pix, int line_size); int (*pix_sum)(UINT8 * pix, int line_size);
int (*pix_norm1)(UINT8 * pix, int line_size); int (*pix_norm1)(UINT8 * pix, int line_size);
int (*pix_norm)(UINT8 * pix1, UINT8 * pix2, int line_size); me_cmp_func sad[2]; /* identical to pix_absAxA except additional void * */
me_cmp_func sse[2];
me_cmp_func hadamard8_diff[2];
me_cmp_func dct_sad[2];
me_cmp_func quant_psnr[2];
int (*hadamard8_abs )(uint8_t *src, int stride, int mean);
me_cmp_func me_cmp[11];
me_cmp_func me_sub_cmp[11];
me_cmp_func mb_cmp[11];
/* maybe create an array for 16/8 functions */ /* maybe create an array for 16/8 functions */
op_pixels_func put_pixels_tab[2][4]; op_pixels_func put_pixels_tab[2][4];
...@@ -109,6 +115,7 @@ typedef struct DSPContext { ...@@ -109,6 +115,7 @@ typedef struct DSPContext {
qpel_mc_func avg_qpel_pixels_tab[2][16]; qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func put_mspel_pixels_tab[8];
op_pixels_abs_func pix_abs16x16; op_pixels_abs_func pix_abs16x16;
op_pixels_abs_func pix_abs16x16_x2; op_pixels_abs_func pix_abs16x16_x2;
...@@ -120,9 +127,8 @@ typedef struct DSPContext { ...@@ -120,9 +127,8 @@ typedef struct DSPContext {
op_pixels_abs_func pix_abs8x8_xy2; op_pixels_abs_func pix_abs8x8_xy2;
/* huffyuv specific */ /* huffyuv specific */
//FIXME note: alignment isnt guranteed currently but could be if needed
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/,int w); void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
} DSPContext; } DSPContext;
void dsputil_init(DSPContext* p, unsigned mask); void dsputil_init(DSPContext* p, unsigned mask);
...@@ -156,6 +162,7 @@ static inline void emms(void) ...@@ -156,6 +162,7 @@ static inline void emms(void)
__asm __volatile ("emms;":::"memory"); __asm __volatile ("emms;":::"memory");
} }
#define emms_c() \ #define emms_c() \
{\ {\
if (mm_flags & MM_MMX)\ if (mm_flags & MM_MMX)\
...@@ -281,6 +288,14 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out, ...@@ -281,6 +288,14 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out,
const FFTSample *input, FFTSample *tmp); const FFTSample *input, FFTSample *tmp);
void ff_mdct_end(MDCTContext *s); void ff_mdct_end(MDCTContext *s);
/*
 * WARPER88_1616(name8, name16)
 *
 * Defines a static 16x16 compare function `name16` on top of an existing
 * 8x8 compare function `name8`: the 8x8 function is run on the four 8x8
 * quadrants (top-left, top-right, bottom-left, bottom-right) of the two
 * blocks and the four scores are added up.  The context pointer `s` and the
 * stride are passed through to `name8` unchanged.
 */
#define WARPER88_1616(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride){\
    const int below= 8*stride; /* byte offset of the lower pair of quadrants */\
    int score;\
\
    score = name8(s, dst        , src        , stride);\
    score+= name8(s, dst+8      , src+8      , stride);\
    score+= name8(s, dst  +below, src  +below, stride);\
    score+= name8(s, dst+8+below, src+8+below, stride);\
\
    return score;\
}
#ifndef HAVE_LRINTF #ifndef HAVE_LRINTF
/* XXX: add ISOC specific test to avoid specific BSD testing. */ /* XXX: add ISOC specific test to avoid specific BSD testing. */
/* better than nothing implementation. */ /* better than nothing implementation. */
......
This diff is collapsed.
...@@ -40,7 +40,7 @@ static inline long long rdtsc() ...@@ -40,7 +40,7 @@ static inline long long rdtsc()
} }
#endif #endif
static int h263_decode_init(AVCodecContext *avctx) int ff_h263_decode_init(AVCodecContext *avctx)
{ {
MpegEncContext *s = avctx->priv_data; MpegEncContext *s = avctx->priv_data;
...@@ -113,7 +113,7 @@ static int h263_decode_init(AVCodecContext *avctx) ...@@ -113,7 +113,7 @@ static int h263_decode_init(AVCodecContext *avctx)
return 0; return 0;
} }
static int h263_decode_end(AVCodecContext *avctx) int ff_h263_decode_end(AVCodecContext *avctx)
{ {
MpegEncContext *s = avctx->priv_data; MpegEncContext *s = avctx->priv_data;
...@@ -343,7 +343,7 @@ static int mpeg4_find_frame_end(MpegEncContext *s, UINT8 *buf, int buf_size){ ...@@ -343,7 +343,7 @@ static int mpeg4_find_frame_end(MpegEncContext *s, UINT8 *buf, int buf_size){
return -1; return -1;
} }
static int h263_decode_frame(AVCodecContext *avctx, int ff_h263_decode_frame(AVCodecContext *avctx,
void *data, int *data_size, void *data, int *data_size,
UINT8 *buf, int buf_size) UINT8 *buf, int buf_size)
{ {
...@@ -416,9 +416,11 @@ retry: ...@@ -416,9 +416,11 @@ retry:
if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix
return -1; return -1;
} }
/* let's go :-) */ /* let's go :-) */
if (s->h263_msmpeg4) { if (s->msmpeg4_version==5) {
ret= ff_wmv2_decode_picture_header(s);
} else if (s->msmpeg4_version) {
ret = msmpeg4_decode_picture_header(s); ret = msmpeg4_decode_picture_header(s);
} else if (s->h263_pred) { } else if (s->h263_pred) {
if(s->avctx->extradata_size && s->picture_number==0){ if(s->avctx->extradata_size && s->picture_number==0){
...@@ -634,7 +636,6 @@ retry: ...@@ -634,7 +636,6 @@ retry:
} }
if(num_end_markers || error){ if(num_end_markers || error){
fprintf(stderr, "concealing errors\n"); fprintf(stderr, "concealing errors\n");
//printf("type:%d\n", s->pict_type);
ff_error_resilience(s); ff_error_resilience(s);
} }
} }
...@@ -713,10 +714,10 @@ AVCodec mpeg4_decoder = { ...@@ -713,10 +714,10 @@ AVCodec mpeg4_decoder = {
CODEC_TYPE_VIDEO, CODEC_TYPE_VIDEO,
CODEC_ID_MPEG4, CODEC_ID_MPEG4,
sizeof(MpegEncContext), sizeof(MpegEncContext),
h263_decode_init, ff_h263_decode_init,
NULL, NULL,
h263_decode_end, ff_h263_decode_end,
h263_decode_frame, ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED,
}; };
...@@ -725,10 +726,10 @@ AVCodec h263_decoder = { ...@@ -725,10 +726,10 @@ AVCodec h263_decoder = {
CODEC_TYPE_VIDEO, CODEC_TYPE_VIDEO,
CODEC_ID_H263, CODEC_ID_H263,
sizeof(MpegEncContext), sizeof(MpegEncContext),
h263_decode_init, ff_h263_decode_init,
NULL, NULL,
h263_decode_end, ff_h263_decode_end,
h263_decode_frame, ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
}; };
...@@ -737,10 +738,10 @@ AVCodec msmpeg4v1_decoder = { ...@@ -737,10 +738,10 @@ AVCodec msmpeg4v1_decoder = {
CODEC_TYPE_VIDEO, CODEC_TYPE_VIDEO,
CODEC_ID_MSMPEG4V1, CODEC_ID_MSMPEG4V1,
sizeof(MpegEncContext), sizeof(MpegEncContext),
h263_decode_init, ff_h263_decode_init,
NULL, NULL,
h263_decode_end, ff_h263_decode_end,
h263_decode_frame, ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
}; };
...@@ -749,10 +750,10 @@ AVCodec msmpeg4v2_decoder = { ...@@ -749,10 +750,10 @@ AVCodec msmpeg4v2_decoder = {
CODEC_TYPE_VIDEO, CODEC_TYPE_VIDEO,
CODEC_ID_MSMPEG4V2, CODEC_ID_MSMPEG4V2,
sizeof(MpegEncContext), sizeof(MpegEncContext),
h263_decode_init, ff_h263_decode_init,
NULL, NULL,
h263_decode_end, ff_h263_decode_end,
h263_decode_frame, ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
}; };
...@@ -761,10 +762,10 @@ AVCodec msmpeg4v3_decoder = { ...@@ -761,10 +762,10 @@ AVCodec msmpeg4v3_decoder = {
CODEC_TYPE_VIDEO, CODEC_TYPE_VIDEO,
CODEC_ID_MSMPEG4V3, CODEC_ID_MSMPEG4V3,
sizeof(MpegEncContext), sizeof(MpegEncContext),
h263_decode_init, ff_h263_decode_init,
NULL, NULL,
h263_decode_end, ff_h263_decode_end,
h263_decode_frame, ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
}; };
...@@ -773,22 +774,10 @@ AVCodec wmv1_decoder = { ...@@ -773,22 +774,10 @@ AVCodec wmv1_decoder = {
CODEC_TYPE_VIDEO, CODEC_TYPE_VIDEO,
CODEC_ID_WMV1, CODEC_ID_WMV1,
sizeof(MpegEncContext), sizeof(MpegEncContext),
h263_decode_init, ff_h263_decode_init,
NULL,
h263_decode_end,
h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
AVCodec wmv2_decoder = {
"wmv2",
CODEC_TYPE_VIDEO,
CODEC_ID_WMV2,
sizeof(MpegEncContext),
h263_decode_init,
NULL, NULL,
h263_decode_end, ff_h263_decode_end,
h263_decode_frame, ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
}; };
...@@ -797,10 +786,10 @@ AVCodec h263i_decoder = { ...@@ -797,10 +786,10 @@ AVCodec h263i_decoder = {
CODEC_TYPE_VIDEO, CODEC_TYPE_VIDEO,
CODEC_ID_H263I, CODEC_ID_H263I,
sizeof(MpegEncContext), sizeof(MpegEncContext),
h263_decode_init, ff_h263_decode_init,
NULL, NULL,
h263_decode_end, ff_h263_decode_end,
h263_decode_frame, ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
}; };
...@@ -43,6 +43,11 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); ...@@ -43,6 +43,11 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int sad16x16_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
int sad8x8_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
int sad16x16_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
int sad8x8_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
/* pixel operations */ /* pixel operations */
static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
...@@ -213,7 +218,7 @@ static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) ...@@ -213,7 +218,7 @@ static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size)
); );
} }
static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride) static inline void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride)
{ {
asm volatile( asm volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
...@@ -496,7 +501,150 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ ...@@ -496,7 +501,150 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
for(; i<w; i++) for(; i<w; i++)
dst[i+0] = src1[i+0]-src2[i+0]; dst[i+0] = src1[i+0]-src2[i+0];
} }
/*
 * Building blocks for the MMX Hadamard code.  Every macro expands to a run
 * of adjacent string literals that is meant to be pasted into an asm()
 * statement (AT&T operand order: source first, destination second).
 *
 * The last line of a macro must NOT end in a backslash: a trailing
 * continuation splices whatever line follows into the macro body.
 */

/* In-place butterfly on packed words: a <- a+b, b <- b-a (original values). */
#define LBUTTERFLY(a,b)\
    "paddw " #b ", " #a " \n\t"\
    "paddw " #b ", " #b " \n\t"\
    "psubw " #a ", " #b " \n\t"

/*
 * Three butterfly stages over mm0..mm7 (register distance 1, then 2, then 4),
 * i.e. an 8-point Hadamard transform applied to each packed word lane.
 */
#define HADAMARD48\
    LBUTTERFLY(%%mm0, %%mm1)\
    LBUTTERFLY(%%mm2, %%mm3)\
    LBUTTERFLY(%%mm4, %%mm5)\
    LBUTTERFLY(%%mm6, %%mm7)\
    \
    LBUTTERFLY(%%mm0, %%mm2)\
    LBUTTERFLY(%%mm1, %%mm3)\
    LBUTTERFLY(%%mm4, %%mm6)\
    LBUTTERFLY(%%mm5, %%mm7)\
    \
    LBUTTERFLY(%%mm0, %%mm4)\
    LBUTTERFLY(%%mm1, %%mm5)\
    LBUTTERFLY(%%mm2, %%mm6)\
    LBUTTERFLY(%%mm3, %%mm7)

/*
 * a <- |a| per signed word.  z is a scratch register: it is overwritten with
 * the sign mask of a (all ones where a < 0), then a = (a ^ z) - z.
 */
#define MMABS(a,z)\
    "pxor " #z ", " #z " \n\t"\
    "pcmpgtw " #a ", " #z " \n\t"\
    "pxor " #z ", " #a " \n\t"\
    "psubw " #z ", " #a " \n\t"

/*
 * Same as MMABS, then sum <- sum + |a| with unsigned saturation.
 * a is interpreted as SIGNED words, so do not feed it an unsigned accumulator.
 */
#define MMABS_SUM(a,z, sum)\
    "pxor " #z ", " #z " \n\t"\
    "pcmpgtw " #a ", " #z " \n\t"\
    "pxor " #z ", " #a " \n\t"\
    "psubw " #z ", " #a " \n\t"\
    "paddusw " #a ", " #sum " \n\t"

/*
 * Interleave step: t <- copy of a, a <- low-half interleave of (a,b),
 * t <- high-half interleave of (a,b).  n selects the unpack width suffix
 * (wd = words, dq = doublewords).
 */
#define SBUTTERFLY(a,b,t,n)\
    "movq " #a ", " #t " \n\t" /* abcd */\
    "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
    "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */

/*
 * 4x4 transpose of packed words held in a,b,c,d using t as scratch.
 * The transposed rows end up in a, d, t, c (in that order); b is clobbered.
 */
#define TRANSPOSE4(a,b,c,d,t)\
    SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
    SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\
    SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\
    SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */

/* Load four quadwords from byte offsets o, o+16, o+32, o+48 of asm operand %1. */
#define LOAD4(o, a, b, c, d)\
    "movq "#o"(%1), " #a " \n\t"\
    "movq "#o"+16(%1), " #b " \n\t"\
    "movq "#o"+32(%1), " #c " \n\t"\
    "movq "#o"+48(%1), " #d " \n\t"

/* Store four quadwords to byte offsets o, o+16, o+32, o+48 of asm operand %1. */
#define STORE4(o, a, b, c, d)\
    "movq "#a", "#o"(%1) \n\t"\
    "movq "#b", "#o"+16(%1) \n\t"\
    "movq "#c", "#o"+32(%1) \n\t"\
    "movq "#d", "#o"+48(%1) \n\t"
/**
 * MMX compare function: sum of the absolute values of the 8x8 Hadamard
 * transform of the difference between two 8x8 pixel blocks.
 *
 * @param s      context pointer required by the me_cmp_func signature; unused
 * @param src1   first 8x8 block
 * @param src2   second 8x8 block
 * @param stride line size passed on to diff_pixels_mmx()
 * @return the accumulated score, limited to 16 bits (the additions saturate
 *         at 0xFFFF and the result is masked with 0xFFFF)
 *
 * The difference block is written to temp by diff_pixels_mmx() and then
 * addressed in 16-byte steps (assumes DCTELEM is a 16-bit type, so one row
 * of eight coefficients is 16 bytes -- confirm against the DCTELEM typedef).
 */
static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride){
    uint64_t temp[16] __align8;
    int sum=0;

    diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);

    asm volatile(
        /* first pass, quadwords at offsets 0,16,..,112: transform, then
           transpose both 4x4 groups; mm7 is spilled to 112(%1) because the
           transpose needs a scratch register */
        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48

        "movq %%mm7, 112(%1) \n\t"

        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
        STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)

        "movq 112(%1), %%mm7 \n\t"
        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
        STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)

        /* first pass, quadwords at offsets 8,24,..,120 (spill slot 120(%1)) */
        LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48

        "movq %%mm7, 120(%1) \n\t"

        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
        STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)

        "movq 120(%1), %%mm7 \n\t"
        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
        /* the transposed rows sit in mm4, mm7, mm0, mm6; move them into
           mm4..mm7 directly instead of storing to 72.. and reloading */
        "movq %%mm7, %%mm5 \n\t"//FIXME remove
        "movq %%mm6, %%mm7 \n\t"
        "movq %%mm0, %%mm6 \n\t"
//      STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove

        /* second pass on the quadwords stored at 64.. plus the four rows
           kept in mm4..mm7 */
        LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
//      LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48
        /* mm7 doubles as the MMABS scratch register, so park its
           coefficients in 64(%1) and fold them in last */
        "movq %%mm7, 64(%1) \n\t"
        MMABS(%%mm0, %%mm7)
        MMABS_SUM(%%mm1, %%mm7, %%mm0)
        MMABS_SUM(%%mm2, %%mm7, %%mm0)
        MMABS_SUM(%%mm3, %%mm7, %%mm0)
        MMABS_SUM(%%mm4, %%mm7, %%mm0)
        MMABS_SUM(%%mm5, %%mm7, %%mm0)
        MMABS_SUM(%%mm6, %%mm7, %%mm0)
        "movq 64(%1), %%mm1 \n\t"
        MMABS_SUM(%%mm1, %%mm7, %%mm0)
        /* keep the first partial sum in 64(%1) */
        "movq %%mm0, 64(%1) \n\t"

        /* second pass on the quadwords stored at 0.. and 8.. */
        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48
        "movq %%mm7, (%1) \n\t"
        MMABS(%%mm0, %%mm7)
        MMABS_SUM(%%mm1, %%mm7, %%mm0)
        MMABS_SUM(%%mm2, %%mm7, %%mm0)
        MMABS_SUM(%%mm3, %%mm7, %%mm0)
        MMABS_SUM(%%mm4, %%mm7, %%mm0)
        MMABS_SUM(%%mm5, %%mm7, %%mm0)
        MMABS_SUM(%%mm6, %%mm7, %%mm0)
        "movq (%1), %%mm1 \n\t"
        MMABS_SUM(%%mm1, %%mm7, %%mm0)
        /* Add the first partial sum as-is.  It is an unsigned saturated
           accumulator, not a signed coefficient: taking its absolute value
           (as MMABS_SUM does) would negate every lane >= 0x8000 and turn a
           large score into a small one. */
        "paddusw 64(%1), %%mm0 \n\t"

        /* fold the four word lanes of mm0 into the lowest lane */
        "movq %%mm0, %%mm1 \n\t"
        "psrlq $32, %%mm0 \n\t"
        "paddusw %%mm1, %%mm0 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "psrlq $16, %%mm0 \n\t"
        "paddusw %%mm1, %%mm0 \n\t"
        "movd %%mm0, %0 \n\t"

        : "=r" (sum)
        : "r"(temp)
        : "memory" /* the asm reads and writes temp through %1 */
    );
    return sum&0xFFFF;
}
/* 16x16 variant: WARPER88_1616 defines hadamard8_diff16_mmx as the sum of
   hadamard8_diff_mmx over the four 8x8 quadrants of the block. */
WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx)
#if 0 #if 0
static void just_return() { return; } static void just_return() { return; }
...@@ -579,7 +727,13 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) ...@@ -579,7 +727,13 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
c->add_bytes= add_bytes_mmx; c->add_bytes= add_bytes_mmx;
c->diff_bytes= diff_bytes_mmx; c->diff_bytes= diff_bytes_mmx;
c->hadamard8_diff[0]= hadamard8_diff16_mmx;
c->hadamard8_diff[1]= hadamard8_diff_mmx;
c->sad[0]= sad16x16_mmx;
c->sad[1]= sad8x8_mmx;
if (mm_flags & MM_MMXEXT) { if (mm_flags & MM_MMXEXT) {
c->pix_abs16x16 = pix_abs16x16_mmx2; c->pix_abs16x16 = pix_abs16x16_mmx2;
c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
...@@ -591,6 +745,9 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) ...@@ -591,6 +745,9 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2; c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
c->sad[0]= sad16x16_mmx2;
c->sad[1]= sad8x8_mmx2;
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
......
...@@ -274,6 +274,15 @@ int pix_abs8x8_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ ...@@ -274,6 +274,15 @@ int pix_abs8x8_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
\ \
return sum_ ## suf();\ return sum_ ## suf();\
}\ }\
/* 8x8 SAD entry point with the me_cmp_func style signature: the extra */\
/* context pointer s is accepted but not used by this function.        */\
int sad8x8_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
{\
/* clear mm7 and mm6 before accumulating (presumably the zero constant and */\
/* the running sum used by the sad8_ and sum_ helpers -- TODO confirm)     */\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
/* NOTE(review): the final argument 3 looks like log2 of the row count */\
/* (8 rows here, 4 in the 16x16 variant) -- confirm in sad8_            */\
sad8_ ## suf(blk1, blk2, stride, 3);\
\
return sum_ ## suf();\
}\
\ \
int pix_abs8x8_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ int pix_abs8x8_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
{\ {\
...@@ -324,6 +333,16 @@ int pix_abs16x16_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ ...@@ -324,6 +333,16 @@ int pix_abs16x16_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
\ \
return sum_ ## suf();\ return sum_ ## suf();\
}\ }\
/* 16x16 SAD entry point with the me_cmp_func style signature: the extra */\
/* context pointer s is accepted but not used by this function.          */\
int sad16x16_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
{\
/* clear mm7 and mm6 before accumulating (presumably the zero constant and */\
/* the running sum used by the sad8_ and sum_ helpers -- TODO confirm)     */\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
/* the block is processed as two 8 pixel wide column strips (offsets 0 and 8); */\
/* NOTE(review): the final argument 4 looks like log2 of the row count -- confirm */\
sad8_ ## suf(blk1 , blk2 , stride, 4);\
sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
\
return sum_ ## suf();\
}\
int pix_abs16x16_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ int pix_abs16x16_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
{\ {\
asm volatile("pxor %%mm7, %%mm7 \n\t"\ asm volatile("pxor %%mm7, %%mm7 \n\t"\
......
This diff is collapsed.
This diff is collapsed.
...@@ -526,7 +526,7 @@ void ff_mpeg1_encode_init(MpegEncContext *s) ...@@ -526,7 +526,7 @@ void ff_mpeg1_encode_init(MpegEncContext *s)
} }
} }
} }
s->mv_penalty= mv_penalty; s->me.mv_penalty= mv_penalty;
s->fcode_tab= fcode_tab; s->fcode_tab= fcode_tab;
s->min_qcoeff=-255; s->min_qcoeff=-255;
s->max_qcoeff= 255; s->max_qcoeff= 255;
......
This diff is collapsed.
...@@ -129,6 +129,31 @@ typedef struct ParseContext{ ...@@ -129,6 +129,31 @@ typedef struct ParseContext{
int frame_start_found; int frame_start_found;
} ParseContext; } ParseContext;
/* forward declaration: the function pointers below take the encoder context,
   which is only defined after this struct */
struct MpegEncContext;
/**
 * Motion estimation state, embedded in MpegEncContext as the member `me`.
 */
typedef struct MotionEstContext{
int skip; /* set if ME is skipped for the current MB */
int co_located_mv[4][2]; /* mv from last p frame for direct mode ME */
int direct_basis_mv[4][2]; /* NOTE(review): undocumented; presumably the base vectors for direct mode -- confirm */
uint8_t *scratchpad; /* data area for the ME algorithm, so that the ME does not need to malloc/free */
uint32_t *map; /* map to avoid duplicate evaluations */
uint32_t *score_map; /* map to store the scores */
int map_generation; /* NOTE(review): presumably a generation tag for entries of map -- confirm */
int penalty_factor; /* NOTE(review): presumably the weight of the MV bit cost in full-pel search -- confirm */
int sub_penalty_factor; /* NOTE(review): presumably the same weight for subpel search -- confirm */
UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV */
/* subpel refinement callback; takes the search window (xmin..ymax), the
   predictor (pred_x, pred_y) and the current best score dmin */
int (*sub_motion_search)(struct MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int xmin, int ymin, int xmax, int ymax,
int pred_x, int pred_y, Picture *ref_picture,
int n, int size, uint16_t * const mv_penalty);
/* table of 7 search callbacks;
   NOTE(review): the meaning of the index is not visible here -- confirm in ff_init_me() */
int (*motion_search[7])(struct MpegEncContext * s, int block,
int *mx_ptr, int *my_ptr,
int P[10][2], int pred_x, int pred_y,
int xmin, int ymin, int xmax, int ymax, Picture *ref_picture,
uint16_t * const mv_penalty);
}MotionEstContext;
typedef struct MpegEncContext { typedef struct MpegEncContext {
struct AVCodecContext *avctx; struct AVCodecContext *avctx;
/* the following parameters must be initialized before encoding */ /* the following parameters must be initialized before encoding */
...@@ -222,15 +247,8 @@ typedef struct MpegEncContext { ...@@ -222,15 +247,8 @@ typedef struct MpegEncContext {
INT16 (*b_back_mv_table)[2]; /* MV table (1MV per MB) backward mode b-frame encoding */ INT16 (*b_back_mv_table)[2]; /* MV table (1MV per MB) backward mode b-frame encoding */
INT16 (*b_bidir_forw_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */ INT16 (*b_bidir_forw_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
INT16 (*b_bidir_back_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */ INT16 (*b_bidir_back_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
INT16 (*b_direct_forw_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
INT16 (*b_direct_back_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
INT16 (*b_direct_mv_table)[2]; /* MV table (1MV per MB) direct mode b-frame encoding */ INT16 (*b_direct_mv_table)[2]; /* MV table (1MV per MB) direct mode b-frame encoding */
int me_method; /* ME algorithm */ int me_method; /* ME algorithm */
uint8_t *me_scratchpad; /* data area for the me algo, so that the ME doesnt need to malloc/free */
uint32_t *me_map; /* map to avoid duplicate evaluations */
uint16_t *me_score_map; /* map to store the SADs */
int me_map_generation;
int skip_me; /* set if ME is skiped for the current MB */
int scene_change_score; int scene_change_score;
int mv_dir; int mv_dir;
#define MV_DIR_BACKWARD 1 #define MV_DIR_BACKWARD 1
...@@ -250,8 +268,9 @@ typedef struct MpegEncContext { ...@@ -250,8 +268,9 @@ typedef struct MpegEncContext {
int mv[2][4][2]; int mv[2][4][2];
int field_select[2][2]; int field_select[2][2];
int last_mv[2][2][2]; /* last MV, used for MV prediction in MPEG1 & B-frame MPEG4 */ int last_mv[2][2][2]; /* last MV, used for MV prediction in MPEG1 & B-frame MPEG4 */
UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV, used for ME */
UINT8 *fcode_tab; /* smallest fcode needed for each MV */ UINT8 *fcode_tab; /* smallest fcode needed for each MV */
MotionEstContext me;
int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...) int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...)
for b-frames rounding mode is allways 0 */ for b-frames rounding mode is allways 0 */
...@@ -458,6 +477,7 @@ typedef struct MpegEncContext { ...@@ -458,6 +477,7 @@ typedef struct MpegEncContext {
/* [mb_intra][isChroma][level][run][last] */ /* [mb_intra][isChroma][level][run][last] */
int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2]; int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2];
int inter_intra_pred; int inter_intra_pred;
int mspel;
/* decompression specific */ /* decompression specific */
GetBitContext gb; GetBitContext gb;
...@@ -519,6 +539,7 @@ typedef struct MpegEncContext { ...@@ -519,6 +539,7 @@ typedef struct MpegEncContext {
void (*fdct)(DCTELEM *block/* align 16*/); void (*fdct)(DCTELEM *block/* align 16*/);
void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
//FIXME move above funcs into dspContext perhaps
} MpegEncContext; } MpegEncContext;
...@@ -528,6 +549,9 @@ void MPV_common_end(MpegEncContext *s); ...@@ -528,6 +549,9 @@ void MPV_common_end(MpegEncContext *s);
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx); int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx);
void MPV_frame_end(MpegEncContext *s); void MPV_frame_end(MpegEncContext *s);
int MPV_encode_init(AVCodecContext *avctx);
int MPV_encode_end(AVCodecContext *avctx);
int MPV_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data);
#ifdef HAVE_MMX #ifdef HAVE_MMX
void MPV_common_init_mmx(MpegEncContext *s); void MPV_common_init_mmx(MpegEncContext *s);
#endif #endif
...@@ -553,6 +577,8 @@ void ff_clean_intra_table_entries(MpegEncContext *s); ...@@ -553,6 +577,8 @@ void ff_clean_intra_table_entries(MpegEncContext *s);
void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable); void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable);
void ff_error_resilience(MpegEncContext *s); void ff_error_resilience(MpegEncContext *s);
void ff_draw_horiz_band(MpegEncContext *s); void ff_draw_horiz_band(MpegEncContext *s);
void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
int src_x, int src_y, int w, int h);
char ff_get_pict_type_char(int pict_type); char ff_get_pict_type_char(int pict_type);
...@@ -585,6 +611,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, ...@@ -585,6 +611,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type); int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type);
void ff_fix_long_p_mvs(MpegEncContext * s); void ff_fix_long_p_mvs(MpegEncContext * s);
void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type); void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type);
void ff_init_me(MpegEncContext *s);
/* mpeg12.c */ /* mpeg12.c */
...@@ -631,6 +658,11 @@ extern UINT8 ff_mpeg4_y_dc_scale_table[32]; ...@@ -631,6 +658,11 @@ extern UINT8 ff_mpeg4_y_dc_scale_table[32];
extern UINT8 ff_mpeg4_c_dc_scale_table[32]; extern UINT8 ff_mpeg4_c_dc_scale_table[32];
extern const INT16 ff_mpeg4_default_intra_matrix[64]; extern const INT16 ff_mpeg4_default_intra_matrix[64];
extern const INT16 ff_mpeg4_default_non_intra_matrix[64]; extern const INT16 ff_mpeg4_default_non_intra_matrix[64];
int ff_h263_decode_init(AVCodecContext *avctx);
int ff_h263_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
UINT8 *buf, int buf_size);
int ff_h263_decode_end(AVCodecContext *avctx);
void h263_encode_mb(MpegEncContext *s, void h263_encode_mb(MpegEncContext *s,
DCTELEM block[6][64], DCTELEM block[6][64],
int motion_x, int motion_y); int motion_x, int motion_y);
...@@ -667,6 +699,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s); ...@@ -667,6 +699,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s);
int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s); int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s);
int ff_h263_resync(MpegEncContext *s); int ff_h263_resync(MpegEncContext *s);
int ff_h263_get_gob_height(MpegEncContext *s); int ff_h263_get_gob_height(MpegEncContext *s);
void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my);
/* rv10.c */ /* rv10.c */
...@@ -684,7 +717,16 @@ int msmpeg4_decode_picture_header(MpegEncContext * s); ...@@ -684,7 +717,16 @@ int msmpeg4_decode_picture_header(MpegEncContext * s);
int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size); int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size);
int ff_msmpeg4_decode_init(MpegEncContext *s); int ff_msmpeg4_decode_init(MpegEncContext *s);
void ff_msmpeg4_encode_init(MpegEncContext *s); void ff_msmpeg4_encode_init(MpegEncContext *s);
int ff_wmv2_decode_picture_header(MpegEncContext * s);
void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr);
void ff_mspel_motion(MpegEncContext *s,
UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
UINT8 **ref_picture, op_pixels_func (*pix_op)[4],
int motion_x, int motion_y, int h);
int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number);
void ff_wmv2_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
int motion_x, int motion_y);
/* mjpegenc.c */ /* mjpegenc.c */
int mjpeg_init(MpegEncContext *s); int mjpeg_init(MpegEncContext *s);
......
...@@ -48,12 +48,14 @@ ...@@ -48,12 +48,14 @@
#define II_BITRATE 128*1024 #define II_BITRATE 128*1024
#define MBAC_BITRATE 50*1024 #define MBAC_BITRATE 50*1024
#define DEFAULT_INTER_INDEX 3
static UINT32 v2_dc_lum_table[512][2]; static UINT32 v2_dc_lum_table[512][2];
static UINT32 v2_dc_chroma_table[512][2]; static UINT32 v2_dc_chroma_table[512][2];
static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n); static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n);
static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded); int n, int coded, const uint8_t *scantable);
static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr); static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
static int msmpeg4_decode_motion(MpegEncContext * s, static int msmpeg4_decode_motion(MpegEncContext * s,
int *mx_ptr, int *my_ptr); int *mx_ptr, int *my_ptr);
...@@ -63,6 +65,7 @@ static inline void msmpeg4_memsetw(short *tab, int val, int n); ...@@ -63,6 +65,7 @@ static inline void msmpeg4_memsetw(short *tab, int val, int n);
static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run, int level, int intra); static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run, int level, int intra);
static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
extern UINT32 inverse[256]; extern UINT32 inverse[256];
...@@ -160,13 +163,14 @@ static void common_init(MpegEncContext * s) ...@@ -160,13 +163,14 @@ static void common_init(MpegEncContext * s)
} }
break; break;
case 4: case 4:
case 5:
s->y_dc_scale_table= wmv1_y_dc_scale_table; s->y_dc_scale_table= wmv1_y_dc_scale_table;
s->c_dc_scale_table= wmv1_c_dc_scale_table; s->c_dc_scale_table= wmv1_c_dc_scale_table;
break; break;
} }
if(s->msmpeg4_version==4){ if(s->msmpeg4_version>=4){
ff_init_scantable(s, &s->intra_scantable , wmv1_scantable[1]); ff_init_scantable(s, &s->intra_scantable , wmv1_scantable[1]);
ff_init_scantable(s, &s->intra_h_scantable, wmv1_scantable[2]); ff_init_scantable(s, &s->intra_h_scantable, wmv1_scantable[2]);
ff_init_scantable(s, &s->intra_v_scantable, wmv1_scantable[3]); ff_init_scantable(s, &s->intra_v_scantable, wmv1_scantable[3]);
...@@ -370,9 +374,9 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number) ...@@ -370,9 +374,9 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
s->per_mb_rl_table = 0; s->per_mb_rl_table = 0;
if(s->msmpeg4_version==4) if(s->msmpeg4_version==4)
s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE && s->pict_type==P_TYPE); s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE && s->pict_type==P_TYPE);
//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
if (s->pict_type == I_TYPE) { if (s->pict_type == I_TYPE) {
s->no_rounding = 1;
s->slice_height= s->mb_height/1; s->slice_height= s->mb_height/1;
put_bits(&s->pb, 5, 0x16 + s->mb_height/s->slice_height); put_bits(&s->pb, 5, 0x16 + s->mb_height/s->slice_height);
...@@ -404,12 +408,6 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number) ...@@ -404,12 +408,6 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 1, s->mv_table_index); put_bits(&s->pb, 1, s->mv_table_index);
} }
if(s->flipflop_rounding){
s->no_rounding ^= 1;
}else{
s->no_rounding = 0;
}
} }
s->esc3_level_length= 0; s->esc3_level_length= 0;
...@@ -923,7 +921,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int ...@@ -923,7 +921,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
} }
/* recalculate block_last_index for M$ wmv1 */ /* recalculate block_last_index for M$ wmv1 */
if(s->msmpeg4_version==4 && s->block_last_index[n]>0){ if(s->msmpeg4_version>=4 && s->block_last_index[n]>0){
for(last_index=63; last_index>=0; last_index--){ for(last_index=63; last_index>=0; last_index--){
if(block[scantable[last_index]]) break; if(block[scantable[last_index]]) break;
} }
...@@ -975,7 +973,7 @@ else ...@@ -975,7 +973,7 @@ else
/* third escape */ /* third escape */
put_bits(&s->pb, 1, 0); put_bits(&s->pb, 1, 0);
put_bits(&s->pb, 1, last); put_bits(&s->pb, 1, last);
if(s->msmpeg4_version==4){ if(s->msmpeg4_version>=4){
if(s->esc3_level_length==0){ if(s->esc3_level_length==0){
s->esc3_level_length=8; s->esc3_level_length=8;
s->esc3_run_length= 6; s->esc3_run_length= 6;
...@@ -1014,7 +1012,7 @@ else ...@@ -1014,7 +1012,7 @@ else
/****************************************/ /****************************************/
/* decoding stuff */ /* decoding stuff */
static VLC mb_non_intra_vlc; static VLC mb_non_intra_vlc[4];
static VLC mb_intra_vlc; static VLC mb_intra_vlc;
static VLC dc_lum_vlc[2]; static VLC dc_lum_vlc[2];
static VLC dc_chroma_vlc[2]; static VLC dc_chroma_vlc[2];
...@@ -1139,9 +1137,12 @@ int ff_msmpeg4_decode_init(MpegEncContext *s) ...@@ -1139,9 +1137,12 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
&mvtab[0][1], 2, 1, &mvtab[0][1], 2, 1,
&mvtab[0][0], 2, 1); &mvtab[0][0], 2, 1);
init_vlc(&mb_non_intra_vlc, MB_NON_INTRA_VLC_BITS, 128, for(i=0; i<4; i++){
&table_mb_non_intra[0][1], 8, 4, init_vlc(&mb_non_intra_vlc[i], MB_NON_INTRA_VLC_BITS, 128,
&table_mb_non_intra[0][0], 8, 4); &wmv2_inter_table[i][0][1], 8, 4,
&wmv2_inter_table[i][0][0], 8, 4); //FIXME name?
}
init_vlc(&mb_intra_vlc, MB_INTRA_VLC_BITS, 64, init_vlc(&mb_intra_vlc, MB_INTRA_VLC_BITS, 64,
&table_mb_intra[0][1], 4, 2, &table_mb_intra[0][1], 4, 2,
&table_mb_intra[0][0], 4, 2); &table_mb_intra[0][0], 4, 2);
...@@ -1167,6 +1168,9 @@ int ff_msmpeg4_decode_init(MpegEncContext *s) ...@@ -1167,6 +1168,9 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
case 4: case 4:
s->decode_mb= msmpeg4v34_decode_mb; s->decode_mb= msmpeg4v34_decode_mb;
break; break;
case 5:
s->decode_mb= wmv2_decode_mb;
break;
} }
s->slice_height= s->mb_height; //to avoid 1/0 if the first frame isnt a keyframe s->slice_height= s->mb_height; //to avoid 1/0 if the first frame isnt a keyframe
...@@ -1334,6 +1338,7 @@ return -1; ...@@ -1334,6 +1338,7 @@ return -1;
s->no_rounding = 0; s->no_rounding = 0;
} }
} }
//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
s->esc3_level_length= 0; s->esc3_level_length= 0;
s->esc3_run_length= 0; s->esc3_run_length= 0;
...@@ -1523,7 +1528,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) ...@@ -1523,7 +1528,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
} }
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0) if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{ {
fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1; return -1;
...@@ -1566,7 +1571,7 @@ printf("S "); ...@@ -1566,7 +1571,7 @@ printf("S ");
} }
} }
code = get_vlc2(&s->gb, mb_non_intra_vlc.table, MB_NON_INTRA_VLC_BITS, 3); code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3);
if (code < 0) if (code < 0)
return -1; return -1;
//s->mb_intra = (code & 0x40) ? 0 : 1; //s->mb_intra = (code & 0x40) ? 0 : 1;
...@@ -1628,7 +1633,7 @@ printf("%c", s->ac_pred ? 'A' : 'I'); ...@@ -1628,7 +1633,7 @@ printf("%c", s->ac_pred ? 'A' : 'I');
} }
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0) if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{ {
fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1; return -1;
...@@ -1639,13 +1644,12 @@ printf("%c", s->ac_pred ? 'A' : 'I'); ...@@ -1639,13 +1644,12 @@ printf("%c", s->ac_pred ? 'A' : 'I');
} }
//#define ERROR_DETAILS //#define ERROR_DETAILS
static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded) int n, int coded, const uint8_t *scan_table)
{ {
int level, i, last, run, run_diff; int level, i, last, run, run_diff;
int dc_pred_dir; int dc_pred_dir;
RLTable *rl; RLTable *rl;
RL_VLC_ELEM *rl_vlc; RL_VLC_ELEM *rl_vlc;
const UINT8 *scan_table;
int qmul, qadd; int qmul, qadd;
if (s->mb_intra) { if (s->mb_intra) {
...@@ -1713,7 +1717,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, ...@@ -1713,7 +1717,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
s->block_last_index[n] = i; s->block_last_index[n] = i;
return 0; return 0;
} }
scan_table = s->inter_scantable.permutated; if(!scan_table)
scan_table = s->inter_scantable.permutated;
set_stat(ST_INTER_AC); set_stat(ST_INTER_AC);
rl_vlc= rl->rl_vlc[s->qscale]; rl_vlc= rl->rl_vlc[s->qscale];
} }
...@@ -1889,7 +1894,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, ...@@ -1889,7 +1894,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
i = 63; /* XXX: not optimal */ i = 63; /* XXX: not optimal */
} }
} }
if(s->msmpeg4_version==4 && i>0) i=63; //FIXME/XXX optimize if(s->msmpeg4_version>=4 && i>0) i=63; //FIXME/XXX optimize
s->block_last_index[n] = i; s->block_last_index[n] = i;
return 0; return 0;
...@@ -1990,3 +1995,9 @@ static int msmpeg4_decode_motion(MpegEncContext * s, ...@@ -1990,3 +1995,9 @@ static int msmpeg4_decode_motion(MpegEncContext * s,
*my_ptr = my; *my_ptr = my;
return 0; return 0;
} }
/* cleanest way to support it
* there is too much shared between versions so that we can't have 1 file per version & 1 common
* as almost everything would be in the common file
*/
#include "wmv2.c"
This diff is collapsed.
...@@ -473,3 +473,93 @@ void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block) ...@@ -473,3 +473,93 @@ void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block)
idct4col(dest + line_size + i, 2 * line_size, block + 8 + i); idct4col(dest + line_size + i, 2 * line_size, block + 8 + i);
} }
} }
/* 8x4 & 4x8 WMV2 IDCT */
/* Drop any earlier definitions of these names so they can be redefined
 * below for the 4-point column transform (idct4col_add). */
#undef CN_SHIFT
#undef C_SHIFT
#undef C_FIX
#undef C1
#undef C2
/* Number of fractional bits in the fixed-point constants below. */
#define CN_SHIFT 12
/* Convert x to fixed point: multiply by sqrt(2), scale by 2^CN_SHIFT and
 * round to nearest (the + 0.5 before the truncating cast). */
#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
/* After the sqrt(2) factor these are approximately cos(pi/8), sin(pi/8)
 * and cos(pi/4) respectively, in CN_SHIFT-bit fixed point. */
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)
#define C3 C_FIX(0.5)
/* Total right shift applied to each output of idct4col_add (17 bits).
 * NOTE(review): the 12 presumably matches CN_SHIFT and the remaining 4+1
 * bits presumably undo scaling left by the row pass -- confirm. */
#define C_SHIFT (4+1+12)
/* 4-point inverse DCT down one column, accumulated into the picture.
 *
 * Reads the four coefficients col[0], col[8], col[16], col[24] (row stride
 * of 8 elements), transforms them with the C1/C2/C3 fixed-point constants,
 * and adds each rounded result (>> C_SHIFT) to one destination pixel. The
 * four pixels touched are dest[0], dest[line_size], dest[2*line_size] and
 * dest[3*line_size]. Every sum is mapped through cropTbl + MAX_NEG_CROP
 * before being stored (presumably a clamp to the UINT8 range -- confirm
 * against the cropTbl definition).
 */
static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col)
{
    const UINT8 *crop = cropTbl + MAX_NEG_CROP;
    const int rnd = 1 << (C_SHIFT - 1);   /* half an output unit, for rounding */
    const int in0 = col[8*0];
    const int in1 = col[8*1];
    const int in2 = col[8*2];
    const int in3 = col[8*3];
    /* even part: sum/difference butterfly, rounding bias folded in */
    const int even_sum  = (in0 + in2)*C3 + rnd;
    const int even_diff = (in0 - in2)*C3 + rnd;
    /* odd part: rotation of (in1, in3) by the C1/C2 pair */
    const int odd_a = in1 * C1 + in3 * C2;
    const int odd_b = in1 * C2 - in3 * C1;
    UINT8 *pix = dest;

    *pix = crop[*pix + ((even_sum  + odd_a) >> C_SHIFT)];
    pix += line_size;
    *pix = crop[*pix + ((even_diff + odd_b) >> C_SHIFT)];
    pix += line_size;
    *pix = crop[*pix + ((even_diff - odd_b) >> C_SHIFT)];
    pix += line_size;
    *pix = crop[*pix + ((even_sum  - odd_a) >> C_SHIFT)];
}
/* Fixed-point setup for the 4-point row transform (idct4row). */
/* Number of fractional bits in the R1..R3 constants. */
#define RN_SHIFT 15
/* Convert x to fixed point: multiply by sqrt(2), scale by 2^RN_SHIFT and
 * round to nearest. */
#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
/* After the sqrt(2) factor these are approximately cos(pi/8), sin(pi/8)
 * and cos(pi/4) respectively, in RN_SHIFT-bit fixed point. */
#define R1 R_FIX(0.6532814824)
#define R2 R_FIX(0.2705980501)
#define R3 R_FIX(0.5)
/* Right shift applied to each idct4row output. It is smaller than RN_SHIFT,
 * so results keep a factor of 2^(RN_SHIFT - R_SHIFT) = 16.
 * NOTE(review): presumably extra precision for the following column pass
 * -- confirm. */
#define R_SHIFT 11
/* In-place 4-point inverse DCT on the first four coefficients of one row.
 *
 * row[0..3] are read, transformed with the R1/R2/R3 fixed-point constants,
 * rounded (the 1 << (R_SHIFT - 1) bias) and shifted right by R_SHIFT, then
 * written back to row[0..3]. Elements beyond row[3] are neither read nor
 * written. Nothing is stored to a picture here, so no crop table is needed.
 */
static inline void idct4row(INT16 *row)
{
    int c0, c1, c2, c3, a0, a1, a2, a3;

    a0 = row[0];
    a1 = row[1];
    a2 = row[2];
    a3 = row[3];
    /* even part: sum/difference butterfly with the rounding bias folded in */
    c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));
    c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
    /* odd part: rotation of (a1, a3) by the R1/R2 pair */
    c1 = a1 * R1 + a3 * R2;
    c3 = a1 * R2 - a3 * R1;
    row[0]= (c0 + c1) >> R_SHIFT;
    row[1]= (c2 + c3) >> R_SHIFT;
    row[2]= (c2 - c3) >> R_SHIFT;
    row[3]= (c0 - c1) >> R_SHIFT;
}
/* Inverse transform of an 8-wide, 4-tall coefficient block, added to dest.
 *
 * The first 4 rows of block (8 coefficients each, row stride 8) go through
 * the 8-point row IDCT in place; then each of the 8 columns is run through
 * the 4-point column IDCT, which accumulates into 4 picture rows spaced
 * line_size apart. block is modified by the row pass.
 */
void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block)
{
    INT16 *row_ptr;
    int x;

    /* horizontal pass: 8-point IDCT on each of the 4 rows */
    for (row_ptr = block; row_ptr != block + 4*8; row_ptr += 8)
        idctRowCondDC(row_ptr);

    /* vertical pass: 4-point IDCT per column, added into the picture */
    for (x = 0; x != 8; x++)
        idct4col_add(dest + x, line_size, block + x);
}
/* Inverse transform of a 4-wide, 8-tall coefficient block, added to dest.
 *
 * Each of the 8 rows of block (row stride 8) has its first 4 coefficients
 * run through the 4-point row IDCT in place; then the first 4 columns are
 * handed to idctSparseColAdd together with dest and line_size
 * (NOTE(review): presumably an 8-point column IDCT that adds into 8 picture
 * rows -- confirm against its definition). block is modified by the row pass.
 */
void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block)
{
    INT16 *row_ptr;
    int x;

    /* horizontal pass: 4-point IDCT on each of the 8 rows */
    for (row_ptr = block; row_ptr != block + 8*8; row_ptr += 8)
        idct4row(row_ptr);

    /* vertical pass: one call per column, accumulating into the picture */
    for (x = 0; x != 4; x++)
        idctSparseColAdd(dest + x, line_size, block + x);
}
...@@ -26,3 +26,6 @@ void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block); ...@@ -26,3 +26,6 @@ void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block);
void simple_idct(short *block); void simple_idct(short *block);
void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block); void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block);
void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block);
void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block);
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment