Commit b0368839 authored by Michael Niedermayer

MpegEncContext.(i)dct_* -> DspContext.(i)dct_*

bitexact cleanup

Originally committed as revision 1617 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent a5dbb247
...@@ -154,6 +154,7 @@ static int do_play = 0; ...@@ -154,6 +154,7 @@ static int do_play = 0;
static int do_psnr = 0; static int do_psnr = 0;
static int do_vstats = 0; static int do_vstats = 0;
static int do_pass = 0; static int do_pass = 0;
static int bitexact = 0;
static char *pass_logfilename = NULL; static char *pass_logfilename = NULL;
static int audio_stream_copy = 0; static int audio_stream_copy = 0;
static int video_stream_copy = 0; static int video_stream_copy = 0;
...@@ -2075,6 +2076,9 @@ static void opt_input_file(const char *filename) ...@@ -2075,6 +2076,9 @@ static void opt_input_file(const char *filename)
enc->flags|= CODEC_FLAG_TRUNCATED; */ enc->flags|= CODEC_FLAG_TRUNCATED; */
if(/*enc->codec_id==CODEC_ID_MPEG4 || */enc->codec_id==CODEC_ID_MPEG1VIDEO) if(/*enc->codec_id==CODEC_ID_MPEG4 || */enc->codec_id==CODEC_ID_MPEG1VIDEO)
enc->flags|= CODEC_FLAG_TRUNCATED; enc->flags|= CODEC_FLAG_TRUNCATED;
if(bitexact)
enc->flags|= CODEC_FLAG_BITEXACT;
if (enc->frame_rate != rfps) { if (enc->frame_rate != rfps) {
fprintf(stderr,"\nSeems that stream %d comes from film source: %2.2f->%2.2f\n", fprintf(stderr,"\nSeems that stream %d comes from film source: %2.2f->%2.2f\n",
...@@ -2219,7 +2223,10 @@ static void opt_output_file(const char *filename) ...@@ -2219,7 +2223,10 @@ static void opt_output_file(const char *filename)
video_enc->flags |= CODEC_FLAG_QSCALE; video_enc->flags |= CODEC_FLAG_QSCALE;
st->quality = video_qscale; st->quality = video_qscale;
} }
if(bitexact)
video_enc->flags |= CODEC_FLAG_BITEXACT;
if (use_hq) { if (use_hq) {
video_enc->flags |= CODEC_FLAG_HQ; video_enc->flags |= CODEC_FLAG_HQ;
} }
...@@ -2557,7 +2564,7 @@ extern int ffm_nopts; ...@@ -2557,7 +2564,7 @@ extern int ffm_nopts;
static void opt_bitexact(void) static void opt_bitexact(void)
{ {
avcodec_set_bit_exact(); bitexact=1;
/* disable generation of real time pts in ffm (needs to be suppressed anyway) */ /* disable generation of real time pts in ffm (needs to be suppressed anyway) */
ffm_nopts = 1; ffm_nopts = 1;
} }
......
...@@ -20,6 +20,9 @@ ...@@ -20,6 +20,9 @@
#include "asm.h" #include "asm.h"
#include "../dsputil.h" #include "../dsputil.h"
extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block);
extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block);
void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
int line_size, int h); int line_size, int h);
void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
...@@ -295,7 +298,7 @@ static int sad8x8_mvi(void *s, uint8_t *a, uint8_t *b, int stride) ...@@ -295,7 +298,7 @@ static int sad8x8_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
return pix_abs8x8_mvi(a, b, stride); return pix_abs8x8_mvi(a, b, stride);
} }
void dsputil_init_alpha(DSPContext* c, unsigned mask) void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
{ {
c->put_pixels_tab[0][0] = put_pixels16_axp_asm; c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
c->put_pixels_tab[0][1] = put_pixels16_x2_axp; c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
...@@ -357,4 +360,7 @@ void dsputil_init_alpha(DSPContext* c, unsigned mask) ...@@ -357,4 +360,7 @@ void dsputil_init_alpha(DSPContext* c, unsigned mask)
put_pixels_clamped_axp_p = c->put_pixels_clamped; put_pixels_clamped_axp_p = c->put_pixels_clamped;
add_pixels_clamped_axp_p = c->add_pixels_clamped; add_pixels_clamped_axp_p = c->add_pixels_clamped;
c->idct_put = simple_idct_put_axp;
c->idct_add = simple_idct_add_axp;
} }
...@@ -21,9 +21,6 @@ ...@@ -21,9 +21,6 @@
#include "../dsputil.h" #include "../dsputil.h"
#include "../mpegvideo.h" #include "../mpegvideo.h"
extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block);
extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block);
static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block,
int n, int qscale) int n, int qscale)
{ {
...@@ -97,6 +94,4 @@ static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, ...@@ -97,6 +94,4 @@ static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block,
void MPV_common_init_axp(MpegEncContext *s) void MPV_common_init_axp(MpegEncContext *s)
{ {
s->dct_unquantize_h263 = dct_unquantize_h263_axp; s->dct_unquantize_h263 = dct_unquantize_h263_axp;
s->idct_put = simple_idct_put_axp;
s->idct_add = simple_idct_add_axp;
} }
...@@ -21,7 +21,33 @@ ...@@ -21,7 +21,33 @@
extern void j_rev_dct_ARM(DCTELEM *data); extern void j_rev_dct_ARM(DCTELEM *data);
void dsputil_init_armv4l(DSPContext* c, unsigned mask) /* XXX: local hack */
static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
/* XXX: those functions should be suppressed ASAP when all IDCTs are
converted */
/* Adapter with the (dest, line_size, block) signature of the idct_put slot.
   Calls j_rev_dct_ARM() on block (presumably an in-place inverse DCT --
   confirm against its definition), then hands the same block, together with
   dest and line_size, to the file-local ff_put_pixels_clamped pointer.
   That pointer is assigned in dsputil_init_armv4l(), so this must not be
   called before that init has run. */
static void arm_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
j_rev_dct_ARM (block);
ff_put_pixels_clamped(block, dest, line_size);
}
/* Adapter with the (dest, line_size, block) signature of the idct_add slot.
   Calls j_rev_dct_ARM() on block (presumably an in-place inverse DCT --
   confirm against its definition), then hands the same block, together with
   dest and line_size, to the file-local ff_add_pixels_clamped pointer.
   That pointer is assigned in dsputil_init_armv4l(), so this must not be
   called before that init has run. */
static void arm_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
j_rev_dct_ARM (block);
ff_add_pixels_clamped(block, dest, line_size);
}
void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
{ {
// ff_idct = j_rev_dct_ARM; const int idct_algo= avctx->idct_algo;
ff_put_pixels_clamped = c->put_pixels_clamped;
ff_add_pixels_clamped = c->add_pixels_clamped;
if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_ARM){
c->idct_put= arm_idct_put;
c->idct_add= arm_idct_add;
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
}
} }
...@@ -21,35 +21,6 @@ ...@@ -21,35 +21,6 @@
#include "../mpegvideo.h" #include "../mpegvideo.h"
#include "../avcodec.h" #include "../avcodec.h"
extern void j_rev_dct_ARM(DCTELEM *data);
/* XXX: local hack */
static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
/* XXX: those functions should be suppressed ASAP when all IDCTs are
converted */
/* Adapter with the (dest, line_size, block) signature of the idct_put slot.
   Calls j_rev_dct_ARM() on block (presumably an in-place inverse DCT --
   confirm against its definition), then hands the same block, together with
   dest and line_size, to the file-local ff_put_pixels_clamped pointer.
   That pointer is assigned in MPV_common_init_armv4l(), so this must not be
   called before that init has run. */
static void arm_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
j_rev_dct_ARM (block);
ff_put_pixels_clamped(block, dest, line_size);
}
/* Adapter with the (dest, line_size, block) signature of the idct_add slot.
   Calls j_rev_dct_ARM() on block (presumably an in-place inverse DCT --
   confirm against its definition), then hands the same block, together with
   dest and line_size, to the file-local ff_add_pixels_clamped pointer.
   That pointer is assigned in MPV_common_init_armv4l(), so this must not be
   called before that init has run. */
static void arm_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
j_rev_dct_ARM (block);
ff_add_pixels_clamped(block, dest, line_size);
}
void MPV_common_init_armv4l(MpegEncContext *s) void MPV_common_init_armv4l(MpegEncContext *s)
{ {
int i;
const int idct_algo= s->avctx->idct_algo;
ff_put_pixels_clamped = s->dsp.put_pixels_clamped;
ff_add_pixels_clamped = s->dsp.add_pixels_clamped;
if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_ARM){
s->idct_put= arm_idct_put;
s->idct_add= arm_idct_add;
s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
}
} }
...@@ -16,8 +16,8 @@ extern "C" { ...@@ -16,8 +16,8 @@ extern "C" {
#define LIBAVCODEC_VERSION_INT 0x000406 #define LIBAVCODEC_VERSION_INT 0x000406
#define LIBAVCODEC_VERSION "0.4.6" #define LIBAVCODEC_VERSION "0.4.6"
#define LIBAVCODEC_BUILD 4659 #define LIBAVCODEC_BUILD 4660
#define LIBAVCODEC_BUILD_STR "4659" #define LIBAVCODEC_BUILD_STR "4660"
enum CodecID { enum CodecID {
CODEC_ID_NONE, CODEC_ID_NONE,
...@@ -159,6 +159,7 @@ static const int Motion_Est_QTab[] = { ME_ZERO, ME_PHODS, ME_LOG, ...@@ -159,6 +159,7 @@ static const int Motion_Est_QTab[] = { ME_ZERO, ME_PHODS, ME_LOG,
#define CODEC_FLAG_ALT_SCAN 0x00100000 /* use alternate scan */ #define CODEC_FLAG_ALT_SCAN 0x00100000 /* use alternate scan */
#define CODEC_FLAG_TRELLIS_QUANT 0x00200000 /* use trellis quantization */ #define CODEC_FLAG_TRELLIS_QUANT 0x00200000 /* use trellis quantization */
#define CODEC_FLAG_GLOBAL_HEADER 0x00400000 /* place global headers in extradata instead of every keyframe */ #define CODEC_FLAG_GLOBAL_HEADER 0x00400000 /* place global headers in extradata instead of every keyframe */
#define CODEC_FLAG_BITEXACT 0x00800000 /* use only bitexact stuff (except (i)dct) */
/* codec capabilities */ /* codec capabilities */
...@@ -1167,8 +1168,6 @@ unsigned avcodec_version(void); ...@@ -1167,8 +1168,6 @@ unsigned avcodec_version(void);
unsigned avcodec_build(void); unsigned avcodec_build(void);
void avcodec_init(void); void avcodec_init(void);
void avcodec_set_bit_exact(void);
void register_avcodec(AVCodec *format); void register_avcodec(AVCodec *format);
AVCodec *avcodec_find_encoder(enum CodecID id); AVCodec *avcodec_find_encoder(enum CodecID id);
AVCodec *avcodec_find_encoder_by_name(const char *name); AVCodec *avcodec_find_encoder_by_name(const char *name);
......
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
#include "avcodec.h" #include "avcodec.h"
#include "dsputil.h" #include "dsputil.h"
#include "mpegvideo.h" #include "mpegvideo.h"
#include "simple_idct.h"
int ff_bit_exact=0;
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
uint32_t squareTbl[512]; uint32_t squareTbl[512];
...@@ -99,6 +99,18 @@ const uint32_t inverse[256]={ ...@@ -99,6 +99,18 @@ const uint32_t inverse[256]={
17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
}; };
/* Input permutation for the simple_idct_mmx.
   64 one-byte entries, all in the range 0x00..0x3F. dsputil_init() copies
   this table element by element into DSPContext.idct_permutation when
   idct_permutation_type is FF_SIMPLE_IDCT_PERM. */
static const uint8_t simple_mmx_permutation[64]={
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
static int pix_sum_c(uint8_t * pix, int line_size) static int pix_sum_c(uint8_t * pix, int line_size)
{ {
int s, i, j; int s, i, j;
...@@ -1787,7 +1799,7 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2 ...@@ -1787,7 +1799,7 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
int sum=0, i; int sum=0, i;
s->dsp.diff_pixels(temp, src1, src2, stride); s->dsp.diff_pixels(temp, src1, src2, stride);
s->fdct(temp); s->dsp.fdct(temp);
for(i=0; i<64; i++) for(i=0; i<64; i++)
sum+= ABS(temp[i]); sum+= ABS(temp[i]);
...@@ -1887,7 +1899,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int ...@@ -1887,7 +1899,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
s->dct_unquantize(s, temp, 0, s->qscale); s->dct_unquantize(s, temp, 0, s->qscale);
} }
s->idct_add(bak, stride, temp); s->dsp.idct_add(bak, stride, temp);
distoration= s->dsp.sse[1](NULL, bak, src1, stride); distoration= s->dsp.sse[1](NULL, bak, src1, stride);
...@@ -1959,7 +1971,20 @@ WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c) ...@@ -1959,7 +1971,20 @@ WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
WARPER88_1616(rd8x8_c, rd16x16_c) WARPER88_1616(rd8x8_c, rd16x16_c)
WARPER88_1616(bit8x8_c, bit16x16_c) WARPER88_1616(bit8x8_c, bit16x16_c)
void dsputil_init(DSPContext* c, unsigned mask) /* XXX: those functions should be suppressed ASAP when all IDCTs are
converted */
/* Adapter that gives j_rev_dct() the (dest, line_size, block) signature of
   the idct_put slot: calls j_rev_dct() on block, then passes the same block
   with dest and line_size to put_pixels_clamped_c().
   dsputil_init() installs it as c->idct_put when avctx->idct_algo is
   FF_IDCT_INT. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
j_rev_dct (block);
put_pixels_clamped_c(block, dest, line_size);
}
/* Adapter that gives j_rev_dct() the (dest, line_size, block) signature of
   the idct_add slot: calls j_rev_dct() on block, then passes the same block
   with dest and line_size to add_pixels_clamped_c().
   dsputil_init() installs it as c->idct_add when avctx->idct_algo is
   FF_IDCT_INT. */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
j_rev_dct (block);
add_pixels_clamped_c(block, dest, line_size);
}
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{ {
static int init_done = 0; static int init_done = 0;
int i; int i;
...@@ -1980,6 +2005,23 @@ void dsputil_init(DSPContext* c, unsigned mask) ...@@ -1980,6 +2005,23 @@ void dsputil_init(DSPContext* c, unsigned mask)
init_done = 1; init_done = 1;
} }
#ifdef CONFIG_ENCODERS
if(avctx->dct_algo==FF_DCT_FASTINT)
c->fdct = fdct_ifast;
else
c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
#endif //CONFIG_ENCODERS
if(avctx->idct_algo==FF_IDCT_INT){
c->idct_put= ff_jref_idct_put;
c->idct_add= ff_jref_idct_add;
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
}else{ //accurate/default
c->idct_put= simple_idct_put;
c->idct_add= simple_idct_add;
c->idct_permutation_type= FF_NO_IDCT_PERM;
}
c->get_pixels = get_pixels_c; c->get_pixels = get_pixels_c;
c->diff_pixels = diff_pixels_c; c->diff_pixels = diff_pixels_c;
c->put_pixels_clamped = put_pixels_clamped_c; c->put_pixels_clamped = put_pixels_clamped_c;
...@@ -2082,37 +2124,43 @@ void dsputil_init(DSPContext* c, unsigned mask) ...@@ -2082,37 +2124,43 @@ void dsputil_init(DSPContext* c, unsigned mask)
c->diff_bytes= diff_bytes_c; c->diff_bytes= diff_bytes_c;
#ifdef HAVE_MMX #ifdef HAVE_MMX
dsputil_init_mmx(c, mask); dsputil_init_mmx(c, avctx);
if (ff_bit_exact)
{
/* FIXME - AVCodec context should have flag for bitexact match */
/* fprintf(stderr, "\n\n\nff_bit_exact %d\n\n\n\n", ff_bit_exact); */
dsputil_set_bit_exact_mmx(c, mask);
}
#endif #endif
#ifdef ARCH_ARMV4L #ifdef ARCH_ARMV4L
dsputil_init_armv4l(c, mask); dsputil_init_armv4l(c, avctx);
#endif #endif
#ifdef HAVE_MLIB #ifdef HAVE_MLIB
dsputil_init_mlib(c, mask); dsputil_init_mlib(c, avctx);
#endif #endif
#ifdef ARCH_ALPHA #ifdef ARCH_ALPHA
dsputil_init_alpha(c, mask); dsputil_init_alpha(c, avctx);
#endif #endif
#ifdef ARCH_POWERPC #ifdef ARCH_POWERPC
dsputil_init_ppc(c, mask); dsputil_init_ppc(c, avctx);
#endif #endif
#ifdef HAVE_MMI #ifdef HAVE_MMI
dsputil_init_mmi(c, mask); dsputil_init_mmi(c, avctx);
#endif #endif
}
/* remove any non bit exact operation (testing purpose) */ switch(c->idct_permutation_type){
void avcodec_set_bit_exact(void) case FF_NO_IDCT_PERM:
{ for(i=0; i<64; i++)
ff_bit_exact=1; c->idct_permutation[i]= i;
#ifdef HAVE_MMX break;
// FIXME - better set_bit_exact case FF_LIBMPEG2_IDCT_PERM:
// dsputil_set_bit_exact_mmx(); for(i=0; i<64; i++)
#endif c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
break;
case FF_SIMPLE_IDCT_PERM:
for(i=0; i<64; i++)
c->idct_permutation[i]= simple_mmx_permutation[i];
break;
case FF_TRANSPOSE_IDCT_PERM:
for(i=0; i<64; i++)
c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
break;
default:
fprintf(stderr, "Internal error, IDCT permutation not set\n");
}
} }
...@@ -149,9 +149,21 @@ typedef struct DSPContext { ...@@ -149,9 +149,21 @@ typedef struct DSPContext {
/* huffyuv specific */ /* huffyuv specific */
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
/* (I)DCT */
void (*fdct)(DCTELEM *block/* align 16*/);
void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
uint8_t idct_permutation[64];
int idct_permutation_type;
#define FF_NO_IDCT_PERM 1
#define FF_LIBMPEG2_IDCT_PERM 2
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4
} DSPContext; } DSPContext;
void dsputil_init(DSPContext* p, unsigned mask); void dsputil_init(DSPContext* p, AVCodecContext *avctx);
/** /**
* permute block according to permutation. * permute block according to permutation.
...@@ -194,11 +206,8 @@ static inline void emms(void) ...@@ -194,11 +206,8 @@ static inline void emms(void)
#define __align8 __attribute__ ((aligned (8))) #define __align8 __attribute__ ((aligned (8)))
void dsputil_init_mmx(DSPContext* c, unsigned mask); void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask); void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_pix_mmx(DSPContext* c, unsigned mask);
void dsputil_set_bit_exact_pix_mmx(DSPContext* c, unsigned mask);
#elif defined(ARCH_ARMV4L) #elif defined(ARCH_ARMV4L)
...@@ -206,20 +215,20 @@ void dsputil_set_bit_exact_pix_mmx(DSPContext* c, unsigned mask); ...@@ -206,20 +215,20 @@ void dsputil_set_bit_exact_pix_mmx(DSPContext* c, unsigned mask);
line optimizations */ line optimizations */
#define __align8 __attribute__ ((aligned (4))) #define __align8 __attribute__ ((aligned (4)))
void dsputil_init_armv4l(DSPContext* c, unsigned mask); void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx);
#elif defined(HAVE_MLIB) #elif defined(HAVE_MLIB)
/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
#define __align8 __attribute__ ((aligned (8))) #define __align8 __attribute__ ((aligned (8)))
void dsputil_init_mlib(DSPContext* c, unsigned mask); void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
#elif defined(ARCH_ALPHA) #elif defined(ARCH_ALPHA)
#define __align8 __attribute__ ((aligned (8))) #define __align8 __attribute__ ((aligned (8)))
void dsputil_init_alpha(DSPContext* c, unsigned mask); void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
#elif defined(ARCH_POWERPC) #elif defined(ARCH_POWERPC)
...@@ -233,13 +242,13 @@ extern int mm_flags; ...@@ -233,13 +242,13 @@ extern int mm_flags;
#define __align8 __attribute__ ((aligned (16))) #define __align8 __attribute__ ((aligned (16)))
void dsputil_init_ppc(DSPContext* c, unsigned mask); void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
#elif defined(HAVE_MMI) #elif defined(HAVE_MMI)
#define __align8 __attribute__ ((aligned (16))) #define __align8 __attribute__ ((aligned (16)))
void dsputil_init_mmi(DSPContext* c, unsigned mask); void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
#else #else
......
...@@ -115,12 +115,12 @@ static int dvvideo_decode_init(AVCodecContext *avctx) ...@@ -115,12 +115,12 @@ static int dvvideo_decode_init(AVCodecContext *avctx)
/* XXX: fix it */ /* XXX: fix it */
memset(&s2, 0, sizeof(MpegEncContext)); memset(&s2, 0, sizeof(MpegEncContext));
s2.avctx = avctx; s2.avctx = avctx;
dsputil_init(&s2.dsp, avctx->dsp_mask); dsputil_init(&s2.dsp, avctx);
if (DCT_common_init(&s2) < 0) if (DCT_common_init(&s2) < 0)
return -1; return -1;
s->idct_put[0] = s2.idct_put; s->idct_put[0] = s2.dsp.idct_put;
memcpy(s->idct_permutation, s2.idct_permutation, 64); memcpy(s->idct_permutation, s2.dsp.idct_permutation, 64);
memcpy(s->dv_zigzag[0], s2.intra_scantable.permutated, 64); memcpy(s->dv_zigzag[0], s2.intra_scantable.permutated, 64);
/* XXX: use MMX also for idct248 */ /* XXX: use MMX also for idct248 */
......
This diff is collapsed.
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "../dsputil.h" #include "../dsputil.h"
#include "../simple_idct.h"
int mm_flags; /* multimedia extension flags */ int mm_flags; /* multimedia extension flags */
...@@ -1408,8 +1409,35 @@ static void just_return() { return; } ...@@ -1408,8 +1409,35 @@ static void just_return() { return; }
c->put_ ## postfix1 = put_ ## postfix2;\ c->put_ ## postfix1 = put_ ## postfix2;\
c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\ c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\
c->avg_ ## postfix1 = avg_ ## postfix2; c->avg_ ## postfix1 = avg_ ## postfix2;
/* external functions, from idct_mmx.c */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
/* XXX: those functions should be suppressed ASAP when all IDCTs are
converted */
/* idct_put adapter for the plain-MMX libmpeg2 IDCT: calls ff_mmx_idct() on
   block, then passes the same block with dest and line_size to
   put_pixels_clamped_mmx().
   dsputil_init_mmx() selects it when idct_algo is FF_IDCT_LIBMPEG2MMX and
   MM_MMXEXT is not set in mm_flags. */
static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmx_idct (block);
put_pixels_clamped_mmx(block, dest, line_size);
}
/* idct_add adapter for the plain-MMX libmpeg2 IDCT: calls ff_mmx_idct() on
   block, then passes the same block with dest and line_size to
   add_pixels_clamped_mmx().
   dsputil_init_mmx() selects it when idct_algo is FF_IDCT_LIBMPEG2MMX and
   MM_MMXEXT is not set in mm_flags. */
static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmx_idct (block);
add_pixels_clamped_mmx(block, dest, line_size);
}
/* idct_put adapter for the MMXEXT libmpeg2 IDCT: calls ff_mmxext_idct() on
   block, then passes the same block with dest and line_size to
   put_pixels_clamped_mmx() (the store step is the plain-MMX routine).
   dsputil_init_mmx() selects it when idct_algo is FF_IDCT_LIBMPEG2MMX and
   MM_MMXEXT is set in mm_flags. */
static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmxext_idct (block);
put_pixels_clamped_mmx(block, dest, line_size);
}
/* idct_add adapter for the MMXEXT libmpeg2 IDCT: calls ff_mmxext_idct() on
   block, then passes the same block with dest and line_size to
   add_pixels_clamped_mmx() (the accumulate step is the plain-MMX routine).
   dsputil_init_mmx() selects it when idct_algo is FF_IDCT_LIBMPEG2MMX and
   MM_MMXEXT is set in mm_flags. */
static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmxext_idct (block);
add_pixels_clamped_mmx(block, dest, line_size);
}
void dsputil_init_mmx(DSPContext* c, unsigned mask) void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{ {
mm_flags = mm_support(); mm_flags = mm_support();
#if 0 #if 0
...@@ -1428,6 +1456,27 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) ...@@ -1428,6 +1456,27 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
#endif #endif
if (mm_flags & MM_MMX) { if (mm_flags & MM_MMX) {
const int dct_algo = avctx->dct_algo;
const int idct_algo= avctx->idct_algo;
if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)
c->fdct = ff_fdct_mmx;
if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
c->idct_put= ff_simple_idct_put_mmx;
c->idct_add= ff_simple_idct_add_mmx;
c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
}else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
if(mm_flags & MM_MMXEXT){
c->idct_put= ff_libmpeg2mmx2_idct_put;
c->idct_add= ff_libmpeg2mmx2_idct_add;
}else{
c->idct_put= ff_libmpeg2mmx_idct_put;
c->idct_add= ff_libmpeg2mmx_idct_add;
}
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
}
c->get_pixels = get_pixels_mmx; c->get_pixels = get_pixels_mmx;
c->diff_pixels = diff_pixels_mmx; c->diff_pixels = diff_pixels_mmx;
c->put_pixels_clamped = put_pixels_clamped_mmx; c->put_pixels_clamped = put_pixels_clamped_mmx;
...@@ -1487,23 +1536,26 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) ...@@ -1487,23 +1536,26 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
if (mm_flags & MM_MMXEXT) { if (mm_flags & MM_MMXEXT) {
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
}
#if 1 #if 1
SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_mmx2)
...@@ -1542,23 +1594,26 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) ...@@ -1542,23 +1594,26 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
} else if (mm_flags & MM_3DNOW) { } else if (mm_flags & MM_3DNOW) {
c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
}
SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow) SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow)
SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow) SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow)
...@@ -1594,7 +1649,8 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) ...@@ -1594,7 +1649,8 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow) SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow)
} }
} }
dsputil_init_pix_mmx(c, mask);
dsputil_init_pix_mmx(c, avctx);
#if 0 #if 0
// for speed testing // for speed testing
get_pixels = just_return; get_pixels = just_return;
...@@ -1630,20 +1686,3 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) ...@@ -1630,20 +1686,3 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
//ff_idct = just_return; //ff_idct = just_return;
#endif #endif
} }
/* Remove any non-bit-exact operation (testing purposes). NOTE that
   this function should be kept as small as possible because it is
   always difficult to automatically test non-bit-exact cases.

   c    - DSPContext whose function tables are patched in place.
   mask - not examined here; only forwarded to
          dsputil_set_bit_exact_pix_mmx().

   When MM_MMX is set in mm_flags, six table entries are overwritten with
   the plain-MMX implementations. The pix (SAD) tables are then handled by
   dsputil_set_bit_exact_pix_mmx(), which is called unconditionally. */
void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask)
{
if (mm_flags & MM_MMX) {
/* Replace the entries that the MMX2 & 3DNOW init paths may have set. */
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
}
dsputil_set_bit_exact_pix_mmx(c, mask);
}
...@@ -386,7 +386,7 @@ static int pix_abs16x16_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\ ...@@ -386,7 +386,7 @@ static int pix_abs16x16_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
PIX_SAD(mmx) PIX_SAD(mmx)
PIX_SAD(mmx2) PIX_SAD(mmx2)
void dsputil_init_pix_mmx(DSPContext* c, unsigned mask) void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
{ {
if (mm_flags & MM_MMX) { if (mm_flags & MM_MMX) {
c->pix_abs16x16 = pix_abs16x16_mmx; c->pix_abs16x16 = pix_abs16x16_mmx;
...@@ -403,27 +403,18 @@ void dsputil_init_pix_mmx(DSPContext* c, unsigned mask) ...@@ -403,27 +403,18 @@ void dsputil_init_pix_mmx(DSPContext* c, unsigned mask)
} }
if (mm_flags & MM_MMXEXT) { if (mm_flags & MM_MMXEXT) {
c->pix_abs16x16 = pix_abs16x16_mmx2; c->pix_abs16x16 = pix_abs16x16_mmx2;
c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
c->pix_abs8x8 = pix_abs8x8_mmx2; c->pix_abs8x8 = pix_abs8x8_mmx2;
c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
c->sad[0]= sad16x16_mmx2; c->sad[0]= sad16x16_mmx2;
c->sad[1]= sad8x8_mmx2; c->sad[1]= sad8x8_mmx2;
}
} if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
void dsputil_set_bit_exact_pix_mmx(DSPContext* c, unsigned mask) c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
{ c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
if (mm_flags & MM_MMXEXT) { c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; }
c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx;
} }
} }
...@@ -23,7 +23,6 @@ ...@@ -23,7 +23,6 @@
#include "../dsputil.h" #include "../dsputil.h"
#include "../mpegvideo.h" #include "../mpegvideo.h"
#include "../avcodec.h" #include "../avcodec.h"
#include "../simple_idct.h"
extern uint8_t zigzag_direct_noperm[64]; extern uint8_t zigzag_direct_noperm[64];
extern uint16_t inv_zigzag_direct16[64]; extern uint16_t inv_zigzag_direct16[64];
...@@ -499,38 +498,10 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) ...@@ -499,38 +498,10 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
#define RENAME(a) a ## _MMX2 #define RENAME(a) a ## _MMX2
#include "mpegvideo_mmx_template.c" #include "mpegvideo_mmx_template.c"
/* external functions, from idct_mmx.c */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
/* XXX: those functions should be suppressed ASAP when all IDCTs are
converted */
/* idct_put adapter for the plain-MMX libmpeg2 IDCT: calls ff_mmx_idct() on
   block, then passes the same block with dest and line_size to
   put_pixels_clamped_mmx().
   MPV_common_init_mmx() selects it when idct_algo is FF_IDCT_LIBMPEG2MMX
   and MM_MMXEXT is not set in mm_flags. */
static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmx_idct (block);
put_pixels_clamped_mmx(block, dest, line_size);
}
/* idct_add adapter for the plain-MMX libmpeg2 IDCT: calls ff_mmx_idct() on
   block, then passes the same block with dest and line_size to
   add_pixels_clamped_mmx().
   MPV_common_init_mmx() selects it when idct_algo is FF_IDCT_LIBMPEG2MMX
   and MM_MMXEXT is not set in mm_flags. */
static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmx_idct (block);
add_pixels_clamped_mmx(block, dest, line_size);
}
/* idct_put adapter for the MMXEXT libmpeg2 IDCT: calls ff_mmxext_idct() on
   block, then passes the same block with dest and line_size to
   put_pixels_clamped_mmx() (the store step is the plain-MMX routine).
   MPV_common_init_mmx() selects it when idct_algo is FF_IDCT_LIBMPEG2MMX
   and MM_MMXEXT is set in mm_flags. */
static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmxext_idct (block);
put_pixels_clamped_mmx(block, dest, line_size);
}
/* idct_add adapter for the MMXEXT libmpeg2 IDCT: calls ff_mmxext_idct() on
   block, then passes the same block with dest and line_size to
   add_pixels_clamped_mmx() (the accumulate step is the plain-MMX routine).
   MPV_common_init_mmx() selects it when idct_algo is FF_IDCT_LIBMPEG2MMX
   and MM_MMXEXT is set in mm_flags. */
static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmxext_idct (block);
add_pixels_clamped_mmx(block, dest, line_size);
}
void MPV_common_init_mmx(MpegEncContext *s) void MPV_common_init_mmx(MpegEncContext *s)
{ {
if (mm_flags & MM_MMX) { if (mm_flags & MM_MMX) {
const int dct_algo = s->avctx->dct_algo; const int dct_algo = s->avctx->dct_algo;
const int idct_algo= s->avctx->idct_algo;
s->dct_unquantize_h263 = dct_unquantize_h263_mmx; s->dct_unquantize_h263 = dct_unquantize_h263_mmx;
s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx; s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx;
...@@ -539,28 +510,11 @@ void MPV_common_init_mmx(MpegEncContext *s) ...@@ -539,28 +510,11 @@ void MPV_common_init_mmx(MpegEncContext *s)
draw_edges = draw_edges_mmx; draw_edges = draw_edges_mmx;
if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
s->fdct = ff_fdct_mmx;
if(mm_flags & MM_MMXEXT){ if(mm_flags & MM_MMXEXT){
s->dct_quantize= dct_quantize_MMX2; s->dct_quantize= dct_quantize_MMX2;
} else { } else {
s->dct_quantize= dct_quantize_MMX; s->dct_quantize= dct_quantize_MMX;
} }
} }
if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
s->idct_put= ff_simple_idct_put_mmx;
s->idct_add= ff_simple_idct_add_mmx;
s->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
}else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
if(mm_flags & MM_MMXEXT){
s->idct_put= ff_libmpeg2mmx2_idct_put;
s->idct_add= ff_libmpeg2mmx2_idct_add;
}else{
s->idct_put= ff_libmpeg2mmx_idct_put;
s->idct_add= ff_libmpeg2mmx_idct_add;
}
s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
}
} }
} }
...@@ -191,7 +191,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -191,7 +191,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if(s->mb_intra) block[0]= level; if(s->mb_intra) block[0]= level;
else block[0]= temp_block[0]; else block[0]= temp_block[0];
if(s->idct_permutation[1]==8){ if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){
if(last_non_zero_p1 <= 1) goto end; if(last_non_zero_p1 <= 1) goto end;
block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08]; block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
block[0x20] = temp_block[0x10]; block[0x20] = temp_block[0x10];
...@@ -235,7 +235,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -235,7 +235,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36]; block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37]; block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
}else if(s->idct_permutation[1]==4){ }else if(s->dsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){
if(last_non_zero_p1 <= 1) goto end; if(last_non_zero_p1 <= 1) goto end;
block[0x04] = temp_block[0x01]; block[0x04] = temp_block[0x01];
block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
......
...@@ -386,7 +386,7 @@ static void jpeg_put_comments(MpegEncContext *s) ...@@ -386,7 +386,7 @@ static void jpeg_put_comments(MpegEncContext *s)
} }
/* comment */ /* comment */
if(!ff_bit_exact){ if(!(s->flags & CODEC_FLAG_BITEXACT)){
put_marker(p, COM); put_marker(p, COM);
flush_put_bits(p); flush_put_bits(p);
ptr = pbBufPtr(p); ptr = pbBufPtr(p);
...@@ -703,7 +703,7 @@ static int mjpeg_decode_init(AVCodecContext *avctx) ...@@ -703,7 +703,7 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
s->avctx = avctx; s->avctx = avctx;
/* ugly way to get the idct & scantable */ /* ugly way to get the idct & scantable FIXME */
memset(&s2, 0, sizeof(MpegEncContext)); memset(&s2, 0, sizeof(MpegEncContext));
s2.flags= avctx->flags; s2.flags= avctx->flags;
s2.avctx= avctx; s2.avctx= avctx;
...@@ -713,7 +713,7 @@ static int mjpeg_decode_init(AVCodecContext *avctx) ...@@ -713,7 +713,7 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
if (MPV_common_init(&s2) < 0) if (MPV_common_init(&s2) < 0)
return -1; return -1;
s->scantable= s2.intra_scantable; s->scantable= s2.intra_scantable;
s->idct_put= s2.idct_put; s->idct_put= s2.dsp.idct_put;
MPV_common_end(&s2); MPV_common_end(&s2);
s->mpeg_enc_ctx_allocated = 0; s->mpeg_enc_ctx_allocated = 0;
......
...@@ -1653,7 +1653,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) ...@@ -1653,7 +1653,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
if (get_bits1(&s->gb)) { if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8); v = get_bits(&s->gb, 8);
j= s->idct_permutation[ ff_zigzag_direct[i] ]; j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
s->intra_matrix[j] = v; s->intra_matrix[j] = v;
s->chroma_intra_matrix[j] = v; s->chroma_intra_matrix[j] = v;
} }
...@@ -1661,7 +1661,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) ...@@ -1661,7 +1661,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
if (get_bits1(&s->gb)) { if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8); v = get_bits(&s->gb, 8);
j= s->idct_permutation[ ff_zigzag_direct[i] ]; j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
s->inter_matrix[j] = v; s->inter_matrix[j] = v;
s->chroma_inter_matrix[j] = v; s->chroma_inter_matrix[j] = v;
} }
...@@ -1669,14 +1669,14 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) ...@@ -1669,14 +1669,14 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
if (get_bits1(&s->gb)) { if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8); v = get_bits(&s->gb, 8);
j= s->idct_permutation[ ff_zigzag_direct[i] ]; j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
s->chroma_intra_matrix[j] = v; s->chroma_intra_matrix[j] = v;
} }
} }
if (get_bits1(&s->gb)) { if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8); v = get_bits(&s->gb, 8);
j= s->idct_permutation[ ff_zigzag_direct[i] ]; j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
s->chroma_inter_matrix[j] = v; s->chroma_inter_matrix[j] = v;
} }
} }
...@@ -1985,7 +1985,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, ...@@ -1985,7 +1985,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
#endif #endif
} else { } else {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
int j= s->idct_permutation[i]; int j= s->dsp.idct_permutation[i];
v = ff_mpeg1_default_intra_matrix[i]; v = ff_mpeg1_default_intra_matrix[i];
s->intra_matrix[j] = v; s->intra_matrix[j] = v;
s->chroma_intra_matrix[j] = v; s->chroma_intra_matrix[j] = v;
...@@ -2006,7 +2006,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, ...@@ -2006,7 +2006,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
#endif #endif
} else { } else {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
int j= s->idct_permutation[i]; int j= s->dsp.idct_permutation[i];
v = ff_mpeg1_default_non_intra_matrix[i]; v = ff_mpeg1_default_non_intra_matrix[i];
s->inter_matrix[j] = v; s->inter_matrix[j] = v;
s->chroma_inter_matrix[j] = v; s->chroma_inter_matrix[j] = v;
......
This diff is collapsed.
...@@ -346,12 +346,6 @@ typedef struct MpegEncContext { ...@@ -346,12 +346,6 @@ typedef struct MpegEncContext {
ScanTable intra_h_scantable; ScanTable intra_h_scantable;
ScanTable intra_v_scantable; ScanTable intra_v_scantable;
ScanTable inter_scantable; // if inter == intra then intra should be used to reduce the cache usage ScanTable inter_scantable; // if inter == intra then intra should be used to reduce the cache usage
uint8_t idct_permutation[64];
int idct_permutation_type;
#define FF_NO_IDCT_PERM 1
#define FF_LIBMPEG2_IDCT_PERM 2
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4
void *opaque; /* private data for the user */ void *opaque; /* private data for the user */
...@@ -562,10 +556,6 @@ typedef struct MpegEncContext { ...@@ -562,10 +556,6 @@ typedef struct MpegEncContext {
DCTELEM *block/*align 16*/, int n, int qscale); DCTELEM *block/*align 16*/, int n, int qscale);
int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow); int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow); int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
void (*fdct)(DCTELEM *block/* align 16*/);
void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
//FIXME move above funcs into dspContext perhaps
} MpegEncContext; } MpegEncContext;
...@@ -610,8 +600,6 @@ int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size) ...@@ -610,8 +600,6 @@ int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size)
extern enum PixelFormat ff_yuv420p_list[2]; extern enum PixelFormat ff_yuv420p_list[2];
extern int ff_bit_exact;
static inline void ff_init_block_index(MpegEncContext *s){ static inline void ff_init_block_index(MpegEncContext *s){
s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2; s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1) + s->mb_x*2; s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1) + s->mb_x*2;
......
...@@ -25,6 +25,9 @@ ...@@ -25,6 +25,9 @@
#include "dsputil_altivec.h" #include "dsputil_altivec.h"
#endif #endif
extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
int mm_flags = 0; int mm_flags = 0;
int mm_support(void) int mm_support(void)
...@@ -169,7 +172,7 @@ long check_dcbz_effect(void) ...@@ -169,7 +172,7 @@ long check_dcbz_effect(void)
return count; return count;
} }
void dsputil_init_ppc(DSPContext* c, unsigned mask) void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{ {
// Common optimisations whether Altivec or not // Common optimisations whether Altivec or not
...@@ -215,6 +218,18 @@ void dsputil_init_ppc(DSPContext* c, unsigned mask) ...@@ -215,6 +218,18 @@ void dsputil_init_ppc(DSPContext* c, unsigned mask)
c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
c->gmc1 = gmc1_altivec; c->gmc1 = gmc1_altivec;
if ((avctx->idct_algo == FF_IDCT_AUTO) ||
(avctx->idct_algo == FF_IDCT_ALTIVEC))
{
c->idct_put = idct_put_altivec;
c->idct_add = idct_add_altivec;
#ifndef ALTIVEC_USE_REFERENCE_C_CODE
c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
c->idct_permutation_type = FF_NO_IDCT_PERM;
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}
#ifdef POWERPC_TBL_PERFORMANCE_REPORT #ifdef POWERPC_TBL_PERFORMANCE_REPORT
{ {
......
...@@ -468,7 +468,7 @@ int dct_quantize_altivec(MpegEncContext* s, ...@@ -468,7 +468,7 @@ int dct_quantize_altivec(MpegEncContext* s,
// and handle it using the vector unit if we can. This is the permute used // and handle it using the vector unit if we can. This is the permute used
// by the altivec idct, so it is common when using the altivec dct. // by the altivec idct, so it is common when using the altivec dct.
if ((lastNonZero > 0) && (s->idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM))
{ {
TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7); TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7);
} }
...@@ -501,10 +501,10 @@ int dct_quantize_altivec(MpegEncContext* s, ...@@ -501,10 +501,10 @@ int dct_quantize_altivec(MpegEncContext* s,
// We handled the transpose permutation above and we don't // We handled the transpose permutation above and we don't
// need to permute the "no" permutation case. // need to permute the "no" permutation case.
if ((lastNonZero > 0) && if ((lastNonZero > 0) &&
(s->idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) && (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) &&
(s->idct_permutation_type != FF_NO_IDCT_PERM)) (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM))
{ {
ff_block_permute(data, s->idct_permutation, ff_block_permute(data, s->dsp.idct_permutation,
s->intra_scantable.scantable, lastNonZero); s->intra_scantable.scantable, lastNonZero);
} }
......
...@@ -22,6 +22,9 @@ ...@@ -22,6 +22,9 @@
#include "../dsputil.h" #include "../dsputil.h"
#include "mmi.h" #include "mmi.h"
void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
static void clear_blocks_mmi(DCTELEM * blocks) static void clear_blocks_mmi(DCTELEM * blocks)
{ {
...@@ -103,8 +106,10 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz ...@@ -103,8 +106,10 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
} }
void dsputil_init_mmi(DSPContext* c, unsigned mask) void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
{ {
const int idct_algo= avctx->idct_algo;
c->clear_blocks = clear_blocks_mmi; c->clear_blocks = clear_blocks_mmi;
c->put_pixels_tab[1][0] = put_pixels8_mmi; c->put_pixels_tab[1][0] = put_pixels8_mmi;
...@@ -114,5 +119,11 @@ void dsputil_init_mmi(DSPContext* c, unsigned mask) ...@@ -114,5 +119,11 @@ void dsputil_init_mmi(DSPContext* c, unsigned mask)
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi; c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
c->get_pixels = get_pixels_mmi; c->get_pixels = get_pixels_mmi;
if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){
c->idct_put= ff_mmi_idct_put;
c->idct_add= ff_mmi_idct_add;
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
}
} }
...@@ -22,10 +22,6 @@ ...@@ -22,10 +22,6 @@
#include "../mpegvideo.h" #include "../mpegvideo.h"
#include "../avcodec.h" #include "../avcodec.h"
void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
static void dct_unquantize_h263_mmi(MpegEncContext *s, static void dct_unquantize_h263_mmi(MpegEncContext *s,
DCTELEM *block, int n, int qscale) DCTELEM *block, int n, int qscale)
{ {
...@@ -84,14 +80,6 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s, ...@@ -84,14 +80,6 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s,
void MPV_common_init_mmi(MpegEncContext *s) void MPV_common_init_mmi(MpegEncContext *s)
{ {
int i;
const int idct_algo= s->avctx->idct_algo;
if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){
s->idct_put= ff_mmi_idct_put;
s->idct_add= ff_mmi_idct_add;
s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
}
s->dct_unquantize_h263 = dct_unquantize_h263_mmi; s->dct_unquantize_h263 = dct_unquantize_h263_mmi;
} }
......
...@@ -328,7 +328,7 @@ static int decode_ext_header(Wmv2Context *w){ ...@@ -328,7 +328,7 @@ static int decode_ext_header(Wmv2Context *w){
code = get_bits(&gb, 3); code = get_bits(&gb, 3);
if(code==0) return -1; if(code==0) return -1;
s->slice_height = s->mb_height / code; s->slice_height = s->mb_height / code;
if(s->avctx->debug&FF_DEBUG_PICT_INFO){ if(s->avctx->debug&FF_DEBUG_PICT_INFO){
...@@ -582,7 +582,7 @@ static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int st ...@@ -582,7 +582,7 @@ static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int st
switch(w->abt_type_table[n]){ switch(w->abt_type_table[n]){
case 0: case 0:
if (s->block_last_index[n] >= 0) { if (s->block_last_index[n] >= 0) {
s->idct_add (dst, stride, block1); s->dsp.idct_add (dst, stride, block1);
} }
break; break;
case 1: case 1:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment