Commit 595e7bd9 authored by Stefan Gehrer

some MMX optimizations for the CAVS decoder

Originally committed as revision 5846 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 09be55df
......@@ -288,6 +288,9 @@ OBJS += i386/fdct_mmx.o i386/cputest.o \
ifeq ($(CONFIG_GPL),yes)
OBJS += i386/idct_mmx_xvid.o
endif
ifeq ($(CONFIG_CAVS_DECODER),yes)
OBJS += i386/cavsdsp_mmx.o
endif
ifeq ($(TARGET_BUILTIN_VECTOR),yes)
i386/fft_sse.o: CFLAGS+= -msse
depend: CFLAGS+= -msse
......
......@@ -1194,6 +1194,7 @@ typedef struct AVCodecContext {
#define FF_IDCT_VP3 12
#define FF_IDCT_IPP 13
#define FF_IDCT_XVIDMMX 14
#define FF_IDCT_CAVS 15
/**
* slice count.
......
......@@ -78,6 +78,7 @@ typedef struct {
int qp;
int qp_fixed;
int cbp;
ScanTable scantable;
/** intra prediction is done with un-deblocked samples
they are saved here before deblocking the MB */
......@@ -97,6 +98,7 @@ typedef struct {
int scale_den[2]; ///< for scaling neighbouring MVs
int got_keyframe;
DCTELEM *block;
} AVSContext;
/*****************************************************************************
......@@ -649,10 +651,9 @@ static int decode_residual_block(AVSContext *h, GetBitContext *gb,
int dqm = dequant_mul[qp];
int dqs = dequant_shift[qp];
int dqa = 1 << (dqs - 1);
const uint8_t *scantab = ff_zigzag_direct;
DCTELEM block[64];
const uint8_t *scantab = h->scantable.permutated;
DCTELEM *block = h->block;
memset(block,0,64*sizeof(DCTELEM));
for(i=0;i<65;i++) {
level_code = get_ue_code(gb,r->golomb_order);
if(level_code >= ESCAPE_CODE) {
......@@ -1135,8 +1136,10 @@ static int decode_pic(AVSContext *h) {
enum mb_t mb_type;
if (!s->context_initialized) {
s->avctx->idct_algo = FF_IDCT_CAVS;
if (MPV_common_init(s) < 0)
return -1;
ff_init_scantable(s->dsp.idct_permutation,&h->scantable,ff_zigzag_direct);
}
get_bits(&s->gb,16);//bbv_dwlay
if(h->stc == PIC_PB_START_CODE) {
......@@ -1281,6 +1284,7 @@ static void init_top_lines(AVSContext *h) {
/* alloc space for co-located MVs and types */
h->col_mv = av_malloc( h->mb_width*h->mb_height*4*sizeof(vector_t));
h->col_type_base = av_malloc(h->mb_width*h->mb_height);
h->block = av_mallocz(64*sizeof(DCTELEM));
}
static int decode_seq_header(AVSContext *h) {
......@@ -1478,6 +1482,7 @@ static int cavs_decode_end(AVCodecContext * avctx) {
av_free(h->top_border_v);
av_free(h->col_mv);
av_free(h->col_type_base);
av_free(h->block);
return 0;
}
......
......@@ -246,6 +246,7 @@ static void cavs_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride) {
dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b1 - b5) >> 7)];
dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b4) >> 7)];
}
memset(block,0,64*sizeof(DCTELEM));
}
/*****************************************************************************
......
This diff is collapsed.
......@@ -2622,6 +2622,22 @@ PREFETCH(prefetch_3dnow, prefetch)
#include "h264dsp_mmx.c"
/* AVS specific */
/* Declaration only: the definition is not in this file view. Called from
 * dsputil_init_mmx() with the same (c, avctx) pair when CONFIG_CAVS_DECODER
 * is defined. */
void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
/**
 * CAVS qpel8 "mc00" put variant.
 *
 * Thin wrapper: hands dst, src and stride unchanged to put_pixels8_mmx()
 * and supplies 8 as its final argument (presumably the number of rows to
 * process -- confirm against put_pixels8_mmx).
 */
void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
{
    const int size = 8;

    put_pixels8_mmx(dst, src, stride, size);
}
/**
 * CAVS qpel8 "mc00" avg variant.
 *
 * Thin wrapper: hands dst, src and stride unchanged to avg_pixels8_mmx()
 * and supplies 8 as its final argument (presumably the number of rows to
 * process -- confirm against avg_pixels8_mmx).
 */
void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
{
    const int size = 8;

    avg_pixels8_mmx(dst, src, stride, size);
}
/**
 * CAVS qpel16 "mc00" put variant.
 *
 * Thin wrapper: hands dst, src and stride unchanged to put_pixels16_mmx()
 * and supplies 16 as its final argument (presumably the number of rows to
 * process -- confirm against put_pixels16_mmx).
 */
void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
{
    const int size = 16;

    put_pixels16_mmx(dst, src, stride, size);
}
/**
 * CAVS qpel16 "mc00" avg variant.
 *
 * Thin wrapper: hands dst, src and stride unchanged to avg_pixels16_mmx()
 * and supplies 16 as its final argument (presumably the number of rows to
 * process -- confirm against avg_pixels16_mmx).
 */
void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
{
    const int size = 16;

    avg_pixels16_mmx(dst, src, stride, size);
}
/* external functions, from idct_mmx.c */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
......@@ -2779,6 +2795,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM;
}
#endif
}else if(idct_algo==FF_IDCT_CAVS){
c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
#ifdef CONFIG_GPL
}else if(idct_algo==FF_IDCT_XVIDMMX){
if(mm_flags & MM_MMXEXT){
......@@ -3012,6 +3030,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
#ifdef CONFIG_CAVS_DECODER
ff_cavsdsp_init_mmx2(c, avctx);
#endif
#ifdef CONFIG_ENCODERS
c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
#endif //CONFIG_ENCODERS
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment