Commit 21af69f7 authored by Fabrice Bellard

use block[] in structure to have it aligned on 8 bytes for mmx optimizations -...

Use block[] in the structure so that it is aligned on 8 bytes for MMX optimizations; dct_unquantize is now always a function pointer; added a specialized dct_unquantize_h263.


Originally committed as revision 22 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent b7ec19d3
...@@ -24,15 +24,17 @@ ...@@ -24,15 +24,17 @@
#include "dsputil.h" #include "dsputil.h"
#include "mpegvideo.h" #include "mpegvideo.h"
#include "../config.h" static void encode_picture(MpegEncContext *s, int picture_number);
static void rate_control_init(MpegEncContext *s);
#ifdef ARCH_X86 static int rate_estimate_qscale(MpegEncContext *s);
#include "i386/mpegvideo.c" static void dct_unquantize_mpeg1_c(MpegEncContext *s,
#endif DCTELEM *block, int n, int qscale);
#ifndef DCT_UNQUANTIZE static void dct_unquantize_h263_c(MpegEncContext *s,
#define DCT_UNQUANTIZE(a,b,c,d) dct_unquantize(a,b,c,d) DCTELEM *block, int n, int qscale);
#endif static int dct_quantize(MpegEncContext *s, DCTELEM *block, int n, int qscale);
static int dct_quantize_mmx(MpegEncContext *s,
DCTELEM *block, int n,
int qscale);
#define EDGE_WIDTH 16 #define EDGE_WIDTH 16
/* enable all paranoid tests for rounding, overflows, etc... */ /* enable all paranoid tests for rounding, overflows, etc... */
...@@ -59,10 +61,6 @@ static UINT8 h263_chroma_roundtab[16] = { ...@@ -59,10 +61,6 @@ static UINT8 h263_chroma_roundtab[16] = {
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
}; };
static void encode_picture(MpegEncContext *s, int picture_number);
static void rate_control_init(MpegEncContext *s);
static int rate_estimate_qscale(MpegEncContext *s);
/* default motion estimation */ /* default motion estimation */
int motion_estimation_method = ME_LOG; int motion_estimation_method = ME_LOG;
...@@ -98,8 +96,13 @@ int MPV_common_init(MpegEncContext *s) ...@@ -98,8 +96,13 @@ int MPV_common_init(MpegEncContext *s)
int c_size, i; int c_size, i;
UINT8 *pict; UINT8 *pict;
#if defined ( HAVE_MMX ) && defined ( BIN_PORTABILITY ) if (s->out_format == FMT_H263)
MPV_common_init_mmx(); s->dct_unquantize = dct_unquantize_h263_c;
else
s->dct_unquantize = dct_unquantize_mpeg1_c;
#ifdef HAVE_MMX
MPV_common_init_mmx(s);
#endif #endif
s->mb_width = (s->width + 15) / 16; s->mb_width = (s->width + 15) / 16;
s->mb_height = (s->height + 15) / 16; s->mb_height = (s->height + 15) / 16;
...@@ -358,7 +361,6 @@ static void draw_edges(UINT8 *buf, int wrap, int width, int height, int w) ...@@ -358,7 +361,6 @@ static void draw_edges(UINT8 *buf, int wrap, int width, int height, int w)
} }
/* generic function for encode/decode called before a frame is coded/decoded */ /* generic function for encode/decode called before a frame is coded/decoded */
#ifndef ARCH_X86
void MPV_frame_start(MpegEncContext *s) void MPV_frame_start(MpegEncContext *s)
{ {
int i; int i;
...@@ -378,7 +380,7 @@ void MPV_frame_start(MpegEncContext *s) ...@@ -378,7 +380,7 @@ void MPV_frame_start(MpegEncContext *s)
} }
} }
} }
#endif
/* generic function for encode/decode called after a frame has been coded/decoded */ /* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s) void MPV_frame_end(MpegEncContext *s)
{ {
...@@ -461,12 +463,6 @@ static inline int clip(int a, int amin, int amax) ...@@ -461,12 +463,6 @@ static inline int clip(int a, int amin, int amax)
return a; return a;
} }
static int dct_quantize(MpegEncContext *s, DCTELEM *block, int n, int qscale);
static int dct_quantize_mmx(MpegEncContext *s,
DCTELEM *block, int n,
int qscale);
static void dct_unquantize(MpegEncContext *s, DCTELEM *block, int n, int qscale);
/* apply one mpeg motion vector to the three components */ /* apply one mpeg motion vector to the three components */
static inline void mpeg_motion(MpegEncContext *s, static inline void mpeg_motion(MpegEncContext *s,
UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
...@@ -633,7 +629,7 @@ static inline void put_dct(MpegEncContext *s, ...@@ -633,7 +629,7 @@ static inline void put_dct(MpegEncContext *s,
DCTELEM *block, int i, UINT8 *dest, int line_size) DCTELEM *block, int i, UINT8 *dest, int line_size)
{ {
if (!s->mpeg2) if (!s->mpeg2)
DCT_UNQUANTIZE(s, block, i, s->qscale); s->dct_unquantize(s, block, i, s->qscale);
j_rev_dct (block); j_rev_dct (block);
put_pixels_clamped(block, dest, line_size); put_pixels_clamped(block, dest, line_size);
} }
...@@ -644,7 +640,7 @@ static inline void add_dct(MpegEncContext *s, ...@@ -644,7 +640,7 @@ static inline void add_dct(MpegEncContext *s,
{ {
if (s->block_last_index[i] >= 0) { if (s->block_last_index[i] >= 0) {
if (!s->mpeg2) if (!s->mpeg2)
DCT_UNQUANTIZE(s, block, i, s->qscale); s->dct_unquantize(s, block, i, s->qscale);
j_rev_dct (block); j_rev_dct (block);
add_pixels_clamped(block, dest, line_size); add_pixels_clamped(block, dest, line_size);
} }
...@@ -740,7 +736,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) ...@@ -740,7 +736,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
UINT8 *mbskip_ptr; UINT8 *mbskip_ptr;
/* avoid copy if macroblock skipped in last frame too */ /* avoid copy if macroblock skipped in last frame too */
if (!s->encoding) { if (!s->encoding && s->pict_type != B_TYPE) {
mbskip_ptr = &s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]; mbskip_ptr = &s->mbskip_table[s->mb_y * s->mb_width + s->mb_x];
if (s->mb_skiped) { if (s->mb_skiped) {
s->mb_skiped = 0; s->mb_skiped = 0;
...@@ -810,7 +806,6 @@ static void encode_picture(MpegEncContext *s, int picture_number) ...@@ -810,7 +806,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
{ {
int mb_x, mb_y, wrap; int mb_x, mb_y, wrap;
UINT8 *ptr; UINT8 *ptr;
DCTELEM block[6][64];
int i, motion_x, motion_y; int i, motion_x, motion_y;
s->picture_number = picture_number; s->picture_number = picture_number;
...@@ -879,17 +874,17 @@ static void encode_picture(MpegEncContext *s, int picture_number) ...@@ -879,17 +874,17 @@ static void encode_picture(MpegEncContext *s, int picture_number)
/* get the pixels */ /* get the pixels */
wrap = s->linesize; wrap = s->linesize;
ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16; ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16;
get_pixels(block[0], ptr, wrap); get_pixels(s->block[0], ptr, wrap);
get_pixels(block[1], ptr + 8, wrap); get_pixels(s->block[1], ptr + 8, wrap);
get_pixels(block[2], ptr + 8 * wrap, wrap); get_pixels(s->block[2], ptr + 8 * wrap, wrap);
get_pixels(block[3], ptr + 8 * wrap + 8, wrap); get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap);
wrap = s->linesize >> 1; wrap = s->linesize >> 1;
ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8; ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8;
get_pixels(block[4], ptr, wrap); get_pixels(s->block[4], ptr, wrap);
wrap = s->linesize >> 1; wrap = s->linesize >> 1;
ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8; ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8;
get_pixels(block[5], ptr, wrap); get_pixels(s->block[5], ptr, wrap);
/* subtract previous frame if non intra */ /* subtract previous frame if non intra */
if (!s->mb_intra) { if (!s->mb_intra) {
...@@ -900,10 +895,10 @@ static void encode_picture(MpegEncContext *s, int picture_number) ...@@ -900,10 +895,10 @@ static void encode_picture(MpegEncContext *s, int picture_number)
((mb_y * 16 + (motion_y >> 1)) * s->linesize) + ((mb_y * 16 + (motion_y >> 1)) * s->linesize) +
(mb_x * 16 + (motion_x >> 1)); (mb_x * 16 + (motion_x >> 1));
sub_pixels_2(block[0], ptr, s->linesize, dxy); sub_pixels_2(s->block[0], ptr, s->linesize, dxy);
sub_pixels_2(block[1], ptr + 8, s->linesize, dxy); sub_pixels_2(s->block[1], ptr + 8, s->linesize, dxy);
sub_pixels_2(block[2], ptr + s->linesize * 8, s->linesize, dxy); sub_pixels_2(s->block[2], ptr + s->linesize * 8, s->linesize, dxy);
sub_pixels_2(block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy); sub_pixels_2(s->block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy);
if (s->out_format == FMT_H263) { if (s->out_format == FMT_H263) {
/* special rounding for h263 */ /* special rounding for h263 */
...@@ -923,9 +918,9 @@ static void encode_picture(MpegEncContext *s, int picture_number) ...@@ -923,9 +918,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
} }
offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx); offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx);
ptr = s->last_picture[1] + offset; ptr = s->last_picture[1] + offset;
sub_pixels_2(block[4], ptr, s->linesize >> 1, dxy); sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy);
ptr = s->last_picture[2] + offset; ptr = s->last_picture[2] + offset;
sub_pixels_2(block[5], ptr, s->linesize >> 1, dxy); sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy);
} }
emms_c(); emms_c();
...@@ -943,25 +938,25 @@ static void encode_picture(MpegEncContext *s, int picture_number) ...@@ -943,25 +938,25 @@ static void encode_picture(MpegEncContext *s, int picture_number)
for(i=0;i<6;i++) { for(i=0;i<6;i++) {
int last_index; int last_index;
if (av_fdct == jpeg_fdct_ifast) if (av_fdct == jpeg_fdct_ifast)
last_index = dct_quantize(s, block[i], i, s->qscale); last_index = dct_quantize(s, s->block[i], i, s->qscale);
else else
last_index = dct_quantize_mmx(s, block[i], i, s->qscale); last_index = dct_quantize_mmx(s, s->block[i], i, s->qscale);
s->block_last_index[i] = last_index; s->block_last_index[i] = last_index;
} }
/* huffman encode */ /* huffman encode */
switch(s->out_format) { switch(s->out_format) {
case FMT_MPEG1: case FMT_MPEG1:
mpeg1_encode_mb(s, block, motion_x, motion_y); mpeg1_encode_mb(s, s->block, motion_x, motion_y);
break; break;
case FMT_H263: case FMT_H263:
if (s->h263_msmpeg4) if (s->h263_msmpeg4)
msmpeg4_encode_mb(s, block, motion_x, motion_y); msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
else else
h263_encode_mb(s, block, motion_x, motion_y); h263_encode_mb(s, s->block, motion_x, motion_y);
break; break;
case FMT_MJPEG: case FMT_MJPEG:
mjpeg_encode_mb(s, block); mjpeg_encode_mb(s, s->block);
break; break;
} }
...@@ -969,7 +964,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) ...@@ -969,7 +964,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->mv[0][0][0] = motion_x; s->mv[0][0][0] = motion_x;
s->mv[0][0][1] = motion_y; s->mv[0][0][1] = motion_y;
MPV_decode_mb(s, block); MPV_decode_mb(s, s->block);
} }
} }
} }
...@@ -1121,9 +1116,8 @@ static int dct_quantize_mmx(MpegEncContext *s, ...@@ -1121,9 +1116,8 @@ static int dct_quantize_mmx(MpegEncContext *s,
return last_non_zero; return last_non_zero;
} }
#ifndef HAVE_DCT_UNQUANTIZE static void dct_unquantize_mpeg1_c(MpegEncContext *s,
static void dct_unquantize(MpegEncContext *s, DCTELEM *block, int n, int qscale)
DCTELEM *block, int n, int qscale)
{ {
int i, level; int i, level;
const UINT16 *quant_matrix; const UINT16 *quant_matrix;
...@@ -1133,10 +1127,6 @@ static void dct_unquantize(MpegEncContext *s, ...@@ -1133,10 +1127,6 @@ static void dct_unquantize(MpegEncContext *s,
block[0] = block[0] * s->y_dc_scale; block[0] = block[0] * s->y_dc_scale;
else else
block[0] = block[0] * s->c_dc_scale; block[0] = block[0] * s->c_dc_scale;
if (s->out_format == FMT_H263) {
i = 1;
goto unquant_even;
}
/* XXX: only mpeg1 */ /* XXX: only mpeg1 */
quant_matrix = s->intra_matrix; quant_matrix = s->intra_matrix;
for(i=1;i<64;i++) { for(i=1;i<64;i++) {
...@@ -1160,7 +1150,6 @@ static void dct_unquantize(MpegEncContext *s, ...@@ -1160,7 +1150,6 @@ static void dct_unquantize(MpegEncContext *s,
} }
} else { } else {
i = 0; i = 0;
unquant_even:
quant_matrix = s->non_intra_matrix; quant_matrix = s->non_intra_matrix;
for(;i<64;i++) { for(;i<64;i++) {
level = block[i]; level = block[i];
...@@ -1185,7 +1174,41 @@ static void dct_unquantize(MpegEncContext *s, ...@@ -1185,7 +1174,41 @@ static void dct_unquantize(MpegEncContext *s,
} }
} }
} }
#endif
/*
 * H.263 inverse quantization of one block of 64 coefficients, performed
 * in place (plain C version).
 *
 *   s      - codec context; mb_intra, y_dc_scale and c_dc_scale are read.
 *   block  - the 64 coefficients; non-zero entries are overwritten with
 *            their dequantized value, zero entries are left untouched.
 *   n      - block index inside the macroblock: indices below 4 are scaled
 *            with y_dc_scale, the others with c_dc_scale (intra DC only).
 *   qscale - quantizer scale applied to the coefficients.
 *
 * The quantizer is taken from the 'qscale' argument rather than from
 * s->qscale, so the function honours the value its caller hands in.
 */
static void dct_unquantize_h263_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale)
{
    int i, level, qmul, qadd;

    if (s->mb_intra) {
        /* the intra DC coefficient is only multiplied by its DC scale and
           is excluded from the generic loop below */
        if (n < 4)
            block[0] = block[0] * s->y_dc_scale;
        else
            block[0] = block[0] * s->c_dc_scale;
        i = 1;
    } else {
        i = 0;
    }

    qmul = qscale << 1;
    /* qscale when it is odd, qscale - 1 when it is even */
    qadd = (qscale - 1) | 1;

    for (; i < 64; i++) {
        level = block[i];
        if (level) {
            /* scale the magnitude and push it away from zero by qadd */
            if (level < 0) {
                level = level * qmul - qadd;
            } else {
                level = level * qmul + qadd;
            }
#ifdef PARANOID
            if (level < -2048 || level > 2047)
                fprintf(stderr, "unquant error %d %d\n", i, level);
#endif
            block[i] = level;
        }
    }
}
/* rate control */ /* rate control */
......
...@@ -173,6 +173,10 @@ typedef struct MpegEncContext { ...@@ -173,6 +173,10 @@ typedef struct MpegEncContext {
int interlaced_dct; int interlaced_dct;
int last_qscale; int last_qscale;
int first_slice; int first_slice;
DCTELEM block[6][64] __align8;
void (*dct_unquantize)(struct MpegEncContext *s,
DCTELEM *block, int n, int qscale);
} MpegEncContext; } MpegEncContext;
extern const UINT8 zigzag_direct[64]; extern const UINT8 zigzag_direct[64];
...@@ -182,6 +186,9 @@ void MPV_common_end(MpegEncContext *s); ...@@ -182,6 +186,9 @@ void MPV_common_end(MpegEncContext *s);
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
void MPV_frame_start(MpegEncContext *s); void MPV_frame_start(MpegEncContext *s);
void MPV_frame_end(MpegEncContext *s); void MPV_frame_end(MpegEncContext *s);
#ifdef HAVE_MMX
void MPV_common_init_mmx(MpegEncContext *s);
#endif
/* motion_est.c */ /* motion_est.c */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment