dsputil.h 21.1 KB
Newer Older
1 2 3
/*
 * DSP utils
 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
20 21 22

/**
 * @file dsputil.h
 * DSP utils.
 * Note: many functions in here may use MMX, which trashes the FPU state; it is
 * absolutely necessary to call emms_c() between DSP and float/double code.
 */

Fabrice Bellard's avatar
Fabrice Bellard committed
28 29 30 31
#ifndef DSPUTIL_H
#define DSPUTIL_H

#include "common.h"
32
#include "avcodec.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
33

Michael Niedermayer's avatar
Michael Niedermayer committed
34

Michael Niedermayer's avatar
Michael Niedermayer committed
35
//#define DEBUG
Fabrice Bellard's avatar
Fabrice Bellard committed
36 37 38
/* dct code */

/** Element type used for the DCT blocks handled by the functions below. */
typedef short DCTELEM;

/* fdct variants operating in place on a block of DCTELEMs */
void fdct_ifast (DCTELEM *data);
void fdct_ifast248 (DCTELEM *data);
void ff_jpeg_fdct_islow (DCTELEM *data);
void ff_fdct248_islow (DCTELEM *data);

/* j_rev_dct variants operating in place on a block of DCTELEMs */
void j_rev_dct (DCTELEM *data);
void j_rev_dct4 (DCTELEM *data);
void j_rev_dct2 (DCTELEM *data);
void j_rev_dct1 (DCTELEM *data);

/* fdct variants named after x86 SIMD extensions */
void ff_fdct_mmx(DCTELEM *block);
void ff_fdct_mmx2(DCTELEM *block);
void ff_fdct_sse2(DCTELEM *block);

/* NOTE: the parameter order differs between these functions:
 * ff_h264_idct_add_c takes (dst, block, stride) while the lowres variants
 * take (dst, stride, block). */
void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);

57
/* encoding scans */
extern const uint8_t ff_alternate_horizontal_scan[64];
extern const uint8_t ff_alternate_vertical_scan[64];
extern const uint8_t ff_zigzag_direct[64];
extern const uint8_t ff_zigzag248_direct[64];

/* pixel operations */

/* cropTbl has MAX_NEG_CROP extra entries on each side of its 256 entries */
#define MAX_NEG_CROP 1024

/* temporary */
extern uint32_t squareTbl[512];
extern uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
Fabrice Bellard's avatar
Fabrice Bellard committed
69

70 71
/* VP3 DSP functions */
void vp3_dsp_init_c(void);
72 73
void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix,
    int coeff_count, DCTELEM *output_data);
74 75

void vp3_dsp_init_mmx(void);
76 77
void vp3_idct_mmx(int16_t *input_data, int16_t *dequant_matrix,
    int coeff_count, DCTELEM *output_data);
78

79
void vp3_dsp_init_sse2(void);
80 81
void vp3_idct_sse2(int16_t *input_data, int16_t *dequant_matrix,
    int coeff_count, DCTELEM *output_data);
Fabrice Bellard's avatar
Fabrice Bellard committed
82

Michael Niedermayer's avatar
Michael Niedermayer committed
83
/* Minimum alignment rules ;)
If you notice errors in the alignment annotations, need more alignment for some
asm code on some CPU, or need to use a function with less aligned data, then
send a mail to the ffmpeg-dev list, ...

!warning: these alignments might not match reality (attribute((align)) stuff
may be missing somewhere). I (Michael) did not check them; these are just the
alignments which I think could be reached easily ...

!future video codecs might need functions with less strict alignment
*/

93
/*
void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
void clear_blocks_c(DCTELEM *blocks);
*/
Fabrice Bellard's avatar
Fabrice Bellard committed
100 101

/* add and put pixel (decoding) */
// block sizes for op_pixels_func are 8x4, 8x8, 16x8 and 16x16
// h for op_pixels_func is limited to {width/2, width}, but is never larger than 16 and never smaller than 4
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
// like op_pixels_func, but with an explicit width w
typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
Michael Niedermayer's avatar
Michael Niedermayer committed
108

Michael Niedermayer's avatar
Michael Niedermayer committed
109
/**
 * Declare the put, put_no_rnd and avg prototypes (all with the qpel_mc_func
 * parameter list) for one function name suffix.
 * The expansion already ends with a semicolon, so invocations take none.
 */
#define DEF_OLD_QPEL(name)\
void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_avg_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);

DEF_OLD_QPEL(qpel16_mc11_old_c)
DEF_OLD_QPEL(qpel16_mc31_old_c)
DEF_OLD_QPEL(qpel16_mc12_old_c)
DEF_OLD_QPEL(qpel16_mc32_old_c)
DEF_OLD_QPEL(qpel16_mc13_old_c)
DEF_OLD_QPEL(qpel16_mc33_old_c)
DEF_OLD_QPEL(qpel8_mc11_old_c)
DEF_OLD_QPEL(qpel8_mc31_old_c)
DEF_OLD_QPEL(qpel8_mc12_old_c)
DEF_OLD_QPEL(qpel8_mc32_old_c)
DEF_OLD_QPEL(qpel8_mc13_old_c)
DEF_OLD_QPEL(qpel8_mc33_old_c)
Michael Niedermayer's avatar
Michael Niedermayer committed
126 127 128 129 130 131

/**
 * Define a static function a() that applies b() twice: once at the given
 * block/pixels pointers and once with both pointers advanced by n.
 * @param a name of the function to define
 * @param b name of the function called for each of the two parts
 * @param n offset added to block and pixels for the second call; it is
 *          parenthesized in the expansion so that an expression argument
 *          (e.g. 16>>1) is applied as a whole
 */
#define CALL_2X_PIXELS(a, b, n)\
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    b(block    , pixels    , line_size, h);\
    b(block+(n), pixels+(n), line_size, h);\
}
Michael Niedermayer's avatar
Michael Niedermayer committed
132

Fabrice Bellard's avatar
Fabrice Bellard committed
133
/* motion estimation */
// h is limited to {width/2, width, 2*width}, but is never larger than 16 and never smaller than 2
// although currently h<4 is not used, as functions with width <8 are neither used nor implemented
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
Michael Niedermayer's avatar
Michael Niedermayer committed
137

138

Michael Niedermayer's avatar
Michael Niedermayer committed
139 140 141
/**
 * DSPContext.
 */
142 143
typedef struct DSPContext {
    /* pixel ops : interface with DCT */
144 145 146
    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
147
    void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
148
    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
Michael Niedermayer's avatar
Michael Niedermayer committed
149 150 151
    /**
     * translational global motion compensation.
     */
152
    void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
Michael Niedermayer's avatar
Michael Niedermayer committed
153 154 155
    /**
     * global motion compensation.
     */
156
    void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
157 158
		    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
159 160
    int (*pix_sum)(uint8_t * pix, int line_size);
    int (*pix_norm1)(uint8_t * pix, int line_size);
161 162
// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
    
163 164 165 166 167 168 169 170 171
    me_cmp_func sad[5]; /* identical to pix_absAxA except additional void * */
    me_cmp_func sse[5];
    me_cmp_func hadamard8_diff[5];
    me_cmp_func dct_sad[5];
    me_cmp_func quant_psnr[5];
    me_cmp_func bit[5];
    me_cmp_func rd[5];
    me_cmp_func vsad[5];
    me_cmp_func vsse[5];
172
    me_cmp_func nsse[5];
173 174
    me_cmp_func w53[5];
    me_cmp_func w97[5];
Michael Niedermayer's avatar
Michael Niedermayer committed
175

176 177 178 179
    me_cmp_func me_pre_cmp[5];
    me_cmp_func me_cmp[5];
    me_cmp_func me_sub_cmp[5];
    me_cmp_func mb_cmp[5];
180
    me_cmp_func ildct_cmp[5]; //only width 16 used
181

Michael Niedermayer's avatar
Michael Niedermayer committed
182 183
    /**
     * Halfpel motion compensation with rounding (a+b+1)>>1.
184
     * this is an array[4][4] of motion compensation funcions for 4 
185
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
186
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
Michael Niedermayer's avatar
Michael Niedermayer committed
187 188 189 190 191
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
192
    op_pixels_func put_pixels_tab[4][4];
Michael Niedermayer's avatar
Michael Niedermayer committed
193 194 195

    /**
     * Halfpel motion compensation with rounding (a+b+1)>>1.
Mike Melanson's avatar
Mike Melanson committed
196
     * This is an array[4][4] of motion compensation functions for 4 
197
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
198
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
Michael Niedermayer's avatar
Michael Niedermayer committed
199 200 201 202 203
     * @param block destination into which the result is averaged (a+b+1)>>1
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
204
    op_pixels_func avg_pixels_tab[4][4];
Michael Niedermayer's avatar
Michael Niedermayer committed
205 206 207

    /**
     * Halfpel motion compensation with no rounding (a+b)>>1.
Michael Niedermayer's avatar
Michael Niedermayer committed
208 209
     * this is an array[2][4] of motion compensation funcions for 2 
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
210
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
Michael Niedermayer's avatar
Michael Niedermayer committed
211 212 213 214 215
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
Michael Niedermayer's avatar
Michael Niedermayer committed
216
    op_pixels_func put_no_rnd_pixels_tab[4][4];
Michael Niedermayer's avatar
Michael Niedermayer committed
217 218 219

    /**
     * Halfpel motion compensation with no rounding (a+b)>>1.
Michael Niedermayer's avatar
Michael Niedermayer committed
220 221
     * this is an array[2][4] of motion compensation funcions for 2 
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
222
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
Michael Niedermayer's avatar
Michael Niedermayer committed
223 224 225 226 227
     * @param block destination into which the result is averaged (a+b)>>1
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
Michael Niedermayer's avatar
Michael Niedermayer committed
228
    op_pixels_func avg_no_rnd_pixels_tab[4][4];
229
    
230 231
    void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
    
232 233 234 235 236 237 238 239 240 241
    /**
     * Thirdpel motion compensation with rounding (a+b+1)>>1.
     * this is an array[12] of motion compensation funcions for the 9 thirdpel positions<br>
     * *pixels_tab[ xthirdpel + 4*ythirdpel ]
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
    tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
242 243
    tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?

244 245 246 247
    qpel_mc_func put_qpel_pixels_tab[2][16];
    qpel_mc_func avg_qpel_pixels_tab[2][16];
    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
Michael Niedermayer's avatar
Michael Niedermayer committed
248
    qpel_mc_func put_mspel_pixels_tab[8];
249 250 251 252 253 254
    
    /**
     * h264 Chram MC
     */
    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
255

256 257 258
    qpel_mc_func put_h264_qpel_pixels_tab[3][16];
    qpel_mc_func avg_h264_qpel_pixels_tab[3][16];
    
259
    me_cmp_func pix_abs[2][4];
Michael Niedermayer's avatar
Michael Niedermayer committed
260 261 262
    
    /* huffyuv specific */
    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
Michael Niedermayer's avatar
Michael Niedermayer committed
263
    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
264 265 266 267 268
    /**
     * subtract huffyuv's variant of median prediction
     * note, this might read from src1[-1], src2[-1]
     */
    void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
Michael Niedermayer's avatar
Michael Niedermayer committed
269
    void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w);
270
    
Michael Niedermayer's avatar
Michael Niedermayer committed
271 272 273
    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);

274
    void (*h261_loop_filter)(uint8_t *src, int stride);
275

276 277
    /* (I)DCT */
    void (*fdct)(DCTELEM *block/* align 16*/);
278
    void (*fdct248)(DCTELEM *block/* align 16*/);
279 280 281
    
    /* IDCT really*/
    void (*idct)(DCTELEM *block/* align 16*/);
Michael Niedermayer's avatar
Michael Niedermayer committed
282 283
    
    /**
Michael Niedermayer's avatar
Michael Niedermayer committed
284
     * block -> idct -> clip to unsigned 8 bit -> dest.
Michael Niedermayer's avatar
Michael Niedermayer committed
285
     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
Michael Niedermayer's avatar
Michael Niedermayer committed
286
     * @param line_size size in bytes of a horizotal line of dest
Michael Niedermayer's avatar
Michael Niedermayer committed
287
     */
288
    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
Michael Niedermayer's avatar
Michael Niedermayer committed
289 290 291
    
    /**
     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
Michael Niedermayer's avatar
Michael Niedermayer committed
292
     * @param line_size size in bytes of a horizotal line of dest
Michael Niedermayer's avatar
Michael Niedermayer committed
293
     */
294
    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
Michael Niedermayer's avatar
Michael Niedermayer committed
295 296
    
    /**
Michael Niedermayer's avatar
Michael Niedermayer committed
297
     * idct input permutation.
298 299 300 301
     * several optimized IDCTs need a permutated input (relative to the normal order of the reference
     * IDCT)
     * this permutation must be performed before the idct_put/add, note, normally this can be merged
     * with the zigzag/alternate scan<br>
Michael Niedermayer's avatar
Michael Niedermayer committed
302 303 304 305 306 307
     * an example to avoid confusion:
     * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
     * - (x -> referece dct -> reference idct -> x)
     * - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
     * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
     */
308 309 310 311 312 313 314
    uint8_t idct_permutation[64];
    int idct_permutation_type;
#define FF_NO_IDCT_PERM 1
#define FF_LIBMPEG2_IDCT_PERM 2
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4

315 316 317 318 319
    int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
    void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
#define BASIS_SHIFT 16
#define RECON_SHIFT 6

320 321 322 323 324 325 326
    /**
     * This function handles any initialization for the VP3 DSP functions.
     */
    void (*vp3_dsp_init)(void);

    /** 
     * This function is responsible for taking a block of zigzag'd,
327 328
     * quantized DCT coefficients and reconstructing the original block of
     * samples.
329 330 331
     * @param input_data 64 zigzag'd, quantized DCT coefficients
     * @param dequant_matrix 64 zigzag'd quantizer coefficients
     * @param coeff_count index of the last coefficient
332 333
     * @param output_samples space for 64 DCTELEMs where the transformed
     * samples will be stored
334
     */
335 336
    void (*vp3_idct)(int16_t *input_data, int16_t *dequant_matrix,
        int coeff_count, DCTELEM *output_samples);
337 338
 
    void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride);
339 340
} DSPContext;

Fabrice Bellard's avatar
Fabrice Bellard committed
341
void dsputil_static_init(void);
342
void dsputil_init(DSPContext* p, AVCodecContext *avctx);
Fabrice Bellard's avatar
Fabrice Bellard committed
343

344 345 346 347
/**
 * permute block according to permuatation.
 * @param last last non zero element in scantable order
 */
348
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
349

350 351
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);

Michael Niedermayer's avatar
Michael Niedermayer committed
352 353 354 355 356 357 358 359 360 361 362 363
/* Replicate the byte value c into all four bytes of a 32 bit word. */
#define BYTE_VEC32(c) ((c)*0x01010101UL)

/**
 * Average the four bytes packed in a with the four bytes packed in b,
 * rounding up: each result byte is (x + y + 1) >> 1.
 * The low bit of every byte of a^b is masked off before the shift so that
 * no bit crosses into the neighbouring byte.
 */
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
{
    const uint32_t half_diff = ((a ^ b) & ~BYTE_VEC32(0x01)) >> 1;

    return (a | b) - half_diff;
}

/**
 * Average the four bytes packed in a with the four bytes packed in b,
 * rounding down: each result byte is (x + y) >> 1.
 */
static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
{
    const uint32_t half_diff = ((a ^ b) & ~BYTE_VEC32(0x01)) >> 1;

    return (a & b) + half_diff;
}

364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
/**
 * Derive the penalty factor for a comparison function type.
 * Only the low 8 bits of type select the comparison function.
 * @param lambda  value scaled for the SAD, DCT, W53, W97 and SATD types
 * @param lambda2 value scaled for the RD, PSNR, SSE and NSSE types
 * @param type    FF_CMP_* value, possibly with higher bits set
 * @return the factor; unknown types are treated like FF_CMP_SAD and
 *         FF_CMP_BIT always yields 1
 */
static inline int get_penalty_factor(int lambda, int lambda2, int type){
    const int cmp = type & 0xFF;
    int factor;

    switch(cmp){
    case FF_CMP_BIT:
        factor = 1;
        break;
    case FF_CMP_RD:
    case FF_CMP_PSNR:
    case FF_CMP_SSE:
    case FF_CMP_NSSE:
        factor = lambda2 >> FF_LAMBDA_SHIFT;
        break;
    case FF_CMP_SATD:
    case FF_CMP_W97:
        factor = (2*lambda) >> FF_LAMBDA_SHIFT;
        break;
    case FF_CMP_W53:
        factor = (4*lambda) >> FF_LAMBDA_SHIFT;
        break;
    case FF_CMP_DCT:
        factor = (3*lambda) >> (FF_LAMBDA_SHIFT+1);
        break;
    case FF_CMP_SAD:
    default:
        factor = lambda >> FF_LAMBDA_SHIFT;
        break;
    }
    return factor;
}

Michael Niedermayer's avatar
Michael Niedermayer committed
387
/**
 * Empty mmx state.
 * This must be called between any dsp function and float/double code,
 * for example sin(); dsp->idct_put(); emms_c(); cos()
 * This default definition expands to nothing.
 */
#define emms_c()

/* should be defined by architectures supporting
   one or more MultiMedia extension */
int mm_support(void);

/* request 16 byte alignment for the declared object */
#define __align16 __attribute__ ((aligned (16)))

Fabrice Bellard's avatar
Fabrice Bellard committed
400
/*
 * Per-architecture section: each branch defines __align8 and STRIDE_ALIGN
 * and declares its dsputil_init_* function (the fallback branch declares
 * none).
 * Note that despite its name __align8 requests 4 byte alignment on ARMV4L
 * and 16 byte alignment on POWERPC and MMI.
 */
#if defined(HAVE_MMX)

#undef emms_c

#define MM_MMX    0x0001 /* standard MMX */
#define MM_3DNOW  0x0004 /* AMD 3DNOW */
#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
#define MM_SSE    0x0008 /* SSE functions */
#define MM_SSE2   0x0010 /* PIV SSE2 functions */

extern int mm_flags;

void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);

/* execute the emms instruction */
static inline void emms(void)
{
    __asm __volatile ("emms;":::"memory");
}

/* Calls emms() only if mm_flags has MM_MMX set.
 * NOTE(review): this expands to a braced block rather than do { } while (0),
 * so "if (x) emms_c(); else ..." does not compile; kept as is because
 * callers outside this file cannot be checked from here. */
#define emms_c() \
{\
    if (mm_flags & MM_MMX)\
        emms();\
}

#define __align8 __attribute__ ((aligned (8)))
#define STRIDE_ALIGN 8

void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);

#elif defined(ARCH_ARMV4L)

/* This is to use 4 bytes read to the IDCT pointers for some 'zero'
   line optimizations */
#define __align8 __attribute__ ((aligned (4)))
#define STRIDE_ALIGN 4

void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx);

#elif defined(HAVE_MLIB)

/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
#define __align8 __attribute__ ((aligned (8)))
#define STRIDE_ALIGN 8

void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);

#elif defined(ARCH_SPARC)

/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
#define __align8 __attribute__ ((aligned (8)))
#define STRIDE_ALIGN 8
void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);

#elif defined(ARCH_ALPHA)

#define __align8 __attribute__ ((aligned (8)))
#define STRIDE_ALIGN 8

void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);

#elif defined(ARCH_POWERPC)

#define MM_ALTIVEC    0x0001 /* standard AltiVec */

extern int mm_flags;

/* "pixel" is renamed while <altivec.h> is included and restored afterwards */
#if defined(HAVE_ALTIVEC) && !defined(CONFIG_DARWIN)
#define pixel altivec_pixel
#include <altivec.h>
#undef pixel
#endif

#define __align8 __attribute__ ((aligned (16)))
#define STRIDE_ALIGN 16

void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);

#elif defined(HAVE_MMI)

#define __align8 __attribute__ ((aligned (16)))
#define STRIDE_ALIGN 16

void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);

#elif defined(ARCH_SH4)

#define __align8 __attribute__ ((aligned (8)))
#define STRIDE_ALIGN 8

void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);

#else

/* fallback when none of the above is defined */
#define __align8 __attribute__ ((aligned (8)))
#define STRIDE_ALIGN 8

#endif

503 504 505 506
/*
 * Loads (LD16/LD32/LD64) and a 32 bit store (ST32) through a pointer to
 * arbitrary memory, in native byte order.
 * ST32 is parenthesized as a whole so that it behaves as a single
 * expression wherever it is used.
 */
#ifdef __GNUC__

/* packed single-member structs, accessed through a pointer cast below */
struct unaligned_64 { uint64_t l; } __attribute__((packed));
struct unaligned_32 { uint32_t l; } __attribute__((packed));
struct unaligned_16 { uint16_t l; } __attribute__((packed));

#define LD16(a) (((const struct unaligned_16 *) (a))->l)
#define LD32(a) (((const struct unaligned_32 *) (a))->l)
#define LD64(a) (((const struct unaligned_64 *) (a))->l)

#define ST32(a, b) ((((struct unaligned_32 *) (a))->l) = (b))

#else /* __GNUC__ */

/* NOTE(review): these are plain pointer casts; they presumably rely on the
 * target tolerating such accesses - confirm for new platforms. */
#define LD16(a) (*((const uint16_t*)(a)))
#define LD32(a) (*((const uint32_t*)(a)))
#define LD64(a) (*((const uint64_t*)(a)))

#define ST32(a, b) (*((uint32_t*)(a)) = (b))

#endif /* !__GNUC__ */

525
/* PSNR */
526
void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
527 528
              int orig_linesize[3], int coded_linesize,
              AVCodecContext *avctx);
529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548

/* FFT computation */

/* NOTE: integer code will be added soon, so always use the FFTSample
   type instead of spelling out float */
typedef float FFTSample;

/* complex value made of two FFTSamples */
typedef struct FFTComplex {
    FFTSample re;
    FFTSample im;
} FFTComplex;

/* FFT state; fft_calc is the function invoked by ff_fft_calc() */
typedef struct FFTContext {
    int nbits;
    int inverse;
    uint16_t *revtab;
    FFTComplex *exptab;
    /* only used by SSE code */
    FFTComplex *exptab1;
    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
} FFTContext;

549 550 551 552 553
int ff_fft_init(FFTContext *s, int nbits, int inverse);
void ff_fft_permute(FFTContext *s, FFTComplex *z);
void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);

/**
 * Call the fft_calc function pointer stored in s on z.
 * s->fft_calc must have been set before; it is not checked here.
 */
static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
{
    s->fft_calc(s, z);
}
void ff_fft_end(FFTContext *s);
560 561 562 563 564 565 566 567 568 569 570 571

/* MDCT computation */

typedef struct MDCTContext {
    /* size of MDCT (i.e. number of input data * 2) */
    int n;
    /* n = 2^nbits */
    int nbits;
    /* pre/post rotation tables */
    FFTSample *tcos;
    FFTSample *tsin;
    /* embedded FFT state */
    FFTContext fft;
} MDCTContext;

Fabrice Bellard's avatar
Fabrice Bellard committed
572
int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
573
void ff_imdct_calc(MDCTContext *s, FFTSample *output,
574
                const FFTSample *input, FFTSample *tmp);
575
void ff_mdct_calc(MDCTContext *s, FFTSample *out,
576
               const FFTSample *input, FFTSample *tmp);
Fabrice Bellard's avatar
Fabrice Bellard committed
577
void ff_mdct_end(MDCTContext *s);
578

579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596
/**
 * Define a static function name16() with the me_cmp_func parameter list
 * that returns the sum of name8() applied at dst/src and name8() applied
 * at dst+8/src+8, both with the same stride and h.
 */
#define WARPER8_16(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    const int first_part  = name8(s, dst    , src    , stride, h);\
    const int second_part = name8(s, dst + 8, src + 8, stride, h);\
    return first_part + second_part;\
}

/**
 * Define a static function name16() with the me_cmp_func parameter list
 * that sums name8() (always called with h = 8) over dst/src and
 * dst+8/src+8, and, only if h == 16, over the same two positions
 * 8 lines further down as well.
 */
#define WARPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    int score=0;\
    score +=name8(s, dst           , src           , stride, 8);\
    score +=name8(s, dst+8         , src+8         , stride, 8);\
    if(h==16){\
        dst += 8*stride;\
        src += 8*stride;\
        score +=name8(s, dst           , src           , stride, 8);\
        score +=name8(s, dst+8         , src+8         , stride, 8);\
    }\
    return score;\
}

599
#ifndef HAVE_LRINTF
600 601
/* XXX: add ISOC specific test to avoid specific BSD testing. */
/* better than nothing implementation. */
602
/* btw, rintf() is existing on fbsd too -- alex */
603
static always_inline long int lrintf(float x)
604
{
605
#ifdef CONFIG_WIN32
606 607 608 609 610 611 612 613
#  ifdef ARCH_X86
    int32_t i;
    asm volatile(
        "fistpl %0\n\t"
        : "=m" (i) : "t" (x) : "st"
    );
    return i;
#  else
614
    /* XXX: incorrect, but make it compile */
615 616
    return (int)(x + (x < 0 ? -0.5 : 0.5));
#  endif
617
#else
618
    return (int)(rint(x));
619
#endif
620
}
621 622 623 624 625
#else
#ifndef _ISOC9X_SOURCE
#define _ISOC9X_SOURCE
#endif
#include <math.h>
626 627
#endif

Fabrice Bellard's avatar
Fabrice Bellard committed
628
#endif