dsputil.h 35.6 KB
Newer Older
1 2
/*
 * DSP utils
3
 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
4
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
 *
6 7 8
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
9 10
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14 15 16 17 18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
Michael Niedermayer's avatar
Michael Niedermayer committed
22 23

/**
24
 * @file libavcodec/dsputil.h
Michael Niedermayer's avatar
Michael Niedermayer committed
25
 * DSP utils.
26 27
 * Note: many functions in here may use MMX, which trashes the FPU state; it is
 * absolutely necessary to call emms_c() between DSP and float/double code.
Michael Niedermayer's avatar
Michael Niedermayer committed
28 29
 */

30 31
#ifndef AVCODEC_DSPUTIL_H
#define AVCODEC_DSPUTIL_H
Fabrice Bellard's avatar
Fabrice Bellard committed
32

33
#include "libavutil/intreadwrite.h"
34
#include "avcodec.h"
Fabrice Bellard's avatar
Fabrice Bellard committed
35

Michael Niedermayer's avatar
Michael Niedermayer committed
36

Michael Niedermayer's avatar
Michael Niedermayer committed
37
//#define DEBUG
Fabrice Bellard's avatar
Fabrice Bellard committed
38 39
/* dct code */
typedef short DCTELEM;
40
typedef int DWTELEM;
41
typedef short IDWTELEM;
Fabrice Bellard's avatar
Fabrice Bellard committed
42

43
void fdct_ifast (DCTELEM *data);
44
void fdct_ifast248 (DCTELEM *data);
45
void ff_jpeg_fdct_islow (DCTELEM *data);
46
void ff_fdct248_islow (DCTELEM *data);
Fabrice Bellard's avatar
Fabrice Bellard committed
47 48

void j_rev_dct (DCTELEM *data);
49
void j_rev_dct4 (DCTELEM *data);
50
void j_rev_dct2 (DCTELEM *data);
51
void j_rev_dct1 (DCTELEM *data);
52
void ff_wmv2_idct_c(DCTELEM *data);
Fabrice Bellard's avatar
Fabrice Bellard committed
53

54
void ff_fdct_mmx(DCTELEM *block);
55
void ff_fdct_mmx2(DCTELEM *block);
56
void ff_fdct_sse2(DCTELEM *block);
Fabrice Bellard's avatar
Fabrice Bellard committed
57

58
void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
59
void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
60 61
void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
62 63
void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
64 65 66 67
void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
68

69 70
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
                              const float *src2, int src3, int blocksize, int step);
71 72
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
                             const float *win, float add_bias, int len);
Michael Niedermayer's avatar
Michael Niedermayer committed
73
void ff_float_to_int16_c(int16_t *dst, const float *src, long len);
74
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels);
75

76
/* encoding scans */
77 78 79
extern const uint8_t ff_alternate_horizontal_scan[64];
extern const uint8_t ff_alternate_vertical_scan[64];
extern const uint8_t ff_zigzag_direct[64];
80
extern const uint8_t ff_zigzag248_direct[64];
81

Fabrice Bellard's avatar
Fabrice Bellard committed
82
/* pixel operations */
83
#define MAX_NEG_CROP 1024
Fabrice Bellard's avatar
Fabrice Bellard committed
84 85

/* temporary */
86
extern uint32_t ff_squareTbl[512];
87
extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
Fabrice Bellard's avatar
Fabrice Bellard committed
88

89
/* VP3 DSP functions */
90 91 92
void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
Fabrice Bellard's avatar
Fabrice Bellard committed
93

94 95 96
void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);

97 98 99 100
/* VP6 DSP functions */
void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
                           const int16_t *h_weights, const int16_t *v_weights);

101 102 103 104 105
/* 1/2^n downscaling functions from imgconvert.c */
void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
106 107 108

void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
109

Michael Niedermayer's avatar
Michael Niedermayer committed
110
/* minimum alignment rules ;)
Diego Biurrun's avatar
Diego Biurrun committed
111 112 113 114 115 116
If you notice errors in the align stuff, need more alignment for some ASM code
for some CPU or need to use a function with less aligned data then send a mail
to the ffmpeg-devel mailing list, ...

!warning These alignments might not match reality (an attribute((align))
may be missing somewhere).
Diego Biurrun's avatar
Diego Biurrun committed
117
I (Michael) did not check them, these are just the alignments which I think
Diego Biurrun's avatar
Diego Biurrun committed
118
could be reached easily ...
Fabrice Bellard's avatar
Fabrice Bellard committed
119

Michael Niedermayer's avatar
Michael Niedermayer committed
120 121 122
!future video codecs might need functions with less strict alignment
*/

123
/*
124 125 126 127
void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
128
void clear_blocks_c(DCTELEM *blocks);
129
*/
Fabrice Bellard's avatar
Fabrice Bellard committed
130 131

/* add and put pixel (decoding) */
Michael Niedermayer's avatar
Michael Niedermayer committed
132
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
133
//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4
134
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
135
typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
136
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
137
typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
138
typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
Loren Merritt's avatar
Loren Merritt committed
139
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
Michael Niedermayer's avatar
Michael Niedermayer committed
140

Michael Niedermayer's avatar
Michael Niedermayer committed
141
/**
 * Declare the three prototypes belonging to one "old" qpel function name.
 *
 * Expands to declarations of ff_put_<name>, ff_put_no_rnd_<name> and
 * ff_avg_<name>, each with the signature
 * (uint8_t *dst, uint8_t *src, int stride) -- the same parameter list as
 * qpel_mc_func. dst is annotated as aligned to the block width (8 or 16),
 * src as byte-aligned.
 *
 * The expansion already ends in ';', so invocations are written without a
 * trailing semicolon.
 */
#define DEF_OLD_QPEL(name)\
void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_avg_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
Michael Niedermayer's avatar
Michael Niedermayer committed
145 146 147 148 149 150 151 152 153 154 155 156 157

DEF_OLD_QPEL(qpel16_mc11_old_c)
DEF_OLD_QPEL(qpel16_mc31_old_c)
DEF_OLD_QPEL(qpel16_mc12_old_c)
DEF_OLD_QPEL(qpel16_mc32_old_c)
DEF_OLD_QPEL(qpel16_mc13_old_c)
DEF_OLD_QPEL(qpel16_mc33_old_c)
DEF_OLD_QPEL(qpel8_mc11_old_c)
DEF_OLD_QPEL(qpel8_mc31_old_c)
DEF_OLD_QPEL(qpel8_mc12_old_c)
DEF_OLD_QPEL(qpel8_mc32_old_c)
DEF_OLD_QPEL(qpel8_mc13_old_c)
DEF_OLD_QPEL(qpel8_mc33_old_c)
Michael Niedermayer's avatar
Michael Niedermayer committed
158 159 160 161 162 163

/**
 * Define a static function a() with the op_pixels_func parameter list that
 * calls b() twice with the same line_size and h: first on (block, pixels),
 * then on both pointers advanced by n bytes.
 *
 * @param a name of the static function to define
 * @param b function called for each of the two positions
 * @param n byte offset between the two calls; parenthesized in the expansion
 *          so that any integer expression may be passed
 */
#define CALL_2X_PIXELS(a, b, n)\
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    b(block      , pixels      , line_size, h);\
    b(block + (n), pixels + (n), line_size, h);\
}
Michael Niedermayer's avatar
Michael Niedermayer committed
164

Fabrice Bellard's avatar
Fabrice Bellard committed
165
/* motion estimation */
166
// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2
Diego Biurrun's avatar
Diego Biurrun committed
167
// although currently h<4 is not used as functions with width <8 are neither used nor implemented
168
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
Michael Niedermayer's avatar
Michael Niedermayer committed
169

170

171 172 173
// for snow slices
typedef struct slice_buffer_s slice_buffer;

174 175 176 177 178 179 180
/**
 * Scantable: a coefficient scan order plus 64-entry tables stored with it.
 * NOTE(review): the tables are presumably filled in by ff_init_scantable();
 * confirm against its implementation.
 */
typedef struct ScanTable{
    /** Scan order this table refers to; stored by pointer, not copied. */
    const uint8_t *scantable;
    /** NOTE(review): presumably the scan order after permutation -- confirm. */
    uint8_t permutated[64];
    /** NOTE(review): meaning not visible in this header; see ff_init_scantable(). */
    uint8_t raster_end[64];
#if ARCH_PPC
                /** Used by dct_quantize_altivec to find last-non-zero */
    DECLARE_ALIGNED(16, uint8_t, inverse[64]);
#endif
} ScanTable;

void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);

189 190 191 192
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize,
                         int block_w, int block_h,
                         int src_x, int src_y, int w, int h);

Michael Niedermayer's avatar
Michael Niedermayer committed
193 194 195
/**
 * DSPContext.
 */
196 197
typedef struct DSPContext {
    /* pixel ops : interface with DCT */
198 199 200
    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
201
    void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
202
    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
Loren Merritt's avatar
Loren Merritt committed
203 204
    void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
    void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
205
    int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
Michael Niedermayer's avatar
Michael Niedermayer committed
206 207 208
    /**
     * translational global motion compensation.
     */
209
    void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
Michael Niedermayer's avatar
Michael Niedermayer committed
210 211 212
    /**
     * global motion compensation.
     */
213
    void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
214
                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
Loren Merritt's avatar
Loren Merritt committed
215
    void (*clear_block)(DCTELEM *block/*align 16*/);
216
    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
217 218
    int (*pix_sum)(uint8_t * pix, int line_size);
    int (*pix_norm1)(uint8_t * pix, int line_size);
219
// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
220

221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
    me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
    me_cmp_func sse[6];
    me_cmp_func hadamard8_diff[6];
    me_cmp_func dct_sad[6];
    me_cmp_func quant_psnr[6];
    me_cmp_func bit[6];
    me_cmp_func rd[6];
    me_cmp_func vsad[6];
    me_cmp_func vsse[6];
    me_cmp_func nsse[6];
    me_cmp_func w53[6];
    me_cmp_func w97[6];
    me_cmp_func dct_max[6];
    me_cmp_func dct264_sad[6];

    me_cmp_func me_pre_cmp[6];
    me_cmp_func me_cmp[6];
    me_cmp_func me_sub_cmp[6];
    me_cmp_func mb_cmp[6];
    me_cmp_func ildct_cmp[6]; //only width 16 used
    me_cmp_func frame_skip_cmp[6]; //only width 8 used
242

243 244
    int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
                             int size);
245

Michael Niedermayer's avatar
Michael Niedermayer committed
246 247
    /**
     * Halfpel motion compensation with rounding (a+b+1)>>1.
Luca Barbato's avatar
Luca Barbato committed
248
     * this is an array[4][4] of motion compensation functions for 4
249
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
250
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
Michael Niedermayer's avatar
Michael Niedermayer committed
251 252 253 254 255
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
256
    op_pixels_func put_pixels_tab[4][4];
Michael Niedermayer's avatar
Michael Niedermayer committed
257 258 259

    /**
     * Halfpel motion compensation with rounding (a+b+1)>>1.
260
     * This is an array[4][4] of motion compensation functions for 4
261
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
262
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
Michael Niedermayer's avatar
Michael Niedermayer committed
263 264 265 266 267
     * @param block destination into which the result is averaged (a+b+1)>>1
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
268
    op_pixels_func avg_pixels_tab[4][4];
Michael Niedermayer's avatar
Michael Niedermayer committed
269 270 271

    /**
     * Halfpel motion compensation with no rounding (a+b)>>1.
Luca Barbato's avatar
Luca Barbato committed
272
     * this is an array[2][4] of motion compensation functions for 2
Michael Niedermayer's avatar
Michael Niedermayer committed
273
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
274
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
Michael Niedermayer's avatar
Michael Niedermayer committed
275 276 277 278 279
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
Michael Niedermayer's avatar
Michael Niedermayer committed
280
    op_pixels_func put_no_rnd_pixels_tab[4][4];
Michael Niedermayer's avatar
Michael Niedermayer committed
281 282 283

    /**
     * Halfpel motion compensation with no rounding (a+b)>>1.
Luca Barbato's avatar
Luca Barbato committed
284
     * this is an array[2][4] of motion compensation functions for 2
Michael Niedermayer's avatar
Michael Niedermayer committed
285
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
286
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
Michael Niedermayer's avatar
Michael Niedermayer committed
287 288 289 290 291
     * @param block destination into which the result is averaged (a+b)>>1
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
Michael Niedermayer's avatar
Michael Niedermayer committed
292
    op_pixels_func avg_no_rnd_pixels_tab[4][4];
293

294
    void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
295

296 297
    /**
     * Thirdpel motion compensation with rounding (a+b+1)>>1.
Luca Barbato's avatar
Luca Barbato committed
298 299
     * this is an array[11] of motion compensation functions for the 9 thirdpel
     * positions<br>
300 301 302 303 304 305 306
     * *pixels_tab[ xthirdpel + 4*ythirdpel ]
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
    tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
307 308
    tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?

309 310 311 312
    qpel_mc_func put_qpel_pixels_tab[2][16];
    qpel_mc_func avg_qpel_pixels_tab[2][16];
    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
Michael Niedermayer's avatar
Michael Niedermayer committed
313
    qpel_mc_func put_mspel_pixels_tab[8];
314

315
    /**
Luca Barbato's avatar
Luca Barbato committed
316
     * h264 Chroma MC
317 318 319
     */
    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
320 321
    /* This is really one func used in VC-1 decoding */
    h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
322
    h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3];
323

324 325
    qpel_mc_func put_h264_qpel_pixels_tab[4][16];
    qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
326

327 328 329
    qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
    qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];

330 331
    h264_weight_func weight_h264_pixels_tab[10];
    h264_biweight_func biweight_h264_pixels_tab[10];
332

333 334 335 336 337 338 339 340 341
    /* AVS specific */
    qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
    qpel_mc_func avg_cavs_qpel_pixels_tab[2][16];
    void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);

342
    me_cmp_func pix_abs[2][4];
343

Michael Niedermayer's avatar
Michael Niedermayer committed
344 345
    /* huffyuv specific */
    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
346
    void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w);
Michael Niedermayer's avatar
Michael Niedermayer committed
347
    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
348 349 350 351 352
    /**
     * subtract huffyuv's variant of median prediction
     * note, this might read from src1[-1], src2[-1]
     */
    void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
353
    void (*add_hfyu_median_prediction)(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top);
354 355
    /* this might write to dst[w] */
    void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
356
    void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
357

358 359 360
    void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
    void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
    /* v/h_loop_filter_luma_intra: align 16 */
361 362
    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
363 364 365 366
    void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
    void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
    void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
    void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
367 368
    // h264_loop_filter_strength: simd only. the C version is inlined in h264.c
    void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
369
                                      int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
370

Michael Niedermayer's avatar
Michael Niedermayer committed
371 372 373
    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);

374
    void (*h261_loop_filter)(uint8_t *src, int stride);
375

376 377 378
    void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
    void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);

379 380 381
    void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
    void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);

382 383 384
    void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
                             const int16_t *h_weights,const int16_t *v_weights);

385
    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
386
    void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
Loren Merritt's avatar
Loren Merritt committed
387
    void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
388 389
    /* no alignment needed */
    void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc);
390
    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
Loren Merritt's avatar
Loren Merritt committed
391
    void (*vector_fmul)(float *dst, const float *src, int len);
392 393 394
    void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
    /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
    void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step);
395 396
    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
Loren Merritt's avatar
Loren Merritt committed
397 398
    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
    void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
399 400

    /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
401
     * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
Michael Niedermayer's avatar
Michael Niedermayer committed
402
    void (*float_to_int16)(int16_t *dst, const float *src, long len);
403
    void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels);
404

405 406
    /* (I)DCT */
    void (*fdct)(DCTELEM *block/* align 16*/);
407
    void (*fdct248)(DCTELEM *block/* align 16*/);
408

409 410
    /* IDCT really*/
    void (*idct)(DCTELEM *block/* align 16*/);
411

Michael Niedermayer's avatar
Michael Niedermayer committed
412
    /**
Michael Niedermayer's avatar
Michael Niedermayer committed
413
     * block -> idct -> clip to unsigned 8 bit -> dest.
Michael Niedermayer's avatar
Michael Niedermayer committed
414
     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
Panagiotis Issaris's avatar
Panagiotis Issaris committed
415
     * @param line_size size in bytes of a horizontal line of dest
Michael Niedermayer's avatar
Michael Niedermayer committed
416
     */
417
    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
418

Michael Niedermayer's avatar
Michael Niedermayer committed
419 420
    /**
     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
Panagiotis Issaris's avatar
Panagiotis Issaris committed
421
     * @param line_size size in bytes of a horizontal line of dest
Michael Niedermayer's avatar
Michael Niedermayer committed
422
     */
423
    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
424

Michael Niedermayer's avatar
Michael Niedermayer committed
425
    /**
Michael Niedermayer's avatar
Michael Niedermayer committed
426
     * idct input permutation.
427 428 429 430
     * several optimized IDCTs need a permutated input (relative to the normal order of the reference
     * IDCT)
     * this permutation must be performed before the idct_put/add, note, normally this can be merged
     * with the zigzag/alternate scan<br>
Michael Niedermayer's avatar
Michael Niedermayer committed
431 432 433 434 435 436
     * an example to avoid confusion:
     * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
     * - (x -> reference dct -> reference idct -> x)
     * - (x -> reference dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
     * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
     */
437 438 439 440 441 442
    uint8_t idct_permutation[64];
    int idct_permutation_type;
#define FF_NO_IDCT_PERM 1
#define FF_LIBMPEG2_IDCT_PERM 2
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4
443
#define FF_PARTTRANS_IDCT_PERM 5
444
#define FF_SSE2_IDCT_PERM 6
445

446 447 448 449
    int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
    void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
#define BASIS_SHIFT 16
#define RECON_SHIFT 6
450

451
    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
452
#define EDGE_WIDTH 16
453

454
    /* h264 functions */
455 456 457 458
    /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them
       NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them
        The reason for above, is that no 2 out of one list may use a different permutation.
    */
459 460 461 462
    void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
463
    void (*h264_dct)(DCTELEM block[4][4]);
464 465 466 467
    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
468 469

    /* snow wavelet */
470 471
    void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
    void (*horizontal_compose97i)(IDWTELEM *b, int width);
472
    void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
473 474

    void (*prefetch)(void *mem, int stride, int h);
475 476

    void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
477

478
    /* mlp/truehd functions */
479 480
    void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
                               int firorder, int iirorder,
481 482 483
                               unsigned int filter_shift, int32_t mask, int blocksize,
                               int32_t *sample_buffer);

484 485
    /* vc1 functions */
    void (*vc1_inv_trans_8x8)(DCTELEM *b);
486 487 488
    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
489 490
    void (*vc1_v_overlap)(uint8_t* src, int stride);
    void (*vc1_h_overlap)(uint8_t* src, int stride);
491 492 493 494 495 496
    void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
    void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
    void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
    void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq);
    void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq);
    void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq);
497 498 499 500
    /* put 8x8 block with bicubic interpolation and quarterpel precision
     * last argument is actually round value instead of height
     */
    op_pixels_func put_vc1_mspel_pixels_tab[16];
501
    op_pixels_func avg_vc1_mspel_pixels_tab[16];
502 503

    /* intrax8 functions */
504 505
    void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
    void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
506 507
           int * range, int * sum,  int edges);

508 509 510
    /* ape functions */
    /**
     * Add contents of the second vector to the first one.
511
     * @param len length of vectors, should be multiple of 16
512 513 514 515
     */
    void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
    /**
     * Subtract contents of the second vector from the first one.
516
     * @param len length of vectors, should be multiple of 16
517 518 519 520
     */
    void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
    /**
     * Calculate scalar product of two vectors.
521
     * @param len length of vectors, should be multiple of 16
522 523 524
     * @param shift number of bits to discard from product
     */
    int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
Kostya Shishkov's avatar
Kostya Shishkov committed
525

Kostya Shishkov's avatar
Kostya Shishkov committed
526 527 528 529
    /* rv30 functions */
    qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
    qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];

Kostya Shishkov's avatar
Kostya Shishkov committed
530 531 532 533 534
    /* rv40 functions */
    qpel_mc_func put_rv40_qpel_pixels_tab[4][16];
    qpel_mc_func avg_rv40_qpel_pixels_tab[4][16];
    h264_chroma_mc_func put_rv40_chroma_pixels_tab[3];
    h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3];
535 536
} DSPContext;

Måns Rullgård's avatar
Måns Rullgård committed
537
void dsputil_static_init(void);
538
void dsputil_init(DSPContext* p, AVCodecContext *avctx);
Fabrice Bellard's avatar
Fabrice Bellard committed
539

540 541
int ff_check_alignment(void);

542 543 544 545
/**
 * Permute block according to permutation.
 * @param last last non zero element in scantable order
 */
546
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
547

548 549
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);

550
/** Replicate the byte value c (0..255) into all four bytes of a 32-bit word. */
#define         BYTE_VEC32(c)   ((c)*0x01010101UL)

/**
 * Average the four bytes packed in a and b independently, rounding up.
 *
 * Per byte this is (a | b) - ((a ^ b) >> 1). The low bit of every byte of
 * a ^ b is cleared before the shift so that no bit crosses into the
 * neighbouring byte.
 *
 * @param a first group of four packed bytes
 * @param b second group of four packed bytes
 * @return four packed bytes, each (a_i + b_i + 1) >> 1
 */
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
{
    return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
}

/**
 * Average the four bytes packed in a and b independently, rounding down.
 *
 * Per byte this is (a & b) + ((a ^ b) >> 1), with the same low-bit masking
 * as rnd_avg32() to keep the shift inside each byte.
 *
 * @param a first group of four packed bytes
 * @param b second group of four packed bytes
 * @return four packed bytes, each (a_i + b_i) >> 1
 */
static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
{
    return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
}

562 563 564 565 566 567 568 569 570 571 572 573
static inline int get_penalty_factor(int lambda, int lambda2, int type){
    switch(type&0xFF){
    default:
    case FF_CMP_SAD:
        return lambda>>FF_LAMBDA_SHIFT;
    case FF_CMP_DCT:
        return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
    case FF_CMP_W53:
        return (4*lambda)>>(FF_LAMBDA_SHIFT);
    case FF_CMP_W97:
        return (2*lambda)>>(FF_LAMBDA_SHIFT);
    case FF_CMP_SATD:
574
    case FF_CMP_DCT264:
575 576 577 578 579 580 581 582 583 584 585
        return (2*lambda)>>FF_LAMBDA_SHIFT;
    case FF_CMP_RD:
    case FF_CMP_PSNR:
    case FF_CMP_SSE:
    case FF_CMP_NSSE:
        return lambda2>>FF_LAMBDA_SHIFT;
    case FF_CMP_BIT:
        return 1;
    }
}

Michael Niedermayer's avatar
Michael Niedermayer committed
586
/**
Michael Niedermayer's avatar
Michael Niedermayer committed
587
 * Empty mmx state.
Michael Niedermayer's avatar
Michael Niedermayer committed
588 589 590
 * this must be called between any dsp function and float/double code.
 * for example sin(); dsp->idct_put(); emms_c(); cos()
 */
591 592
#define emms_c()

593 594 595 596
/* should be defined by architectures supporting
   one or more MultiMedia extension */
int mm_support(void);

597
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
598
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
599 600 601 602 603 604 605 606
void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);

607
#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v)
608

609
#if HAVE_MMX
Fabrice Bellard's avatar
Fabrice Bellard committed
610

611
#undef emms_c
612

Måns Rullgård's avatar
Måns Rullgård committed
613 614
extern int mm_flags;

615 616
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
617
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
Fabrice Bellard's avatar
Fabrice Bellard committed
618 619 620

/**
 * Issue the x86 "emms" instruction.
 * The asm statement is volatile and declares a "memory" clobber, so the
 * compiler will neither drop it nor move memory accesses across it.
 */
static inline void emms(void)
{
    __asm__ volatile ("emms;":::"memory");
}

Michael Niedermayer's avatar
Michael Niedermayer committed
624

625 626
/*
 * MMX build of emms_c(): calls emms() only when the FF_MM_MMX bit is set
 * in mm_flags. Wrapped in do { } while (0) so that "emms_c();" expands to
 * exactly one statement and stays valid in an unbraced if/else.
 */
#define emms_c() \
do {\
    if (mm_flags & FF_MM_MMX)\
        emms();\
} while (0)

631
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
632

633
#elif ARCH_ARM
Fabrice Bellard's avatar
Fabrice Bellard committed
634

Måns Rullgård's avatar
Måns Rullgård committed
635 636
extern int mm_flags;

637
#if HAVE_NEON
638 639 640 641
#   define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
#   define STRIDE_ALIGN 16
#endif

642
#elif ARCH_PPC
643

Måns Rullgård's avatar
Måns Rullgård committed
644 645
extern int mm_flags;

646
#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
647
#define STRIDE_ALIGN 16
648

649
#elif HAVE_MMI
650

651
#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
652
#define STRIDE_ALIGN 16
653

654 655 656 657 658
#else

#define mm_flags 0
#define mm_support() 0

659
#endif
Fabrice Bellard's avatar
Fabrice Bellard committed
660

661 662 663
#ifndef DECLARE_ALIGNED_8
#   define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v)
#endif
Fabrice Bellard's avatar
Fabrice Bellard committed
664

665 666
#ifndef STRIDE_ALIGN
#   define STRIDE_ALIGN 8
Fabrice Bellard's avatar
Fabrice Bellard committed
667 668
#endif

669
/* PSNR */
670
void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
671 672
              int orig_linesize[3], int coded_linesize,
              AVCodecContext *avctx);
673 674 675 676 677 678 679

/* FFT computation */

/* NOTE: soon integer code will be added, so you must use the
   FFTSample type */
typedef float FFTSample;

680 681
struct MDCTContext;

682 683 684 685 686 687 688 689 690 691
/** One complex sample; re is laid out before im. */
typedef struct FFTComplex {
    FFTSample re; /* real component (per the field name) */
    FFTSample im; /* imaginary component (per the field name) */
} FFTComplex;

typedef struct FFTContext {
    int nbits;
    int inverse;
    uint16_t *revtab;
    FFTComplex *exptab;
    FFTComplex *exptab1; /* only used by SSE code */
Loren Merritt's avatar
Loren Merritt committed
692 693
    FFTComplex *tmp_buf;
    void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
694
    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
Loren Merritt's avatar
Loren Merritt committed
695 696
    void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
    void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
697 698
} FFTContext;

699 700
extern FFTSample* ff_cos_tabs[13];

701 702 703 704 705
/**
 * Sets up a complex FFT.
 * @param nbits           log2 of the length of the input array
 * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
 */
706
int ff_fft_init(FFTContext *s, int nbits, int inverse);
Loren Merritt's avatar
Loren Merritt committed
707 708
void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
709 710
void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
711 712
void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z);
713
void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
714

715 716 717
/**
 * Do the permutation needed BEFORE calling ff_fft_calc().
 */
Loren Merritt's avatar
Loren Merritt committed
718 719 720 721
static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
{
    /* forward to the permute routine stored in the context */
    (*s->fft_permute)(s, z);
}
722 723 724 725
/**
 * Do a complex FFT with the parameters defined in ff_fft_init(). The
 * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
 */
726
static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
{
    /* forward to the transform routine stored in the context */
    s->fft_calc(s, z);
}
730
void ff_fft_end(FFTContext *s);
731 732 733 734 735 736 737 738 739 740 741 742

/* MDCT computation */

/**
 * State for the MDCT / IMDCT routines.
 * Embeds an FFTContext; ff_imdct_calc() and ff_imdct_half() dispatch
 * through the imdct_calc / imdct_half pointers of that member.
 */
typedef struct MDCTContext {
    int n;  /* size of MDCT (i.e. number of input data * 2) */
    int nbits; /* n = 2^nbits */
    /* pre/post rotation tables */
    FFTSample *tcos;
    FFTSample *tsin;
    FFTContext fft; /* embedded FFT state and function table */
} MDCTContext;

743 744 745 746 747 748 749 750 751
/** Run the imdct_calc routine stored in the context's embedded FFTContext. */
static inline void ff_imdct_calc(MDCTContext *s, FFTSample *output, const FFTSample *input)
{
    (*s->fft.imdct_calc)(s, output, input);
}
/** Run the imdct_half routine stored in the context's embedded FFTContext. */
static inline void ff_imdct_half(MDCTContext *s, FFTSample *output, const FFTSample *input)
{
    (*s->fft.imdct_half)(s, output, input);
}

752 753 754
/**
 * Generate a Kaiser-Bessel Derived Window.
 * @param   window  pointer to half window
755 756
 * @param   alpha   determines window shape
 * @param   n       size of half window
757
 */
758
void ff_kbd_window_init(float *window, float alpha, int n);
759

760 761 762 763 764 765
/**
 * Generate a sine window.
 * @param   window  pointer to half window
 * @param   n       size of half window
 */
void ff_sine_window_init(float *window, int n);
766 767 768 769 770
extern float ff_sine_128 [ 128];
extern float ff_sine_256 [ 256];
extern float ff_sine_512 [ 512];
extern float ff_sine_1024[1024];
extern float ff_sine_2048[2048];
771 772
extern float ff_sine_4096[4096];
extern float *ff_sine_windows[6];
773

Fabrice Bellard's avatar
Fabrice Bellard committed
774
int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
775 776
void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
Loren Merritt's avatar
Loren Merritt committed
777
void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
Loren Merritt's avatar
Loren Merritt committed
778
void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
Loren Merritt's avatar
Loren Merritt committed
779
void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
Loren Merritt's avatar
Loren Merritt committed
780
void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
Loren Merritt's avatar
Loren Merritt committed
781
void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
Loren Merritt's avatar
Loren Merritt committed
782
void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
Loren Merritt's avatar
Loren Merritt committed
783
void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input);
Fabrice Bellard's avatar
Fabrice Bellard committed
784
void ff_mdct_end(MDCTContext *s);
785

786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814
/* Real Discrete Fourier Transform */

/**
 * Transform selector passed as the trans argument of ff_rdft_init().
 * NOTE(review): the exact meaning of each variant (direction and sign
 * convention) is not visible in this header -- confirm against the
 * ff_rdft_init() implementation before relying on it.
 */
enum RDFTransformType {
    RDFT,
    IRDFT,
    RIDFT,
    IRIDFT,
};

/**
 * State for the real DFT routines ff_rdft_init(), ff_rdft_calc() and
 * ff_rdft_end().
 */
typedef struct {
    int nbits;           /* presumably the nbits given to ff_rdft_init() -- TODO confirm */
    int inverse;
    int sign_convention;

    /* pre/post rotation tables */
    FFTSample *tcos;
    FFTSample *tsin;
    FFTContext fft;      /* embedded complex FFT state */
} RDFTContext;

/**
 * Sets up a real FFT.
 * @param nbits           log2 of the length of the input array
 * @param trans           the type of transform
 */
int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
void ff_rdft_calc(RDFTContext *s, FFTSample *data);
void ff_rdft_end(RDFTContext *s);

815
/**
 * Define a static function name16 built from name8.
 * name16 calls name8 once at dst/src and once at dst+8/src+8, with the
 * same stride and h, and returns the sum of the two results.
 */
#define WRAPPER8_16(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    return name8(s, dst           , src           , stride, h)\
          +name8(s, dst+8         , src+8         , stride, h);\
}

821
/**
 * Define a static function name16 built from name8.
 * name8 is always called with a height of 8. The first two calls are at
 * dst/src and dst+8/src+8. When h is 16, two more calls are made after
 * advancing dst and src by 8*stride. The result is the sum of all calls.
 */
#define WRAPPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    int score=0;\
    score +=name8(s, dst           , src           , stride, 8);\
    score +=name8(s, dst+8         , src+8         , stride, 8);\
    if(h==16){\
        dst += 8*stride;\
        src += 8*stride;\
        score +=name8(s, dst           , src           , stride, 8);\
        score +=name8(s, dst+8         , src+8         , stride, 8);\
    }\
    return score;\
}

835

836
/**
 * Copy h rows of 2 bytes from src to dst.
 * Each row is moved with one AV_RN16/AV_WN16 pair; after every row dst
 * advances by dstStride and src by srcStride.
 */
static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN16(dst   , AV_RN16(src   ));
        dst+=dstStride;
        src+=srcStride;
    }
}

847
/**
 * Copy h rows of 4 bytes from src to dst.
 * Each row is moved with one AV_RN32/AV_WN32 pair; after every row dst
 * advances by dstStride and src by srcStride.
 */
static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN32(dst   , AV_RN32(src   ));
        dst+=dstStride;
        src+=srcStride;
    }
}

858
/**
 * Copy h rows of 8 bytes from src to dst.
 * Each row is moved as two 4-byte AV_RN32/AV_WN32 transfers; after every
 * row dst advances by dstStride and src by srcStride.
 */
static inline void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN32(dst   , AV_RN32(src   ));
        AV_WN32(dst+4 , AV_RN32(src+4 ));
        dst+=dstStride;
        src+=srcStride;
    }
}

870
/**
 * Copy h rows of 9 bytes from src to dst.
 * Each row is moved as two 4-byte AV_RN32/AV_WN32 transfers plus one
 * trailing byte; after every row dst advances by dstStride and src by
 * srcStride.
 */
static inline void copy_block9(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN32(dst   , AV_RN32(src   ));
        AV_WN32(dst+4 , AV_RN32(src+4 ));
        dst[8]= src[8];
        dst+=dstStride;
        src+=srcStride;
    }
}

883
/**
 * Copy h rows of 16 bytes from src to dst.
 * Each row is moved as four 4-byte AV_RN32/AV_WN32 transfers; after every
 * row dst advances by dstStride and src by srcStride.
 */
static inline void copy_block16(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN32(dst   , AV_RN32(src   ));
        AV_WN32(dst+4 , AV_RN32(src+4 ));
        AV_WN32(dst+8 , AV_RN32(src+8 ));
        AV_WN32(dst+12, AV_RN32(src+12));
        dst+=dstStride;
        src+=srcStride;
    }
}

897
/**
 * Copy h rows of 17 bytes from src to dst.
 * Each row is moved as four 4-byte AV_RN32/AV_WN32 transfers plus one
 * trailing byte; after every row dst advances by dstStride and src by
 * srcStride.
 */
static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN32(dst   , AV_RN32(src   ));
        AV_WN32(dst+4 , AV_RN32(src+4 ));
        AV_WN32(dst+8 , AV_RN32(src+8 ));
        AV_WN32(dst+12, AV_RN32(src+12));
        dst[16]= src[16];
        dst+=dstStride;
        src+=srcStride;
    }
}

912
#endif /* AVCODEC_DSPUTIL_H */