/*
 * (c) 2001 Fabrice Bellard
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file dct-test.c
 * DCT test. (c) 2001 Fabrice Bellard.
 * Started from sample code by Juan J. Sierralta P.
 */

Fabrice Bellard's avatar
Fabrice Bellard committed
28 29 30 31 32
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>
#include <math.h>

#include "libavutil/common.h"

#include "simple_idct.h"
#include "aandcttab.h"
#include "faandct.h"
#include "faanidct.h"
#include "x86/idct_xvid.h"
42

43
#undef printf
44
#undef random
45 46 47

/**
 * Thin wrapper that forwards to the standard memcpy().
 *
 * NOTE(review): presumably provided so that code linked into this test which
 * references fast_memcpy() resolves -- confirm against the build.
 *
 * @param a destination buffer
 * @param b source buffer
 * @param c number of bytes to copy
 * @return  a, as returned by memcpy()
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}

48
/* reference fdct/idct */
void fdct(DCTELEM *block);
void idct(DCTELEM *block);
void init_fdct(void);

void ff_mmx_idct(DCTELEM *data);
void ff_mmxext_idct(DCTELEM *data);

void odivx_idct_c(short *block);

// BFIN
void ff_bfin_idct(DCTELEM *block);
void ff_bfin_fdct(DCTELEM *block);

// ALTIVEC (only a forward DCT is declared; there is no idct_altivec routine)
void fdct_altivec(DCTELEM *block);

// ARM
void j_rev_dct_ARM(DCTELEM *data);
void simple_idct_ARM(DCTELEM *data);
void simple_idct_armv5te(DCTELEM *data);
void ff_simple_idct_armv6(DCTELEM *data);
void ff_simple_idct_neon(DCTELEM *data);
72 73

struct algo {
74
  const char *name;
75 76 77
  enum { FDCT, IDCT } is_idct;
  void (* func) (DCTELEM *block);
  void (* ref)  (DCTELEM *block);
78
  enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
79
  int  mm_support;
80 81 82 83 84 85 86 87
};

/*
 * Format tag used for the "FAAN" entry of algos[]: without FAAN_POSTSCALE the
 * entry is tagged SCALE_PERM, so dct_error() rescales its output with
 * ff_aanscales[]; with FAAN_POSTSCALE the output is compared as-is.
 */
#ifndef FAAN_POSTSCALE
#define FAAN_SCALE SCALE_PERM
#else
#define FAAN_SCALE NO_PERM
#endif

/* CPU capability flags; set from mm_support() at the start of main(). */
static int cpu_flags;

90
struct algo algos[] = {
91 92 93 94 95 96 97 98
  {"REF-DBL",         0, fdct,               fdct, NO_PERM},
  {"FAAN",            0, ff_faandct,         fdct, FAAN_SCALE},
  {"FAANI",           1, ff_faanidct,        idct, NO_PERM},
  {"IJG-AAN-INT",     0, fdct_ifast,         fdct, SCALE_PERM},
  {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, fdct, NO_PERM},
  {"REF-DBL",         1, idct,               idct, NO_PERM},
  {"INT",             1, j_rev_dct,          idct, MMX_PERM},
  {"SIMPLE-C",        1, ff_simple_idct,     idct, NO_PERM},
99

100
#ifdef HAVE_MMX
101
  {"MMX",             0, ff_fdct_mmx,        fdct, NO_PERM, FF_MM_MMX},
102
#ifdef HAVE_MMX2
103
  {"MMX2",            0, ff_fdct_mmx2,       fdct, NO_PERM, FF_MM_MMXEXT},
Baptiste Coudurier's avatar
Baptiste Coudurier committed
104
  {"SSE2",            0, ff_fdct_sse2,       fdct, NO_PERM, FF_MM_SSE2},
105
#endif
106

107
#ifdef CONFIG_GPL
108 109
  {"LIBMPEG2-MMX",    1, ff_mmx_idct,        idct, MMX_PERM, FF_MM_MMX},
  {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     idct, MMX_PERM, FF_MM_MMXEXT},
110
#endif
111 112 113 114
  {"SIMPLE-MMX",      1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, FF_MM_MMX},
  {"XVID-MMX",        1, ff_idct_xvid_mmx,   idct, NO_PERM, FF_MM_MMX},
  {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  idct, NO_PERM, FF_MM_MMXEXT},
  {"XVID-SSE2",       1, ff_idct_xvid_sse2,  idct, SSE2_PERM, FF_MM_SSE2},
115 116 117
#endif

#ifdef HAVE_ALTIVEC
118
  {"altivecfdct",     0, fdct_altivec,       fdct, NO_PERM, FF_MM_ALTIVEC},
119 120 121
#endif

#ifdef ARCH_BFIN
122 123
  {"BFINfdct",        0, ff_bfin_fdct,       fdct, NO_PERM},
  {"BFINidct",        1, ff_bfin_idct,       idct, NO_PERM},
124 125
#endif

126
#ifdef ARCH_ARM
127 128 129 130 131 132 133 134 135 136 137
  {"SIMPLE-ARM",      1, simple_idct_ARM,    idct, NO_PERM },
  {"INT-ARM",         1, j_rev_dct_ARM,      idct, MMX_PERM },
#ifdef HAVE_ARMV5TE
  {"SIMPLE-ARMV5TE",  1, simple_idct_armv5te, idct, NO_PERM },
#endif
#ifdef HAVE_ARMV6
  {"SIMPLE-ARMV6",    1, ff_simple_idct_armv6, idct, MMX_PERM },
#endif
#ifdef HAVE_NEON
  {"SIMPLE-NEON",     1, ff_simple_idct_neon, idct, PARTTRANS_PERM },
#endif
138
#endif /* ARCH_ARM */
139

140 141 142
  { 0 }
};

Fabrice Bellard's avatar
Fabrice Bellard committed
143 144
#define AANSCALE_BITS 12

Måns Rullgård's avatar
Måns Rullgård committed
145
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
Michael Niedermayer's avatar
Michael Niedermayer committed
146

147
/**
 * Read the current time with gettimeofday().
 *
 * @return seconds * 1000000 + microseconds, i.e. the time in microseconds
 */
int64_t gettime(void)
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    /* widen before multiplying so the product is computed in 64 bits */
    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
}

/* Number of random blocks used by the accuracy tests. */
#define NB_ITS 20000
/* Number of transform calls between two clock reads in the speed tests. */
#define NB_ITS_SPEED 50000

/* Input permutation for MMX_PERM implementations; filled by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Input permutation for MMX_SIMPLE_PERM implementations (read-only). */
static const short idct_simple_mmx_perm[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Column reordering applied within each row for SSE2_PERM implementations. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};

172 173 174 175 176 177
void idct_mmx_init(void)
{
    int i;

    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
    for (i = 0; i < 64; i++) {
178 179
        idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
//        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
180 181 182
    }
}

183
/* Working buffer handed to the implementation under test (16-byte aligned). */
static DCTELEM block[64] __attribute__ ((aligned (16)));
/* Test input; the reference implementation transforms it in place. */
static DCTELEM block1[64] __attribute__ ((aligned (8)));
/* Copy of the untouched input, used only by the optional debug dump. */
static DCTELEM block_org[64] __attribute__ ((aligned (8)));
186

187 188 189
/**
 * Execute the x86 "emms" instruction when the build has HAVE_MMX and
 * cpu_flags reports FF_MM_MMX; otherwise do nothing.
 *
 * NOTE(review): presumably needed so that floating-point code run after an
 * MMX routine sees a clean FPU state -- confirm.
 */
static inline void mmx_emms(void)
{
#ifdef HAVE_MMX
    if (cpu_flags & FF_MM_MMX)
        __asm__ volatile ("emms\n\t");
#endif
}

195 196
void dct_error(const char *name, int is_idct,
               void (*fdct_func)(DCTELEM *block),
197
               void (*fdct_ref)(DCTELEM *block), int form, int test)
Fabrice Bellard's avatar
Fabrice Bellard committed
198 199 200
{
    int it, i, scale;
    int err_inf, v;
201 202
    int64_t err2, ti, ti1, it1;
    int64_t sysErr[64], sysErrMax=0;
Michael Niedermayer's avatar
Michael Niedermayer committed
203 204
    int maxout=0;
    int blockSumErrMax=0, blockSumErr;
Fabrice Bellard's avatar
Fabrice Bellard committed
205 206 207 208 209

    srandom(0);

    err_inf = 0;
    err2 = 0;
Michael Niedermayer's avatar
Michael Niedermayer committed
210
    for(i=0; i<64; i++) sysErr[i]=0;
Fabrice Bellard's avatar
Fabrice Bellard committed
211
    for(it=0;it<NB_ITS;it++) {
Michael Niedermayer's avatar
Michael Niedermayer committed
212 213 214
        for(i=0;i<64;i++)
            block1[i] = 0;
        switch(test){
215
        case 0:
Michael Niedermayer's avatar
Michael Niedermayer committed
216 217
            for(i=0;i<64;i++)
                block1[i] = (random() % 512) -256;
Michael Niedermayer's avatar
Michael Niedermayer committed
218
            if (is_idct){
Michael Niedermayer's avatar
Michael Niedermayer committed
219
                fdct(block1);
Michael Niedermayer's avatar
Michael Niedermayer committed
220 221 222 223

                for(i=0;i<64;i++)
                    block1[i]>>=3;
            }
Michael Niedermayer's avatar
Michael Niedermayer committed
224 225 226 227 228 229 230 231 232 233 234
        break;
        case 1:{
            int num= (random()%10)+1;
            for(i=0;i<num;i++)
                block1[random()%64] = (random() % 512) -256;
        }break;
        case 2:
            block1[0]= (random()%4096)-2048;
            block1[63]= (block1[0]&1)^1;
        break;
        }
235

Michael Niedermayer's avatar
Michael Niedermayer committed
236 237 238 239 240
#if 0 // simulate mismatch control
{ int sum=0;
        for(i=0;i<64;i++)
           sum+=block1[i];

241
        if((sum&1)==0) block1[63]^=1;
Michael Niedermayer's avatar
Michael Niedermayer committed
242 243 244 245 246
}
#endif

        for(i=0; i<64; i++)
            block_org[i]= block1[i];
247

248
        if (form == MMX_PERM) {
Michael Niedermayer's avatar
Michael Niedermayer committed
249
            for(i=0;i<64;i++)
250
                block[idct_mmx_perm[i]] = block1[i];
251
            } else if (form == MMX_SIMPLE_PERM) {
Michael Niedermayer's avatar
Michael Niedermayer committed
252 253 254
            for(i=0;i<64;i++)
                block[idct_simple_mmx_perm[i]] = block1[i];

255 256 257
        } else if (form == SSE2_PERM) {
            for(i=0; i<64; i++)
                block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
258 259 260
        } else if (form == PARTTRANS_PERM) {
            for(i=0; i<64; i++)
                block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
261
        } else {
Michael Niedermayer's avatar
Michael Niedermayer committed
262 263
            for(i=0; i<64; i++)
                block[i]= block1[i];
264
        }
Michael Niedermayer's avatar
Michael Niedermayer committed
265 266 267 268 269
#if 0 // simulate mismatch control for tested IDCT but not the ref
{ int sum=0;
        for(i=0;i<64;i++)
           sum+=block[i];

270
        if((sum&1)==0) block[63]^=1;
Michael Niedermayer's avatar
Michael Niedermayer committed
271 272
}
#endif
273

Fabrice Bellard's avatar
Fabrice Bellard committed
274
        fdct_func(block);
275
        mmx_emms();
276

277
        if (form == SCALE_PERM) {
Fabrice Bellard's avatar
Fabrice Bellard committed
278
            for(i=0; i<64; i++) {
279
                scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
Michael Niedermayer's avatar
Michael Niedermayer committed
280 281 282 283
                block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
            }
        }

284
        fdct_ref(block1);
Fabrice Bellard's avatar
Fabrice Bellard committed
285

Michael Niedermayer's avatar
Michael Niedermayer committed
286
        blockSumErr=0;
Fabrice Bellard's avatar
Fabrice Bellard committed
287 288 289 290 291
        for(i=0;i<64;i++) {
            v = abs(block[i] - block1[i]);
            if (v > err_inf)
                err_inf = v;
            err2 += v * v;
292 293 294
            sysErr[i] += block[i] - block1[i];
            blockSumErr += v;
            if( abs(block[i])>maxout) maxout=abs(block[i]);
Fabrice Bellard's avatar
Fabrice Bellard committed
295
        }
Michael Niedermayer's avatar
Michael Niedermayer committed
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
        if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
#if 0 // print different matrix pairs
        if(blockSumErr){
            printf("\n");
            for(i=0; i<64; i++){
                if((i&7)==0) printf("\n");
                printf("%4d ", block_org[i]);
            }
            for(i=0; i<64; i++){
                if((i&7)==0) printf("\n");
                printf("%4d ", block[i] - block1[i]);
            }
        }
#endif
    }
311
    for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
312

Michael Niedermayer's avatar
Michael Niedermayer committed
313 314
#if 1 // dump systematic errors
    for(i=0; i<64; i++){
315
        if(i%8==0) printf("\n");
Michael Niedermayer's avatar
Michael Niedermayer committed
316
        printf("%5d ", (int)sysErr[i]);
Fabrice Bellard's avatar
Fabrice Bellard committed
317
    }
Michael Niedermayer's avatar
Michael Niedermayer committed
318 319
    printf("\n");
#endif
320

Michael Niedermayer's avatar
Michael Niedermayer committed
321
    printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
322
           is_idct ? "IDCT" : "DCT",
Michael Niedermayer's avatar
Michael Niedermayer committed
323 324
           name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
#if 1 //Speed test
Fabrice Bellard's avatar
Fabrice Bellard committed
325
    /* speed test */
Michael Niedermayer's avatar
Michael Niedermayer committed
326 327 328
    for(i=0;i<64;i++)
        block1[i] = 0;
    switch(test){
329
    case 0:
Michael Niedermayer's avatar
Michael Niedermayer committed
330 331
        for(i=0;i<64;i++)
            block1[i] = (random() % 512) -256;
Michael Niedermayer's avatar
Michael Niedermayer committed
332
        if (is_idct){
Michael Niedermayer's avatar
Michael Niedermayer committed
333
            fdct(block1);
Michael Niedermayer's avatar
Michael Niedermayer committed
334 335 336 337

            for(i=0;i<64;i++)
                block1[i]>>=3;
        }
Michael Niedermayer's avatar
Michael Niedermayer committed
338 339 340 341 342 343 344 345 346
    break;
    case 1:{
    case 2:
        block1[0] = (random() % 512) -256;
        block1[1] = (random() % 512) -256;
        block1[2] = (random() % 512) -256;
        block1[3] = (random() % 512) -256;
    }break;
    }
Fabrice Bellard's avatar
Fabrice Bellard committed
347

348
    if (form == MMX_PERM) {
Michael Niedermayer's avatar
Michael Niedermayer committed
349
        for(i=0;i<64;i++)
350
            block[idct_mmx_perm[i]] = block1[i];
351
    } else if(form == MMX_SIMPLE_PERM) {
Michael Niedermayer's avatar
Michael Niedermayer committed
352 353 354 355 356
        for(i=0;i<64;i++)
            block[idct_simple_mmx_perm[i]] = block1[i];
    } else {
        for(i=0; i<64; i++)
            block[i]= block1[i];
357 358
    }

Fabrice Bellard's avatar
Fabrice Bellard committed
359 360 361 362
    ti = gettime();
    it1 = 0;
    do {
        for(it=0;it<NB_ITS_SPEED;it++) {
Michael Niedermayer's avatar
Michael Niedermayer committed
363 364 365
            for(i=0; i<64; i++)
                block[i]= block1[i];
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
Diego Biurrun's avatar
Diego Biurrun committed
366
// do not memcpy especially not fastmemcpy because it does movntq !!!
Fabrice Bellard's avatar
Fabrice Bellard committed
367 368 369 370 371
            fdct_func(block);
        }
        it1 += NB_ITS_SPEED;
        ti1 = gettime() - ti;
    } while (ti1 < 1000000);
372
    mmx_emms();
Fabrice Bellard's avatar
Fabrice Bellard committed
373

Michael Niedermayer's avatar
Michael Niedermayer committed
374
    printf("%s %s: %0.1f kdct/s\n",
375
           is_idct ? "IDCT" : "DCT",
Fabrice Bellard's avatar
Fabrice Bellard committed
376
           name, (double)it1 * 1000.0 / (double)ti1);
Michael Niedermayer's avatar
Michael Niedermayer committed
377
#endif
Fabrice Bellard's avatar
Fabrice Bellard committed
378 379
}

380 381
/*
 * 8x8 pixel outputs compared by idct248_error(): img_dest receives the result
 * of the tested idct248_put function, img_dest1 the result of idct248_ref().
 */
static uint8_t img_dest[64] __attribute__ ((aligned (8)));
static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
Fabrice Bellard's avatar
Fabrice Bellard committed
382

383
void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
Fabrice Bellard's avatar
Fabrice Bellard committed
384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
{
    static int init;
    static double c8[8][8];
    static double c4[4][4];
    double block1[64], block2[64], block3[64];
    double s, sum, v;
    int i, j, k;

    if (!init) {
        init = 1;

        for(i=0;i<8;i++) {
            sum = 0;
            for(j=0;j<8;j++) {
                s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
                sum += c8[i][j] * c8[i][j];
            }
        }
403

Fabrice Bellard's avatar
Fabrice Bellard committed
404 405 406 407 408 409 410 411 412 413 414
        for(i=0;i<4;i++) {
            sum = 0;
            for(j=0;j<4;j++) {
                s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
                sum += c4[i][j] * c4[i][j];
            }
        }
    }

    /* butterfly */
415
    s = 0.5 * sqrt(2.0);
Fabrice Bellard's avatar
Fabrice Bellard committed
416 417
    for(i=0;i<4;i++) {
        for(j=0;j<8;j++) {
418 419
            block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
            block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
Fabrice Bellard's avatar
Fabrice Bellard committed
420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452
        }
    }

    /* idct8 on lines */
    for(i=0;i<8;i++) {
        for(j=0;j<8;j++) {
            sum = 0;
            for(k=0;k<8;k++)
                sum += c8[k][j] * block1[8*i+k];
            block2[8*i+j] = sum;
        }
    }

    /* idct4 */
    for(i=0;i<8;i++) {
        for(j=0;j<4;j++) {
            /* top */
            sum = 0;
            for(k=0;k<4;k++)
                sum += c4[k][j] * block2[8*(2*k)+i];
            block3[8*(2*j)+i] = sum;

            /* bottom */
            sum = 0;
            for(k=0;k<4;k++)
                sum += c4[k][j] * block2[8*(2*k+1)+i];
            block3[8*(2*j+1)+i] = sum;
        }
    }

    /* clamp and store the result */
    for(i=0;i<8;i++) {
        for(j=0;j<8;j++) {
453
            v = block3[8*i+j];
Fabrice Bellard's avatar
Fabrice Bellard committed
454 455 456 457 458 459 460 461 462
            if (v < 0)
                v = 0;
            else if (v > 255)
                v = 255;
            dest[i * linesize + j] = (int)rint(v);
        }
    }
}

463
void idct248_error(const char *name,
464
                    void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
Fabrice Bellard's avatar
Fabrice Bellard committed
465 466 467 468
{
    int it, i, it1, ti, ti1, err_max, v;

    srandom(0);
469

Fabrice Bellard's avatar
Fabrice Bellard committed
470 471 472 473
    /* just one test to see if code is correct (precision is less
       important here) */
    err_max = 0;
    for(it=0;it<NB_ITS;it++) {
474

475 476 477 478 479
        /* XXX: use forward transform to generate values */
        for(i=0;i<64;i++)
            block1[i] = (random() % 256) - 128;
        block1[0] += 1024;

Fabrice Bellard's avatar
Fabrice Bellard committed
480 481 482
        for(i=0; i<64; i++)
            block[i]= block1[i];
        idct248_ref(img_dest1, 8, block);
483

484 485 486
        for(i=0; i<64; i++)
            block[i]= block1[i];
        idct248_put(img_dest, 8, block);
487

488 489 490 491 492 493 494
        for(i=0;i<64;i++) {
            v = abs((int)img_dest[i] - (int)img_dest1[i]);
            if (v == 255)
                printf("%d %d\n", img_dest[i], img_dest1[i]);
            if (v > err_max)
                err_max = v;
        }
Fabrice Bellard's avatar
Fabrice Bellard committed
495 496 497 498 499 500 501 502 503
#if 0
        printf("ref=\n");
        for(i=0;i<8;i++) {
            int j;
            for(j=0;j<8;j++) {
                printf(" %3d", img_dest1[i*8+j]);
            }
            printf("\n");
        }
504

Fabrice Bellard's avatar
Fabrice Bellard committed
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525
        printf("out=\n");
        for(i=0;i<8;i++) {
            int j;
            for(j=0;j<8;j++) {
                printf(" %3d", img_dest[i*8+j]);
            }
            printf("\n");
        }
#endif
    }
    printf("%s %s: err_inf=%d\n",
           1 ? "IDCT248" : "DCT248",
           name, err_max);

    ti = gettime();
    it1 = 0;
    do {
        for(it=0;it<NB_ITS_SPEED;it++) {
            for(i=0; i<64; i++)
                block[i]= block1[i];
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
Diego Biurrun's avatar
Diego Biurrun committed
526
// do not memcpy especially not fastmemcpy because it does movntq !!!
Fabrice Bellard's avatar
Fabrice Bellard committed
527 528 529 530 531
            idct248_put(img_dest, 8, block);
        }
        it1 += NB_ITS_SPEED;
        ti1 = gettime() - ti;
    } while (ti1 < 1000000);
532
    mmx_emms();
Fabrice Bellard's avatar
Fabrice Bellard committed
533 534 535 536 537 538

    printf("%s %s: %0.1f kdct/s\n",
           1 ? "IDCT248" : "DCT248",
           name, (double)it1 * 1000.0 / (double)ti1);
}

539 540
/**
 * Print the command-line usage summary to standard output.
 */
void help(void)
{
    printf("dct-test [-i] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n");
}

Fabrice Bellard's avatar
Fabrice Bellard committed
549 550
int main(int argc, char **argv)
{
Fabrice Bellard's avatar
Fabrice Bellard committed
551
    int test_idct = 0, test_248_dct = 0;
Michael Niedermayer's avatar
Michael Niedermayer committed
552 553
    int c,i;
    int test=1;
554
    cpu_flags = mm_support();
555

Fabrice Bellard's avatar
Fabrice Bellard committed
556
    init_fdct();
557
    idct_mmx_init();
558

Måns Rullgård's avatar
Måns Rullgård committed
559
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
Michael Niedermayer's avatar
Michael Niedermayer committed
560
    for(i=0;i<MAX_NEG_CROP;i++) {
Måns Rullgård's avatar
Måns Rullgård committed
561 562
        cropTbl[i] = 0;
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
Michael Niedermayer's avatar
Michael Niedermayer committed
563
    }
564

565
    for(;;) {
Fabrice Bellard's avatar
Fabrice Bellard committed
566
        c = getopt(argc, argv, "ih4");
567 568 569 570 571 572
        if (c == -1)
            break;
        switch(c) {
        case 'i':
            test_idct = 1;
            break;
Fabrice Bellard's avatar
Fabrice Bellard committed
573 574 575
        case '4':
            test_248_dct = 1;
            break;
Michael Niedermayer's avatar
Michael Niedermayer committed
576
        default :
577 578
        case 'h':
            help();
579
            return 0;
580 581
        }
    }
582

Michael Niedermayer's avatar
Michael Niedermayer committed
583
    if(optind <argc) test= atoi(argv[optind]);
584

585 586
    printf("ffmpeg DCT/IDCT test\n");

Fabrice Bellard's avatar
Fabrice Bellard committed
587
    if (test_248_dct) {
588
        idct248_error("SIMPLE-C", ff_simple_idct248_put);
589
    } else {
590
      for (i=0;algos[i].name;i++)
591
        if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
592
          dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
Fabrice Bellard's avatar
Fabrice Bellard committed
593
        }
594
    }
Fabrice Bellard's avatar
Fabrice Bellard committed
595 596
    return 0;
}