/*
 * This file is part of the Independent JPEG Group's software.
 *
 * The authors make NO WARRANTY or representation, either express or implied,
 * with respect to this software, its quality, accuracy, merchantability, or
 * fitness for a particular purpose.  This software is provided "AS IS", and
 * you, its user, assume the entire risk as to its quality and accuracy.
 *
 * This software is copyright (C) 1994-1996, Thomas G. Lane.
 * All Rights Reserved except as specified below.
 *
 * Permission is hereby granted to use, copy, modify, and distribute this
 * software (or portions thereof) for any purpose, without fee, subject to
 * these conditions:
 * (1) If any part of the source code for this software is distributed, then
 * this README file must be included, with this copyright and no-warranty
 * notice unaltered; and any additions, deletions, or changes to the original
 * files must be clearly indicated in accompanying documentation.
 * (2) If only executable code is distributed, then the accompanying
 * documentation must state that "this software is based in part on the work
 * of the Independent JPEG Group".
 * (3) Permission for use of this software is granted only if the user accepts
 * full responsibility for any undesirable consequences; the authors accept
 * NO LIABILITY for damages of any kind.
 *
 * These conditions apply to any software derived from or based on the IJG
 * code, not just to the unmodified library.  If you use our work, you ought
 * to acknowledge us.
 *
 * Permission is NOT granted for the use of any IJG author's name or company
 * name in advertising or publicity relating to this software or products
 * derived from it.  This software may be referred to only as "the Independent
 * JPEG Group's software".
 *
 * We specifically permit and encourage the use of this software as the basis
 * of commercial products, provided that all warranty or liability claims are
 * assumed by the product vendor.
 *
 * This file contains a fast, not so accurate integer implementation of the
 * forward DCT (Discrete Cosine Transform).
 *
 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
 * on each column.  Direct algorithms are also available, but they are
 * much more complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on Arai, Agui, and Nakajima's algorithm for
 * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
 * Japanese, but the algorithm is described in the Pennebaker & Mitchell
 * JPEG textbook (see REFERENCES section in file README).  The following code
 * is based directly on figure 4-8 in P&M.
 * While an 8-point DCT cannot be done in less than 11 multiplies, it is
 * possible to arrange the computation so that many of the multiplies are
 * simple scalings of the final outputs.  These multiplies can then be
 * folded into the multiplications or divisions by the JPEG quantization
 * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
 * to be done in the DCT itself.
 * The primary disadvantage of this method is that with fixed-point math,
 * accuracy is lost due to imprecise representation of the scaled
 * quantization values.  The smaller the quantization table entry, the less
 * precise the scaled value, so this implementation does worse with high-
 * quality-setting files than with low-quality ones.
 */

/**
 * @file
 * Independent JPEG Group's fast AAN dct.
 */

#include <stdlib.h>
#include <stdio.h>
#include "libavutil/common.h"
#include "dct.h"

#define DCTSIZE 8
#define GLOBAL(x) x
#define RIGHT_SHIFT(x, n) ((x) >> (n))

/*
 * This module is specialized to the case DCTSIZE = 8.
 */

#if DCTSIZE != 8
#error "Sorry, this code only copes with 8x8 DCTs."
#endif


/* Scaling decisions are generally the same as in the LL&M algorithm;
 * see jfdctint.c for more details.  However, we choose to descale
 * (right shift) multiplication products as soon as they are formed,
 * rather than carrying additional fractional bits into subsequent additions.
 * This compromises accuracy slightly, but it lets us save a few shifts.
 * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
 * everywhere except in the multiplications proper; this saves a good deal
 * of work on 16-bit-int machines.
 *
 * Again to save a few shifts, the intermediate results between pass 1 and
 * pass 2 are not upscaled, but are represented only to integral precision.
 *
 * A final compromise is to represent the multiplicative constants to only
 * 8 fractional bits, rather than 13.  This saves some shifting work on some
 * machines, and may also reduce the cost of multiplication (since there
 * are fewer one-bits in the constants).
 */

#define CONST_BITS  8


/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 */

#if CONST_BITS == 8
#define FIX_0_382683433  ((int32_t)   98)       /* FIX(0.382683433) */
#define FIX_0_541196100  ((int32_t)  139)       /* FIX(0.541196100) */
#define FIX_0_707106781  ((int32_t)  181)       /* FIX(0.707106781) */
#define FIX_1_306562965  ((int32_t)  334)       /* FIX(1.306562965) */
#else
/* Fallback so that this branch is usable on its own: convert a real
 * constant to fixed point with CONST_BITS fractional bits, rounding to
 * nearest.  An already visible FIX() definition takes precedence.
 */
#ifndef FIX
#define FIX(x)  ((int32_t) ((x) * (1 << CONST_BITS) + 0.5))
#endif
#define FIX_0_382683433  FIX(0.382683433)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_707106781  FIX(0.707106781)
#define FIX_1_306562965  FIX(1.306562965)
#endif


/* We can gain a little more speed, with a further compromise in accuracy,
 * by omitting the addition in a descaling shift.  This yields an incorrectly
 * rounded result half the time...
 *
 * When USE_ACCURATE_ROUNDING is requested, a rounding DESCALE is supplied
 * here unless one is already visible, so that MULTIPLY() always expands to
 * something defined.
 */

#ifdef USE_ACCURATE_ROUNDING
#ifndef DESCALE
#define DESCALE(x,n)  RIGHT_SHIFT((x) + (1 << ((n)-1)), n)
#endif
#else
#undef DESCALE
#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
#endif


/* Multiply an int16_t variable by an int32_t constant, and immediately
 * descale to yield an int16_t result.  The product is formed at (at least)
 * int precision; only the descaled value is narrowed to int16_t.
 */

#define MULTIPLY(var,coef)  ((int16_t) DESCALE((var) * (coef), CONST_BITS))

static av_always_inline void row_fdct(int16_t * data){
146 147 148
  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  int tmp10, tmp11, tmp12, tmp13;
  int z1, z2, z3, z4, z5, z11, z13;
Diego Biurrun's avatar
Diego Biurrun committed
149
  int16_t *dataptr;
Fabrice Bellard's avatar
Fabrice Bellard committed
150 151 152 153 154 155 156 157 158 159 160 161 162 163
  int ctr;

  /* Pass 1: process rows. */

  dataptr = data;
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    tmp0 = dataptr[0] + dataptr[7];
    tmp7 = dataptr[0] - dataptr[7];
    tmp1 = dataptr[1] + dataptr[6];
    tmp6 = dataptr[1] - dataptr[6];
    tmp2 = dataptr[2] + dataptr[5];
    tmp5 = dataptr[2] - dataptr[5];
    tmp3 = dataptr[3] + dataptr[4];
    tmp4 = dataptr[3] - dataptr[4];
164

Fabrice Bellard's avatar
Fabrice Bellard committed
165
    /* Even part */
166

167
    tmp10 = tmp0 + tmp3;        /* phase 2 */
Fabrice Bellard's avatar
Fabrice Bellard committed
168 169 170
    tmp13 = tmp0 - tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp1 - tmp2;
171

Fabrice Bellard's avatar
Fabrice Bellard committed
172 173
    dataptr[0] = tmp10 + tmp11; /* phase 3 */
    dataptr[4] = tmp10 - tmp11;
174

Fabrice Bellard's avatar
Fabrice Bellard committed
175
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
176
    dataptr[2] = tmp13 + z1;    /* phase 5 */
Fabrice Bellard's avatar
Fabrice Bellard committed
177
    dataptr[6] = tmp13 - z1;
178

Fabrice Bellard's avatar
Fabrice Bellard committed
179 180
    /* Odd part */

181
    tmp10 = tmp4 + tmp5;        /* phase 2 */
Fabrice Bellard's avatar
Fabrice Bellard committed
182 183 184 185 186
    tmp11 = tmp5 + tmp6;
    tmp12 = tmp6 + tmp7;

    /* The rotator is modified from fig 4-8 to avoid extra negations. */
    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
187 188 189
    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;    /* c2-c6 */
    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;    /* c2+c6 */
    z3 = MULTIPLY(tmp11, FIX_0_707106781);         /* c4 */
Fabrice Bellard's avatar
Fabrice Bellard committed
190

191
    z11 = tmp7 + z3;            /* phase 5 */
Fabrice Bellard's avatar
Fabrice Bellard committed
192 193
    z13 = tmp7 - z3;

194
    dataptr[5] = z13 + z2;      /* phase 6 */
Fabrice Bellard's avatar
Fabrice Bellard committed
195 196 197 198
    dataptr[3] = z13 - z2;
    dataptr[1] = z11 + z4;
    dataptr[7] = z11 - z4;

199
    dataptr += DCTSIZE;         /* advance pointer to next row */
Fabrice Bellard's avatar
Fabrice Bellard committed
200
  }
201
}
Fabrice Bellard's avatar
Fabrice Bellard committed
202

203 204 205 206 207
/*
 * Perform the forward DCT on one block of samples.
 */

GLOBAL(void)
Diego Biurrun's avatar
Diego Biurrun committed
208
ff_fdct_ifast (int16_t * data)
209
{
210 211 212
  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  int tmp10, tmp11, tmp12, tmp13;
  int z1, z2, z3, z4, z5, z11, z13;
Diego Biurrun's avatar
Diego Biurrun committed
213
  int16_t *dataptr;
214 215 216
  int ctr;

  row_fdct(data);
217

Fabrice Bellard's avatar
Fabrice Bellard committed
218 219 220 221 222 223 224 225 226 227 228 229
  /* Pass 2: process columns. */

  dataptr = data;
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
230

Fabrice Bellard's avatar
Fabrice Bellard committed
231
    /* Even part */
232

233
    tmp10 = tmp0 + tmp3;        /* phase 2 */
Fabrice Bellard's avatar
Fabrice Bellard committed
234 235 236
    tmp13 = tmp0 - tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp1 - tmp2;
237

Fabrice Bellard's avatar
Fabrice Bellard committed
238 239
    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
    dataptr[DCTSIZE*4] = tmp10 - tmp11;
240

Fabrice Bellard's avatar
Fabrice Bellard committed
241 242 243
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
    dataptr[DCTSIZE*6] = tmp13 - z1;
244

Fabrice Bellard's avatar
Fabrice Bellard committed
245 246
    /* Odd part */

247
    tmp10 = tmp4 + tmp5;        /* phase 2 */
Fabrice Bellard's avatar
Fabrice Bellard committed
248 249 250 251 252 253 254 255 256
    tmp11 = tmp5 + tmp6;
    tmp12 = tmp6 + tmp7;

    /* The rotator is modified from fig 4-8 to avoid extra negations. */
    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */

257
    z11 = tmp7 + z3;            /* phase 5 */
Fabrice Bellard's avatar
Fabrice Bellard committed
258 259 260 261 262 263 264
    z13 = tmp7 - z3;

    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
    dataptr[DCTSIZE*3] = z13 - z2;
    dataptr[DCTSIZE*1] = z11 + z4;
    dataptr[DCTSIZE*7] = z11 - z4;

265
    dataptr++;                  /* advance pointer to next column */
Fabrice Bellard's avatar
Fabrice Bellard committed
266 267
  }
}
268

269 270 271 272 273
/*
 * Perform the forward 2-4-8 DCT on one block of samples.
 */

GLOBAL(void)
Diego Biurrun's avatar
Diego Biurrun committed
274
ff_fdct_ifast248 (int16_t * data)
275
{
276 277 278
  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  int tmp10, tmp11, tmp12, tmp13;
  int z1;
Diego Biurrun's avatar
Diego Biurrun committed
279
  int16_t *dataptr;
280 281
  int ctr;

282
  row_fdct(data);
283

284 285 286 287 288 289 290 291 292 293 294 295 296 297
  /* Pass 2: process columns. */

  dataptr = data;
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
    tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
    tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
    tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
    tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
    tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
    tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];

    /* Even part */
298

299 300 301 302
    tmp10 = tmp0 + tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp1 - tmp2;
    tmp13 = tmp0 - tmp3;
303

304 305
    dataptr[DCTSIZE*0] = tmp10 + tmp11;
    dataptr[DCTSIZE*4] = tmp10 - tmp11;
306

307 308 309 310 311 312 313 314
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
    dataptr[DCTSIZE*2] = tmp13 + z1;
    dataptr[DCTSIZE*6] = tmp13 - z1;

    tmp10 = tmp4 + tmp7;
    tmp11 = tmp5 + tmp6;
    tmp12 = tmp5 - tmp6;
    tmp13 = tmp4 - tmp7;
315

316 317
    dataptr[DCTSIZE*1] = tmp10 + tmp11;
    dataptr[DCTSIZE*5] = tmp10 - tmp11;
318

319 320 321
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
    dataptr[DCTSIZE*3] = tmp13 + z1;
    dataptr[DCTSIZE*7] = tmp13 - z1;
322

323
    dataptr++;                        /* advance pointer to next column */
324 325 326
  }
}

327 328 329 330 331 332

/* Remove some of the helper macros defined earlier in this file.
 * NOTE(review): FIX_0_382683433, FIX_0_707106781, MULTIPLY, RIGHT_SHIFT and
 * DCTSIZE are not undefined in the lines visible here -- confirm whether
 * leaving them defined is intentional.
 */
#undef GLOBAL
#undef CONST_BITS
#undef DESCALE
#undef FIX_0_541196100
#undef FIX_1_306562965