/*
 * jfdctfst.c
 *
 * This file is part of the Independent JPEG Group's software.
 *
 * The authors make NO WARRANTY or representation, either express or implied,
 * with respect to this software, its quality, accuracy, merchantability, or
 * fitness for a particular purpose.  This software is provided "AS IS", and
 * you, its user, assume the entire risk as to its quality and accuracy.
 *
 * This software is copyright (C) 1994-1996, Thomas G. Lane.
 * All Rights Reserved except as specified below.
 *
 * Permission is hereby granted to use, copy, modify, and distribute this
 * software (or portions thereof) for any purpose, without fee, subject to
 * these conditions:
 * (1) If any part of the source code for this software is distributed, then
 * this README file must be included, with this copyright and no-warranty
 * notice unaltered; and any additions, deletions, or changes to the original
 * files must be clearly indicated in accompanying documentation.
 * (2) If only executable code is distributed, then the accompanying
 * documentation must state that "this software is based in part on the work
 * of the Independent JPEG Group".
 * (3) Permission for use of this software is granted only if the user accepts
 * full responsibility for any undesirable consequences; the authors accept
 * NO LIABILITY for damages of any kind.
 *
 * These conditions apply to any software derived from or based on the IJG
 * code, not just to the unmodified library.  If you use our work, you ought
 * to acknowledge us.
 *
 * Permission is NOT granted for the use of any IJG author's name or company
 * name in advertising or publicity relating to this software or products
 * derived from it.  This software may be referred to only as "the Independent
 * JPEG Group's software".
 *
 * We specifically permit and encourage the use of this software as the basis
 * of commercial products, provided that all warranty or liability claims are
 * assumed by the product vendor.
 *
 * This file contains a fast, not so accurate integer implementation of the
 * forward DCT (Discrete Cosine Transform).
 *
 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
 * on each column.  Direct algorithms are also available, but they are
 * much more complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on Arai, Agui, and Nakajima's algorithm for
 * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
 * Japanese, but the algorithm is described in the Pennebaker & Mitchell
 * JPEG textbook (see REFERENCES section in file README).  The following code
 * is based directly on figure 4-8 in P&M.
 * While an 8-point DCT cannot be done in less than 11 multiplies, it is
 * possible to arrange the computation so that many of the multiplies are
 * simple scalings of the final outputs.  These multiplies can then be
 * folded into the multiplications or divisions by the JPEG quantization
 * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
 * to be done in the DCT itself.
 * The primary disadvantage of this method is that with fixed-point math,
 * accuracy is lost due to imprecise representation of the scaled
 * quantization values.  The smaller the quantization table entry, the less
 * precise the scaled value, so this implementation does worse with high-
 * quality-setting files than with low-quality ones.
 */

/**
 * @file libavcodec/jfdctfst.c
 * Independent JPEG Group's fast AAN DCT.
 */

#include <stdlib.h>
#include <stdio.h>
#include "libavutil/common.h"
#include "dsputil.h"

#define DCTSIZE 8
#define GLOBAL(x) x
#define RIGHT_SHIFT(x, n) ((x) >> (n))
#define SHIFT_TEMPS

/*
 * This module is specialized to the case DCTSIZE = 8.
 */

#if DCTSIZE != 8
#error "Sorry, this code only copes with 8x8 DCTs."
#endif


/* Scaling decisions are generally the same as in the LL&M algorithm;
 * see jfdctint.c for more details.  However, we choose to descale
 * (right shift) multiplication products as soon as they are formed,
 * rather than carrying additional fractional bits into subsequent additions.
 * This compromises accuracy slightly, but it lets us save a few shifts.
 * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
 * everywhere except in the multiplications proper; this saves a good deal
 * of work on 16-bit-int machines.
 *
 * Again to save a few shifts, the intermediate results between pass 1 and
 * pass 2 are not upscaled, but are represented only to integral precision.
 *
 * A final compromise is to represent the multiplicative constants to only
 * 8 fractional bits, rather than 13.  This saves some shifting work on some
 * machines, and may also reduce the cost of multiplication (since there
 * are fewer one-bits in the constants).
 */

#define CONST_BITS  8


/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 */

#if CONST_BITS == 8
#define FIX_0_382683433  ((int32_t)   98)       /* FIX(0.382683433) */
#define FIX_0_541196100  ((int32_t)  139)       /* FIX(0.541196100) */
#define FIX_0_707106781  ((int32_t)  181)       /* FIX(0.707106781) */
#define FIX_1_306562965  ((int32_t)  334)       /* FIX(1.306562965) */
#else
/* Fallback so that a non-default CONST_BITS still builds when no header
 * supplies FIX(): convert a real constant to fixed point with CONST_BITS
 * fractional bits, rounding to nearest.
 */
#ifndef FIX
#define FIX(x)  ((int32_t) ((x) * (1 << CONST_BITS) + 0.5))
#endif
#define FIX_0_382683433  FIX(0.382683433)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_707106781  FIX(0.707106781)
#define FIX_1_306562965  FIX(1.306562965)
#endif


/* We can gain a little more speed, with a further compromise in accuracy,
 * by omitting the addition in a descaling shift.  This yields an incorrectly
 * rounded result half the time...
 *
 * When USE_ACCURATE_ROUNDING is requested and no DESCALE is already in
 * scope, a rounding version is provided so that MULTIPLY() still expands.
 */

#ifndef USE_ACCURATE_ROUNDING
#undef DESCALE
#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
#elif !defined(DESCALE)
#define DESCALE(x,n)  RIGHT_SHIFT((x) + (1 << ((n)-1)), n)
#endif


/* Multiply a DCTELEM variable by an int32_t constant, and immediately
 * descale to yield a DCTELEM result.
 */

#define MULTIPLY(var,coef)  ((DCTELEM) DESCALE((var) * (coef), CONST_BITS))

/**
 * Pass 1 of the forward DCT: transform every row of the block in place.
 *
 * Each row of DCTSIZE samples goes through the AA&N butterfly network.
 * Products are descaled as soon as they are formed (see MULTIPLY), so the
 * values left in the block carry no extra fractional bits.
 *
 * @param data the DCTSIZE x DCTSIZE block, stored row after row; every
 *             element is overwritten with its row-transformed value
 */
static av_always_inline void row_fdct(DCTELEM * data){
  int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  int_fast16_t tmp10, tmp11, tmp12, tmp13;
  int_fast16_t z1, z2, z3, z4, z5, z11, z13;
  DCTELEM *dataptr;
  int ctr;
  SHIFT_TEMPS

  /* Pass 1: process rows. */

  dataptr = data;
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    /* Fold the row around its centre: the sums (tmp0..tmp3) feed the even
     * outputs, the differences (tmp4..tmp7) feed the odd outputs. */
    tmp0 = dataptr[0] + dataptr[7];
    tmp7 = dataptr[0] - dataptr[7];
    tmp1 = dataptr[1] + dataptr[6];
    tmp6 = dataptr[1] - dataptr[6];
    tmp2 = dataptr[2] + dataptr[5];
    tmp5 = dataptr[2] - dataptr[5];
    tmp3 = dataptr[3] + dataptr[4];
    tmp4 = dataptr[3] - dataptr[4];

    /* Even part */

    tmp10 = tmp0 + tmp3;        /* phase 2 */
    tmp13 = tmp0 - tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp1 - tmp2;

    dataptr[0] = tmp10 + tmp11; /* phase 3 */
    dataptr[4] = tmp10 - tmp11;

    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
    dataptr[2] = tmp13 + z1;    /* phase 5 */
    dataptr[6] = tmp13 - z1;

    /* Odd part */

    /* tmp10..tmp12 are reused here; the even-part values are dead by now. */
    tmp10 = tmp4 + tmp5;        /* phase 2 */
    tmp11 = tmp5 + tmp6;
    tmp12 = tmp6 + tmp7;

    /* The rotator is modified from fig 4-8 to avoid extra negations. */
    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;    /* c2-c6 */
    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;    /* c2+c6 */
    z3 = MULTIPLY(tmp11, FIX_0_707106781);         /* c4 */

    z11 = tmp7 + z3;            /* phase 5 */
    z13 = tmp7 - z3;

    dataptr[5] = z13 + z2;      /* phase 6 */
    dataptr[3] = z13 - z2;
    dataptr[1] = z11 + z4;
    dataptr[7] = z11 - z4;

    dataptr += DCTSIZE;         /* advance pointer to next row */
  }
}

/*
 * Perform the forward DCT on one block of samples.
 */

GLOBAL(void)
fdct_ifast (DCTELEM * data)
{
  int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  int_fast16_t tmp10, tmp11, tmp12, tmp13;
  int_fast16_t z1, z2, z3, z4, z5, z11, z13;
  DCTELEM *dataptr;
  int ctr;
  SHIFT_TEMPS

  row_fdct(data);
222

Fabrice Bellard's avatar
Fabrice Bellard committed
223 224 225 226 227 228 229 230 231 232 233 234
  /* Pass 2: process columns. */

  dataptr = data;
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
235

Fabrice Bellard's avatar
Fabrice Bellard committed
236
    /* Even part */
237

238
    tmp10 = tmp0 + tmp3;        /* phase 2 */
Fabrice Bellard's avatar
Fabrice Bellard committed
239 240 241
    tmp13 = tmp0 - tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp1 - tmp2;
242

Fabrice Bellard's avatar
Fabrice Bellard committed
243 244
    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
    dataptr[DCTSIZE*4] = tmp10 - tmp11;
245

Fabrice Bellard's avatar
Fabrice Bellard committed
246 247 248
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
    dataptr[DCTSIZE*6] = tmp13 - z1;
249

Fabrice Bellard's avatar
Fabrice Bellard committed
250 251
    /* Odd part */

252
    tmp10 = tmp4 + tmp5;        /* phase 2 */
Fabrice Bellard's avatar
Fabrice Bellard committed
253 254 255 256 257 258 259 260 261
    tmp11 = tmp5 + tmp6;
    tmp12 = tmp6 + tmp7;

    /* The rotator is modified from fig 4-8 to avoid extra negations. */
    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */

262
    z11 = tmp7 + z3;            /* phase 5 */
Fabrice Bellard's avatar
Fabrice Bellard committed
263 264 265 266 267 268 269
    z13 = tmp7 - z3;

    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
    dataptr[DCTSIZE*3] = z13 - z2;
    dataptr[DCTSIZE*1] = z11 + z4;
    dataptr[DCTSIZE*7] = z11 - z4;

270
    dataptr++;                  /* advance pointer to next column */
Fabrice Bellard's avatar
Fabrice Bellard committed
271 272
  }
}
273

274 275 276 277 278 279 280
/*
 * Perform the forward 2-4-8 DCT on one block of samples.
 */

GLOBAL(void)
fdct_ifast248 (DCTELEM * data)
{
281 282 283
  int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  int_fast16_t tmp10, tmp11, tmp12, tmp13;
  int_fast16_t z1;
284 285 286 287
  DCTELEM *dataptr;
  int ctr;
  SHIFT_TEMPS

288
  row_fdct(data);
289

290 291 292 293 294 295 296 297 298 299 300 301 302 303
  /* Pass 2: process columns. */

  dataptr = data;
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
    tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
    tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
    tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
    tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
    tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
    tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];

    /* Even part */
304

305 306 307 308
    tmp10 = tmp0 + tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp1 - tmp2;
    tmp13 = tmp0 - tmp3;
309

310 311
    dataptr[DCTSIZE*0] = tmp10 + tmp11;
    dataptr[DCTSIZE*4] = tmp10 - tmp11;
312

313 314 315 316 317 318 319 320
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
    dataptr[DCTSIZE*2] = tmp13 + z1;
    dataptr[DCTSIZE*6] = tmp13 - z1;

    tmp10 = tmp4 + tmp7;
    tmp11 = tmp5 + tmp6;
    tmp12 = tmp5 - tmp6;
    tmp13 = tmp4 - tmp7;
321

322 323
    dataptr[DCTSIZE*1] = tmp10 + tmp11;
    dataptr[DCTSIZE*5] = tmp10 - tmp11;
324

325 326 327
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
    dataptr[DCTSIZE*3] = tmp13 + z1;
    dataptr[DCTSIZE*7] = tmp13 - z1;
328

329
    dataptr++;                        /* advance pointer to next column */
330 331 332
  }
}

333 334 335 336 337 338

/* Release file-local helper macros after the DCT routines.
 * NOTE(review): only two of the four FIX_* constants are undefined in the
 * lines visible here, and DCTSIZE, RIGHT_SHIFT, SHIFT_TEMPS and MULTIPLY are
 * not undefined at all -- confirm whether that is intentional.
 */
#undef GLOBAL
#undef CONST_BITS
#undef DESCALE
#undef FIX_0_541196100
#undef FIX_1_306562965