/*
 * Copyright (C) 2003  James Klicman <james@klicman.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavcodec/fdctdsp.h"
#include "fdct.h"

#if HAVE_ALTIVEC

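/* Shorthand casts between AltiVec vector element types; these only
 * reinterpret the 128-bit register contents and generate no code. */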
#define vs16(v)   ((vector signed short) (v))
#define vs32(v)     ((vector signed int) (v))
#define  vu8(v)  ((vector unsigned char) (v))
#define vu16(v) ((vector unsigned short) (v))
#define vu32(v)   ((vector unsigned int) (v))

#define C1     0.98078528040323044912618224 /* cos(1 * PI / 16) */
#define C2     0.92387953251128675612818319 /* cos(2 * PI / 16) */
#define C3     0.83146961230254523707878838 /* cos(3 * PI / 16) */
#define C4     0.70710678118654752440084436 /* cos(4 * PI / 16) */
#define C5     0.55557023301960222474283081 /* cos(5 * PI / 16) */
#define C6     0.38268343236508977172845998 /* cos(6 * PI / 16) */
#define C7     0.19509032201612826784828487 /* cos(7 * PI / 16) */

#define W0 -(2 * C2)
#define W1  (2 * C6)
#define W2 (M_SQRT2 * C6)
#define W3 (M_SQRT2 * C3)
#define W4 (M_SQRT2 * (-C1 + C3 + C5 - C7))
#define W5 (M_SQRT2 *  (C1 + C3 - C5 + C7))
#define W6 (M_SQRT2 *  (C1 + C3 + C5 - C7))
#define W7 (M_SQRT2 *  (C1 + C3 - C5 - C7))
#define W8 (M_SQRT2 *  (C7 - C3))
#define W9 (M_SQRT2 * (-C1 - C3))
#define WA (M_SQRT2 * (-C3 - C5))
#define WB (M_SQRT2 *  (C5 - C3))
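
/* Derivation note: W0-W3 weight the even-half butterflies, while W4-WB
 * fold sums and differences of C1/C3/C5/C7 so that each odd-half output
 * needs a single multiply-add, a common fast-DCT factoring.  The M_SQRT2
 * factors fold the 1-D transform's normalization into the multiplies. */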

static const vector float fdctconsts[3] = {
    { W0, W1, W2, W3 },
    { W4, W5, W6, W7 },
    { W8, W9, WA, WB }
};

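/* All twelve weights are packed into three float vectors so the constant
 * set stays resident in three registers; LD_Wn then broadcasts one lane
 * across a vector with vec_splat instead of reloading from memory. */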
#define LD_W0 vec_splat(cnsts0, 0)
#define LD_W1 vec_splat(cnsts0, 1)
#define LD_W2 vec_splat(cnsts0, 2)
#define LD_W3 vec_splat(cnsts0, 3)
#define LD_W4 vec_splat(cnsts1, 0)
#define LD_W5 vec_splat(cnsts1, 1)
#define LD_W6 vec_splat(cnsts1, 2)
#define LD_W7 vec_splat(cnsts1, 3)
#define LD_W8 vec_splat(cnsts2, 0)
#define LD_W9 vec_splat(cnsts2, 1)
#define LD_WA vec_splat(cnsts2, 2)
#define LD_WB vec_splat(cnsts2, 3)

#define FDCTROW(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */           \
    x0 = vec_add(b0, b7);               /* x0 = b0 + b7; */         \
    x7 = vec_sub(b0, b7);               /* x7 = b0 - b7; */         \
    x1 = vec_add(b1, b6);               /* x1 = b1 + b6; */         \
    x6 = vec_sub(b1, b6);               /* x6 = b1 - b6; */         \
    x2 = vec_add(b2, b5);               /* x2 = b2 + b5; */         \
    x5 = vec_sub(b2, b5);               /* x5 = b2 - b5; */         \
    x3 = vec_add(b3, b4);               /* x3 = b3 + b4; */         \
    x4 = vec_sub(b3, b4);               /* x4 = b3 - b4; */         \
                                                                    \
    b7 = vec_add(x0, x3);               /* b7 = x0 + x3; */         \
    b1 = vec_add(x1, x2);               /* b1 = x1 + x2; */         \
    b0 = vec_add(b7, b1);               /* b0 = b7 + b1; */         \
    b4 = vec_sub(b7, b1);               /* b4 = b7 - b1; */         \
                                                                    \
    b2   = vec_sub(x0, x3);             /* b2 = x0 - x3; */         \
    b6   = vec_sub(x1, x2);             /* b6 = x1 - x2; */         \
    b5   = vec_add(b6, b2);             /* b5 = b6 + b2; */         \
    cnst = LD_W2;                                                   \
    b5   = vec_madd(cnst, b5, mzero);   /* b5 = b5 * W2; */         \
    cnst = LD_W1;                                                   \
    b2   = vec_madd(cnst, b2, b5);      /* b2 = b5 + b2 * W1; */    \
    cnst = LD_W0;                                                   \
    b6   = vec_madd(cnst, b6, b5);      /* b6 = b5 + b6 * W0; */    \
                                                                    \
    x0   = vec_add(x4, x7);             /* x0 = x4 + x7; */         \
    x1   = vec_add(x5, x6);             /* x1 = x5 + x6; */         \
    x2   = vec_add(x4, x6);             /* x2 = x4 + x6; */         \
    x3   = vec_add(x5, x7);             /* x3 = x5 + x7; */         \
    x8   = vec_add(x2, x3);             /* x8 = x2 + x3; */         \
    cnst = LD_W3;                                                   \
    x8   = vec_madd(cnst, x8, mzero);   /* x8 = x8 * W3; */         \
                                                                    \
    cnst = LD_W8;                                                   \
    x0   = vec_madd(cnst, x0, mzero);   /* x0 *= W8; */             \
    cnst = LD_W9;                                                   \
    x1   = vec_madd(cnst, x1, mzero);   /* x1 *= W9; */             \
    cnst = LD_WA;                                                   \
    x2   = vec_madd(cnst, x2, x8);      /* x2 = x2 * WA + x8; */    \
    cnst = LD_WB;                                                   \
    x3   = vec_madd(cnst, x3, x8);      /* x3 = x3 * WB + x8; */    \
                                                                    \
    cnst = LD_W4;                                                   \
    b7   = vec_madd(cnst, x4, x0);      /* b7 = x4 * W4 + x0; */    \
    cnst = LD_W5;                                                   \
    b5   = vec_madd(cnst, x5, x1);      /* b5 = x5 * W5 + x1; */    \
    cnst = LD_W6;                                                   \
    b3   = vec_madd(cnst, x6, x1);      /* b3 = x6 * W6 + x1; */    \
    cnst = LD_W7;                                                   \
    b1   = vec_madd(cnst, x7, x0);      /* b1 = x7 * W7 + x0; */    \
                                                                    \
    b7 = vec_add(b7, x2);               /* b7 = b7 + x2; */         \
    b5 = vec_add(b5, x3);               /* b5 = b5 + x3; */         \
    b3 = vec_add(b3, x2);               /* b3 = b3 + x2; */         \
    b1 = vec_add(b1, x3)                /* b1 = b1 + x3; */         \
    /* }}} */

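/* FDCTCOL performs the same butterfly sequence as FDCTROW; it is kept as
 * a separate macro so the row and column passes read symmetrically.  Only
 * FDCTCOL is invoked below, since the row pass is inlined with an integer
 * pre-pass. */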
#define FDCTCOL(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */           \
    x0 = vec_add(b0, b7);               /* x0 = b0 + b7; */         \
    x7 = vec_sub(b0, b7);               /* x7 = b0 - b7; */         \
    x1 = vec_add(b1, b6);               /* x1 = b1 + b6; */         \
    x6 = vec_sub(b1, b6);               /* x6 = b1 - b6; */         \
    x2 = vec_add(b2, b5);               /* x2 = b2 + b5; */         \
    x5 = vec_sub(b2, b5);               /* x5 = b2 - b5; */         \
    x3 = vec_add(b3, b4);               /* x3 = b3 + b4; */         \
    x4 = vec_sub(b3, b4);               /* x4 = b3 - b4; */         \
                                                                    \
    b7 = vec_add(x0, x3);               /* b7 = x0 + x3; */         \
    b1 = vec_add(x1, x2);               /* b1 = x1 + x2; */         \
    b0 = vec_add(b7, b1);               /* b0 = b7 + b1; */         \
    b4 = vec_sub(b7, b1);               /* b4 = b7 - b1; */         \
                                                                    \
    b2   = vec_sub(x0, x3);             /* b2 = x0 - x3; */         \
    b6   = vec_sub(x1, x2);             /* b6 = x1 - x2; */         \
    b5   = vec_add(b6, b2);             /* b5 = b6 + b2; */         \
    cnst = LD_W2;                                                   \
    b5   = vec_madd(cnst, b5, mzero);   /* b5 = b5 * W2; */         \
    cnst = LD_W1;                                                   \
    b2   = vec_madd(cnst, b2, b5);      /* b2 = b5 + b2 * W1; */    \
    cnst = LD_W0;                                                   \
    b6   = vec_madd(cnst, b6, b5);      /* b6 = b5 + b6 * W0; */    \
                                                                    \
    x0   = vec_add(x4, x7);             /* x0 = x4 + x7; */         \
    x1   = vec_add(x5, x6);             /* x1 = x5 + x6; */         \
    x2   = vec_add(x4, x6);             /* x2 = x4 + x6; */         \
    x3   = vec_add(x5, x7);             /* x3 = x5 + x7; */         \
    x8   = vec_add(x2, x3);             /* x8 = x2 + x3; */         \
    cnst = LD_W3;                                                   \
    x8   = vec_madd(cnst, x8, mzero);   /* x8 = x8 * W3; */         \
                                                                    \
    cnst = LD_W8;                                                   \
    x0   = vec_madd(cnst, x0, mzero);   /* x0 *= W8; */             \
    cnst = LD_W9;                                                   \
    x1   = vec_madd(cnst, x1, mzero);   /* x1 *= W9; */             \
    cnst = LD_WA;                                                   \
    x2   = vec_madd(cnst, x2, x8);      /* x2 = x2 * WA + x8; */    \
    cnst = LD_WB;                                                   \
    x3   = vec_madd(cnst, x3, x8);      /* x3 = x3 * WB + x8; */    \
                                                                    \
    cnst = LD_W4;                                                   \
    b7   = vec_madd(cnst, x4, x0);      /* b7 = x4 * W4 + x0; */    \
    cnst = LD_W5;                                                   \
    b5   = vec_madd(cnst, x5, x1);      /* b5 = x5 * W5 + x1; */    \
    cnst = LD_W6;                                                   \
    b3   = vec_madd(cnst, x6, x1);      /* b3 = x6 * W6 + x1; */    \
    cnst = LD_W7;                                                   \
    b1   = vec_madd(cnst, x7, x0);      /* b1 = x7 * W7 + x0; */    \
                                                                    \
    b7 = vec_add(b7, x2);               /* b7 += x2; */             \
    b5 = vec_add(b5, x3);               /* b5 += x3; */             \
    b3 = vec_add(b3, x2);               /* b3 += x2; */             \
    b1 = vec_add(b1, x3)                /* b1 += x3; */             \
    /* }}} */

/* two-dimensional discrete cosine transform */
void ff_fdct_altivec(int16_t *block)
{
    vector signed short *bp;
    const vector float *cp = fdctconsts;
    vector float b00, b10, b20, b30, b40, b50, b60, b70;
    vector float b01, b11, b21, b31, b41, b51, b61, b71;
    vector float mzero, cnst, cnsts0, cnsts1, cnsts2;
    vector float x0, x1, x2, x3, x4, x5, x6, x7, x8;

    /* setup constants {{{ */
    /* mzero = -0.0 */
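    /* vec_splat_u32(-1) fills every element with all-ones; vec_sl then
     * shifts each element left by 31 (shift counts are taken modulo 32),
     * leaving only the sign bit, i.e. 0x80000000 == -0.0f per lane.
     * -0.0 is the additive identity of vec_madd, so it serves as a
     * "multiply only" addend without loading a separate constant. */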
    mzero  = ((vector float) vec_splat_u32(-1));
    mzero  = ((vector float) vec_sl(vu32(mzero), vu32(mzero)));
    cnsts0 = vec_ld(0, cp);
    cp++;
    cnsts1 = vec_ld(0, cp);
    cp++;
    cnsts2 = vec_ld(0, cp);
    /* }}} */

    /* 8x8 matrix transpose (vector short[8]) {{{ */
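    /* vec_mergeh/vec_mergel interleave matching halves of two vectors;
     * three rounds of merges transpose the 8x8 int16 block so that each
     * butterfly in the row pass operates on all eight rows at once, one
     * row per vector lane. */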
#define MERGE_S16(hl, a, b) vec_merge ## hl(vs16(a), vs16(b))

    bp  = (vector signed short *) block;
    b00 = ((vector float) vec_ld(0,      bp));
    b40 = ((vector float) vec_ld(16 * 4, bp));
    b01 = ((vector float) MERGE_S16(h, b00, b40));
    b11 = ((vector float) MERGE_S16(l, b00, b40));
    bp++;
    b10 = ((vector float) vec_ld(0,      bp));
    b50 = ((vector float) vec_ld(16 * 4, bp));
    b21 = ((vector float) MERGE_S16(h, b10, b50));
    b31 = ((vector float) MERGE_S16(l, b10, b50));
    bp++;
    b20 = ((vector float) vec_ld(0,      bp));
    b60 = ((vector float) vec_ld(16 * 4, bp));
    b41 = ((vector float) MERGE_S16(h, b20, b60));
    b51 = ((vector float) MERGE_S16(l, b20, b60));
    bp++;
    b30 = ((vector float) vec_ld(0,      bp));
    b70 = ((vector float) vec_ld(16 * 4, bp));
    b61 = ((vector float) MERGE_S16(h, b30, b70));
    b71 = ((vector float) MERGE_S16(l, b30, b70));

    x0 = ((vector float) MERGE_S16(h, b01, b41));
    x1 = ((vector float) MERGE_S16(l, b01, b41));
    x2 = ((vector float) MERGE_S16(h, b11, b51));
    x3 = ((vector float) MERGE_S16(l, b11, b51));
    x4 = ((vector float) MERGE_S16(h, b21, b61));
    x5 = ((vector float) MERGE_S16(l, b21, b61));
    x6 = ((vector float) MERGE_S16(h, b31, b71));
    x7 = ((vector float) MERGE_S16(l, b31, b71));

    b00 = ((vector float) MERGE_S16(h, x0, x4));
    b10 = ((vector float) MERGE_S16(l, x0, x4));
    b20 = ((vector float) MERGE_S16(h, x1, x5));
    b30 = ((vector float) MERGE_S16(l, x1, x5));
    b40 = ((vector float) MERGE_S16(h, x2, x6));
    b50 = ((vector float) MERGE_S16(l, x2, x6));
    b60 = ((vector float) MERGE_S16(h, x3, x7));
    b70 = ((vector float) MERGE_S16(l, x3, x7));

#undef MERGE_S16
    /* }}} */

    /* Some of the initial calculations can be done as vector short
     * before conversion to vector float.  The following code section
     * takes advantage of this. */
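    /* (The first add/subtract stages cannot overflow int16 for
     * 8-bit-sourced blocks, since at most eight input values are summed,
     * so they run on all eight coefficients per vector; only the multiply
     * stages below need the float conversion.) */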

    /* fdct rows {{{ */
    x0 = ((vector float) vec_add(vs16(b00), vs16(b70)));
    x7 = ((vector float) vec_sub(vs16(b00), vs16(b70)));
    x1 = ((vector float) vec_add(vs16(b10), vs16(b60)));
    x6 = ((vector float) vec_sub(vs16(b10), vs16(b60)));
    x2 = ((vector float) vec_add(vs16(b20), vs16(b50)));
    x5 = ((vector float) vec_sub(vs16(b20), vs16(b50)));
    x3 = ((vector float) vec_add(vs16(b30), vs16(b40)));
    x4 = ((vector float) vec_sub(vs16(b30), vs16(b40)));

    b70 = ((vector float) vec_add(vs16(x0), vs16(x3)));
    b10 = ((vector float) vec_add(vs16(x1), vs16(x2)));

    b00 = ((vector float) vec_add(vs16(b70), vs16(b10)));
    b40 = ((vector float) vec_sub(vs16(b70), vs16(b10)));

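/* CTF0(n): sign-extend the two int16 halves of b<n>0 into int32 vectors
 * (vec_unpackh/vec_unpackl), then convert both to float with vec_ctf. */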
#define CTF0(n)                                                    \
    b ## n ## 1 = ((vector float) vec_unpackl(vs16(b ## n ## 0))); \
    b ## n ## 0 = ((vector float) vec_unpackh(vs16(b ## n ## 0))); \
    b ## n ## 1 = vec_ctf(vs32(b ## n ## 1), 0);                   \
    b ## n ## 0 = vec_ctf(vs32(b ## n ## 0), 0)

    CTF0(0);
    CTF0(4);

    b20 = ((vector float) vec_sub(vs16(x0), vs16(x3)));
    b60 = ((vector float) vec_sub(vs16(x1), vs16(x2)));

    CTF0(2);
    CTF0(6);

#undef CTF0

    x0 = vec_add(b60, b20);
    x1 = vec_add(b61, b21);

    cnst = LD_W2;
    x0   = vec_madd(cnst, x0, mzero);
    x1   = vec_madd(cnst, x1, mzero);
    cnst = LD_W1;
    b20  = vec_madd(cnst, b20, x0);
    b21  = vec_madd(cnst, b21, x1);
    cnst = LD_W0;
    b60  = vec_madd(cnst, b60, x0);
    b61  = vec_madd(cnst, b61, x1);

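/* CTFX(x, b): like CTF0, but converts the int16 vector x into the float
 * vector pair b0/b1 instead of converting in place. */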
#define CTFX(x, b)                                  \
    b ## 0 = ((vector float) vec_unpackh(vs16(x))); \
    b ## 1 = ((vector float) vec_unpackl(vs16(x))); \
    b ## 0 = vec_ctf(vs32(b ## 0), 0);              \
    b ## 1 = vec_ctf(vs32(b ## 1), 0)

    CTFX(x4, b7);
    CTFX(x5, b5);
    CTFX(x6, b3);
    CTFX(x7, b1);

#undef CTFX

    x0   = vec_add(b70, b10);
    x1   = vec_add(b50, b30);
    x2   = vec_add(b70, b30);
    x3   = vec_add(b50, b10);
    x8   = vec_add(x2, x3);
    cnst = LD_W3;
    x8   = vec_madd(cnst, x8, mzero);

    cnst = LD_W8;
    x0   = vec_madd(cnst, x0, mzero);
    cnst = LD_W9;
    x1   = vec_madd(cnst, x1, mzero);
    cnst = LD_WA;
    x2   = vec_madd(cnst, x2, x8);
    cnst = LD_WB;
    x3   = vec_madd(cnst, x3, x8);

    cnst = LD_W4;
    b70  = vec_madd(cnst, b70, x0);
    cnst = LD_W5;
    b50  = vec_madd(cnst, b50, x1);
    cnst = LD_W6;
    b30  = vec_madd(cnst, b30, x1);
    cnst = LD_W7;
    b10  = vec_madd(cnst, b10, x0);

    b70 = vec_add(b70, x2);
    b50 = vec_add(b50, x3);
    b30 = vec_add(b30, x2);
    b10 = vec_add(b10, x3);

    x0   = vec_add(b71, b11);
    x1   = vec_add(b51, b31);
    x2   = vec_add(b71, b31);
    x3   = vec_add(b51, b11);
    x8   = vec_add(x2, x3);
    cnst = LD_W3;
    x8   = vec_madd(cnst, x8, mzero);

    cnst = LD_W8;
    x0   = vec_madd(cnst, x0, mzero);
    cnst = LD_W9;
    x1   = vec_madd(cnst, x1, mzero);
    cnst = LD_WA;
    x2   = vec_madd(cnst, x2, x8);
    cnst = LD_WB;
    x3   = vec_madd(cnst, x3, x8);

    cnst = LD_W4;
    b71  = vec_madd(cnst, b71, x0);
    cnst = LD_W5;
    b51  = vec_madd(cnst, b51, x1);
    cnst = LD_W6;
    b31  = vec_madd(cnst, b31, x1);
    cnst = LD_W7;
    b11  = vec_madd(cnst, b11, x0);

    b71 = vec_add(b71, x2);
    b51 = vec_add(b51, x3);
    b31 = vec_add(b31, x2);
    b11 = vec_add(b11, x3);
    /* }}} */

    /* 8x8 matrix transpose (vector float[8][2]) {{{ */
    x0 = vec_mergel(b00, b20);
    x1 = vec_mergeh(b00, b20);
    x2 = vec_mergel(b10, b30);
    x3 = vec_mergeh(b10, b30);

    b00 = vec_mergeh(x1, x3);
    b10 = vec_mergel(x1, x3);
    b20 = vec_mergeh(x0, x2);
    b30 = vec_mergel(x0, x2);

    x4 = vec_mergel(b41, b61);
    x5 = vec_mergeh(b41, b61);
    x6 = vec_mergel(b51, b71);
    x7 = vec_mergeh(b51, b71);

    b41 = vec_mergeh(x5, x7);
    b51 = vec_mergel(x5, x7);
    b61 = vec_mergeh(x4, x6);
    b71 = vec_mergel(x4, x6);

    x0 = vec_mergel(b01, b21);
    x1 = vec_mergeh(b01, b21);
    x2 = vec_mergel(b11, b31);
    x3 = vec_mergeh(b11, b31);

    x4 = vec_mergel(b40, b60);
    x5 = vec_mergeh(b40, b60);
    x6 = vec_mergel(b50, b70);
    x7 = vec_mergeh(b50, b70);

    b40 = vec_mergeh(x1, x3);
    b50 = vec_mergel(x1, x3);
    b60 = vec_mergeh(x0, x2);
    b70 = vec_mergel(x0, x2);

    b01 = vec_mergeh(x5, x7);
    b11 = vec_mergel(x5, x7);
    b21 = vec_mergeh(x4, x6);
    b31 = vec_mergel(x4, x6);
    /* }}} */

    FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70);
    FDCTCOL(b01, b11, b21, b31, b41, b51, b61, b71);

    /* round, convert back to short {{{ */
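/* CTS(n): round each float to the nearest integral value, convert to
 * int32 with vec_cts, pack the two int32 half-vectors back into one
 * int16 vector and store the eight coefficients of output row n. */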
#define CTS(n)                                                  \
    b ## n ## 0 = vec_round(b ## n ## 0);                       \
    b ## n ## 1 = vec_round(b ## n ## 1);                       \
    b ## n ## 0 = ((vector float) vec_cts(b ## n ## 0, 0));     \
    b ## n ## 1 = ((vector float) vec_cts(b ## n ## 1, 0));     \
    b ## n ## 0 = ((vector float) vec_pack(vs32(b ## n ## 0),   \
                                           vs32(b ## n ## 1))); \
    vec_st(vs16(b ## n ## 0), 0, bp)

    bp = (vector signed short *) block;
    CTS(0);
    bp++;
    CTS(1);
    bp++;
    CTS(2);
    bp++;
    CTS(3);
    bp++;
    CTS(4);
    bp++;
    CTS(5);
    bp++;
    CTS(6);
    bp++;
    CTS(7);

#undef CTS
    /* }}} */
}

#endif /* HAVE_ALTIVEC */

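/* Runtime dispatch: install the AltiVec FDCT only when the CPU reports
 * AltiVec support, the caller requested the default or AltiVec DCT, and
 * the content is 8 bits per sample (only the 8-bit path is wired up). */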
av_cold void ff_fdctdsp_init_ppc(FDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
#if HAVE_ALTIVEC
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

    if (!high_bit_depth) {
        if (avctx->dct_algo == FF_DCT_AUTO ||
            avctx->dct_algo == FF_DCT_ALTIVEC) {
            c->fdct = ff_fdct_altivec;
        }
    }
#endif /* HAVE_ALTIVEC */
}