/*
 * H.264 IDCT
 * Copyright (c) 2004-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 IDCT.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "bit_depth_template.c"
#include "libavutil/common.h"
#include "h264dec.h"
#include "h264idct.h"

void FUNCC(ff_h264_idct_add)(uint8_t *_dst, int16_t *_block, int stride)
34
{
35
    int i;
36 37
    pixel *dst = (pixel*)_dst;
    dctcoef *block = (dctcoef*)_block;
38
    stride >>= sizeof(pixel)-1;
39

40
    block[0] += 1 << 5;
41 42

    for(i=0; i<4; i++){
43 44 45 46
        const SUINT z0=  block[i + 4*0]     +  block[i + 4*2];
        const SUINT z1=  block[i + 4*0]     -  block[i + 4*2];
        const SUINT z2= (block[i + 4*1]>>1) -  block[i + 4*3];
        const SUINT z3=  block[i + 4*1]     + (block[i + 4*3]>>1);
47 48 49 50 51

        block[i + 4*0]= z0 + z3;
        block[i + 4*1]= z1 + z2;
        block[i + 4*2]= z1 - z2;
        block[i + 4*3]= z0 - z3;
52 53 54
    }

    for(i=0; i<4; i++){
55 56 57 58
        const SUINT z0=  block[0 + 4*i]     +  (SUINT)block[2 + 4*i];
        const SUINT z1=  block[0 + 4*i]     -  (SUINT)block[2 + 4*i];
        const SUINT z2= (block[1 + 4*i]>>1) -  (SUINT)block[3 + 4*i];
        const SUINT z3=  block[1 + 4*i]     + (SUINT)(block[3 + 4*i]>>1);
59

60 61 62 63
        dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((int)(z0 + z3) >> 6));
        dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((int)(z1 + z2) >> 6));
        dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((int)(z1 - z2) >> 6));
        dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((int)(z0 - z3) >> 6));
64
    }
65 66

    memset(block, 0, 16 * sizeof(dctcoef));
67 68
}

void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t *_block, int stride){
70
    int i;
71 72
    pixel *dst = (pixel*)_dst;
    dctcoef *block = (dctcoef*)_block;
73
    stride >>= sizeof(pixel)-1;
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109

    block[0] += 32;

    for( i = 0; i < 8; i++ )
    {
        const int a0 =  block[i+0*8] + block[i+4*8];
        const int a2 =  block[i+0*8] - block[i+4*8];
        const int a4 = (block[i+2*8]>>1) - block[i+6*8];
        const int a6 = (block[i+6*8]>>1) + block[i+2*8];

        const int b0 = a0 + a6;
        const int b2 = a2 + a4;
        const int b4 = a2 - a4;
        const int b6 = a0 - a6;

        const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
        const int a3 =  block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
        const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
        const int a7 =  block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);

        const int b1 = (a7>>2) + a1;
        const int b3 =  a3 + (a5>>2);
        const int b5 = (a3>>2) - a5;
        const int b7 =  a7 - (a1>>2);

        block[i+0*8] = b0 + b7;
        block[i+7*8] = b0 - b7;
        block[i+1*8] = b2 + b5;
        block[i+6*8] = b2 - b5;
        block[i+2*8] = b4 + b3;
        block[i+5*8] = b4 - b3;
        block[i+3*8] = b6 + b1;
        block[i+4*8] = b6 - b1;
    }
    for( i = 0; i < 8; i++ )
    {
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
        const unsigned a0 =  block[0+i*8] + block[4+i*8];
        const unsigned a2 =  block[0+i*8] - block[4+i*8];
        const unsigned a4 = (block[2+i*8]>>1) - block[6+i*8];
        const unsigned a6 = (block[6+i*8]>>1) + block[2+i*8];

        const unsigned b0 = a0 + a6;
        const unsigned b2 = a2 + a4;
        const unsigned b4 = a2 - a4;
        const unsigned b6 = a0 - a6;

        const int a1 = -(unsigned)block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
        const int a3 =  (unsigned)block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
        const int a5 = -(unsigned)block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
        const int a7 =  (unsigned)block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);

        const unsigned b1 = (a7>>2) + (unsigned)a1;
        const unsigned b3 =  (unsigned)a3 + (a5>>2);
        const unsigned b5 = (a3>>2) - (unsigned)a5;
        const unsigned b7 =  (unsigned)a7 - (a1>>2);

        dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((int)(b0 + b7) >> 6) );
        dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((int)(b2 + b5) >> 6) );
        dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((int)(b4 + b3) >> 6) );
        dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((int)(b6 + b1) >> 6) );
        dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((int)(b6 - b1) >> 6) );
        dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((int)(b4 - b3) >> 6) );
        dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((int)(b2 - b5) >> 6) );
        dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((int)(b0 - b7) >> 6) );
138
    }
139 140

    memset(block, 0, 64 * sizeof(dctcoef));
141 142 143
}

// assumes all AC coefs are 0
144
void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
145
    int i, j;
146
    pixel *dst = (pixel*)_dst;
147 148
    dctcoef *block = (dctcoef*)_block;
    int dc = (block[0] + 32) >> 6;
149
    stride /= sizeof(pixel);
150
    block[0] = 0;
151 152 153
    for( j = 0; j < 4; j++ )
    {
        for( i = 0; i < 4; i++ )
154
            dst[i] = av_clip_pixel( dst[i] + dc );
155 156 157 158
        dst += stride;
    }
}

void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
160
    int i, j;
161
    pixel *dst = (pixel*)_dst;
162 163 164
    dctcoef *block = (dctcoef*)_block;
    int dc = (block[0] + 32) >> 6;
    block[0] = 0;
165
    stride /= sizeof(pixel);
166 167 168
    for( j = 0; j < 8; j++ )
    {
        for( i = 0; i < 8; i++ )
169
            dst[i] = av_clip_pixel( dst[i] + dc );
170 171 172 173
        dst += stride;
    }
}

void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
175 176 177 178
    int i;
    for(i=0; i<16; i++){
        int nnz = nnzc[ scan8[i] ];
        if(nnz){
179
            if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
180
            else                                  FUNCC(ff_h264_idct_add   )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
181 182 183 184
        }
    }
}

void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
186 187
    int i;
    for(i=0; i<16; i++){
188
        if(nnzc[ scan8[i] ])             FUNCC(ff_h264_idct_add   )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
189
        else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
190 191 192
    }
}

void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
194 195 196 197
    int i;
    for(i=0; i<16; i+=4){
        int nnz = nnzc[ scan8[i] ];
        if(nnz){
198 199
            if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
            else                                  FUNCC(ff_h264_idct8_add   )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
200 201 202 203
        }
    }
}

void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
205 206 207 208 209 210 211 212
    int i, j;
    for(j=1; j<3; j++){
        for(i=j*16; i<j*16+4; i++){
            if(nnzc[ scan8[i] ])
                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
            else if(((dctcoef*)block)[i*16])
                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
        }
213 214
    }
}
void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){
217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
    int i, j;

    for(j=1; j<3; j++){
        for(i=j*16; i<j*16+4; i++){
            if(nnzc[ scan8[i] ])
                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
            else if(((dctcoef*)block)[i*16])
                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
        }
    }

    for(j=1; j<3; j++){
        for(i=j*16+4; i<j*16+8; i++){
            if(nnzc[ scan8[i+4] ])
                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
            else if(((dctcoef*)block)[i*16])
                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
        }
    }
}

/**
 * IDCT transforms the 16 dc values and dequantizes them.
Diego Biurrun's avatar
Diego Biurrun committed
240
 * @param qmul quantization parameter
241
 */
Diego Biurrun's avatar
Diego Biurrun committed
242
void FUNCC(ff_h264_luma_dc_dequant_idct)(int16_t *_output, int16_t *_input, int qmul){
243 244 245 246
#define stride 16
    int i;
    int temp[16];
    static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride};
247 248
    dctcoef *input = (dctcoef*)_input;
    dctcoef *output = (dctcoef*)_output;
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263

    for(i=0; i<4; i++){
        const int z0= input[4*i+0] + input[4*i+1];
        const int z1= input[4*i+0] - input[4*i+1];
        const int z2= input[4*i+2] - input[4*i+3];
        const int z3= input[4*i+2] + input[4*i+3];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z0-z3;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z1+z2;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
264 265 266 267 268 269 270 271 272
        const SUINT z0= temp[4*0+i] + temp[4*2+i];
        const SUINT z1= temp[4*0+i] - temp[4*2+i];
        const SUINT z2= temp[4*1+i] - temp[4*3+i];
        const SUINT z3= temp[4*1+i] + temp[4*3+i];

        output[stride* 0+offset]= (int)((z0 + z3)*qmul + 128 ) >> 8;
        output[stride* 1+offset]= (int)((z1 + z2)*qmul + 128 ) >> 8;
        output[stride* 4+offset]= (int)((z1 - z2)*qmul + 128 ) >> 8;
        output[stride* 5+offset]= (int)((z0 - z3)*qmul + 128 ) >> 8;
273 274 275 276
    }
#undef stride
}

void FUNCC(ff_h264_chroma422_dc_dequant_idct)(int16_t *_block, int qmul){
278 279 280 281 282
    const int stride= 16*2;
    const int xStride= 16;
    int i;
    int temp[8];
    static const uint8_t x_offset[2]={0, 16};
283
    dctcoef *block = (dctcoef*)_block;
284 285 286 287 288 289 290 291

    for(i=0; i<4; i++){
        temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1];
        temp[2*i+1] = block[stride*i + xStride*0] - block[stride*i + xStride*1];
    }

    for(i=0; i<2; i++){
        const int offset= x_offset[i];
292 293 294 295 296 297 298 299 300
        const SUINT z0= temp[2*0+i] + temp[2*2+i];
        const SUINT z1= temp[2*0+i] - temp[2*2+i];
        const SUINT z2= temp[2*1+i] - temp[2*3+i];
        const SUINT z3= temp[2*1+i] + temp[2*3+i];

        block[stride*0+offset]= (int)((z0 + z3)*qmul + 128) >> 8;
        block[stride*1+offset]= (int)((z1 + z2)*qmul + 128) >> 8;
        block[stride*2+offset]= (int)((z1 - z2)*qmul + 128) >> 8;
        block[stride*3+offset]= (int)((z0 - z3)*qmul + 128) >> 8;
301 302 303
    }
}

void FUNCC(ff_h264_chroma_dc_dequant_idct)(int16_t *_block, int qmul){
305 306
    const int stride= 16*2;
    const int xStride= 16;
307
    SUINT a,b,c,d,e;
308
    dctcoef *block = (dctcoef*)_block;
309 310 311 312 313 314 315 316 317 318 319

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

320 321 322 323
    block[stride*0 + xStride*0]= (int)((a+c)*qmul) >> 7;
    block[stride*0 + xStride*1]= (int)((e+b)*qmul) >> 7;
    block[stride*1 + xStride*0]= (int)((a-c)*qmul) >> 7;
    block[stride*1 + xStride*1]= (int)((e-b)*qmul) >> 7;
324
}