postprocess.c 37.4 KB
Newer Older
1
/*
2 3 4 5
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
 *
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
6
 * This file is part of FFmpeg.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22

Michael Niedermayer's avatar
Michael Niedermayer committed
23
/**
24
 * @file
Michael Niedermayer's avatar
Michael Niedermayer committed
25 26
 * postprocessing.
 */
27

28
/*
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
                        C       MMX     MMX2    3DNow   AltiVec
isVertDC                Ec      Ec                      Ec
isVertMinMaxOk          Ec      Ec                      Ec
doVertLowPass           E               e       e       Ec
doVertDefFilter         Ec      Ec      e       e       Ec
isHorizDC               Ec      Ec                      Ec
isHorizMinMaxOk         a       E                       Ec
doHorizLowPass          E               e       e       Ec
doHorizDefFilter        Ec      Ec      e       e       Ec
do_a_deblock            Ec      E       Ec      E
deRing                  E               e       e*      Ecp
Vertical RKAlgo1        E               a       a
Horizontal RKAlgo1                      a       a
Vertical X1#            a               E       E
Horizontal X1#          a               E       E
LinIpolDeinterlace      e               E       E*
CubicIpolDeinterlace    a               e       e*
LinBlendDeinterlace     e               E       E*
MedianDeinterlace#      E       Ec      Ec
TempDeNoiser#           E               e       e       Ec
49

Diego Biurrun's avatar
Diego Biurrun committed
50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
Diego Biurrun's avatar
Diego Biurrun committed
51
# more or less selfinvented filters so the exactness is not too meaningful
52
E = Exact implementation
Diego Biurrun's avatar
Diego Biurrun committed
53
e = almost exact implementation (slightly different rounding,...)
54 55
a = alternative / approximate impl
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57 58 59 60 61 62 63
*/

/*
TODO:
reduce the time wasted on the mem transfer
unroll stuff if instructions depend too much on the prior one
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67 68
compare the quality & speed of all filters
split this huge file
Michael Niedermayer's avatar
Michael Niedermayer committed
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72 73
*/

74
//Changelog: use git log
75

76
#include "config.h"
77
#include "libavutil/avutil.h"
78
#include "libavutil/avassert.h"
79 80
#include <inttypes.h>
#include <stdio.h>
81
#include <stdlib.h>
82
#include <string.h>
83
//#undef HAVE_MMX2
84
//#define HAVE_AMD3DNOW
85
//#undef HAVE_MMX
86
//#undef ARCH_X86
87
//#define DEBUG_BRIGHTNESS
88
#include "postprocess.h"
Michael Niedermayer's avatar
Michael Niedermayer committed
89
#include "postprocess_internal.h"
90
#include "libavutil/avstring.h"
91

92 93
unsigned postproc_version(void)
{
94
    av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
95 96 97
    return LIBPOSTPROC_VERSION_INT;
}

98
const char *postproc_configuration(void)
99
{
100
    return FFMPEG_CONFIGURATION;
101 102
}

103
const char *postproc_license(void)
104 105
{
#define LICENSE_PREFIX "libpostproc license: "
106
    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
107 108
}

109
#if HAVE_ALTIVEC_H
110 111 112
#include <altivec.h>
#endif

113 114
#define GET_MODE_BUFFER_SIZE 500
#define OPTIONS_ARRAY_SIZE 10
115 116 117
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
118

119
#if ARCH_X86
/* 64-bit constants declared through DECLARE_ASM_CONST (first argument 8).
 * Naming follows the value: wNN holds the 16-bit pattern 0x00NN repeated in
 * all four words, bNN holds the byte 0xNN repeated in all eight bytes.
 * NOTE(review): presumably referenced from the inline-asm code in
 * postprocess_template.c -- confirm before removing any of them. */
120 121 122 123 124 125 126 127
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
128
#endif
129

130
/* Declared for every architecture, unlike the constants above. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
131

132

133 134
static struct PPFilter filters[]=
{
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",                1, 5, 6, DERING},
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
153
    {"be", "bitexact",              1, 0, 0, BITEXACT},
154
    {NULL, NULL,0,0,0,0} //End Marker
155 156
};

157
/**
 * Alias table: entries come in (alias, expansion) pairs, so even indices
 * hold the alias name and odd indices the filter string it stands for.
 * A single NULL in an alias slot terminates the table.
 */
static const char *replaceTable[]=
{
    /* alias */
    "default",
        /* expands to */ "hb:a,vb:a,dr:a",
    "de",
        /* expands to */ "hb:a,vb:a,dr:a",
    "fast",
        /* expands to */ "h1:a,v1:a,dr:a",
    "fa",
        /* expands to */ "h1:a,v1:a,dr:a",
    "ac",
        /* expands to */ "ha:a:128:7,va:a,dr:a",
    /* end marker */
    NULL
};

167

168
#if ARCH_X86
/*
 * One wrapper per x86 prefetch instruction. Each passes the address p in a
 * register operand; no outputs and no clobbers are declared.
 */

/** Issue a "prefetchnta" for address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%[addr])\n\t"
                     : /* no outputs */
                     : [addr] "r" (p));
}

/** Issue a "prefetcht0" for address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%[addr])\n\t"
                     : /* no outputs */
                     : [addr] "r" (p));
}

/** Issue a "prefetcht1" for address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%[addr])\n\t"
                     : /* no outputs */
                     : [addr] "r" (p));
}

/** Issue a "prefetcht2" for address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%[addr])\n\t"
                     : /* no outputs */
                     : [addr] "r" (p));
}
#endif
197

Diego Biurrun's avatar
Diego Biurrun committed
198 199
/* The horizontal functions exist only in C because the MMX
 * code is faster with vertical filters and transposing. */
200

201 202 203
/**
 * Check if the given 8x8 Block is mostly "flat"
 */
204
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
205
{
206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
    int numEq= 0;
    int y;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;

    for(y=0; y<BLOCK_SIZE; y++){
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
        src+= stride;
    }
    return numEq > c->ppMode.flatnessThreshold;
222 223 224 225 226
}

/**
 * Check if the middle 8x8 Block in the given 8x16 block is flat
 */
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
{
    int numEq= 0;
    int y;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;

    src+= stride*4; // src points to begin of the 8x8 Block
    for(y=0; y<BLOCK_SIZE-1; y++){
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
        src+= stride;
    }
    return numEq > c->ppMode.flatnessThreshold;
247 248
}

249
/**
 * Sparse range check on an 8x8 block: one sample pair is compared per row.
 *
 * Rows 0..7 compare columns (0,5), (2,7), (4,1), (6,3) and then the same
 * four pairs again. A pair fails when its difference is outside
 * [-2*QP, 2*QP].
 *
 * @param src    top-left sample of the block
 * @param stride distance between the starts of two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if all eight pairs pass, 0 as soon as one fails
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t leftCol[4]  = { 0, 2, 4, 6 };
    static const uint8_t rightCol[4] = { 5, 7, 1, 3 };
    const int bias  = 2*QP;
    const int limit = 4*QP;
    const uint8_t *row = src;
    int line;

    for(line=0; line<8; line++, row+= stride){
        const int pair = line & 3;
        const int diff = row[leftCol[pair]] - row[rightCol[pair]];

        /* unsigned compare: rejects both diff < -2*QP and diff > 2*QP */
        if((unsigned)(diff + bias) > limit)
            return 0;
    }
    return 1;
}
264

265 266
/**
 * Sparse range check, vertical counterpart of isHorizMinMaxOk_C: one sample
 * pair is compared per column of the block that starts 4 rows below src.
 *
 * Columns 0..3 compare rows (0,5), (2,7), (4,1), (6,3); columns 4..7 repeat
 * the same pattern. A pair fails when its difference is outside
 * [-2*QP, 2*QP].
 *
 * @param src    top-left sample, 4 rows above the block that is checked
 * @param stride distance between the starts of two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if all pairs pass, 0 as soon as one fails
 */
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t upperRow[4] = { 0, 2, 4, 6 };
    static const uint8_t lowerRow[4] = { 5, 7, 1, 3 };
    const int bias  = 2*QP;
    const int limit = 4*QP;
    const uint8_t *block = src + stride*4;
    int col;

    for(col=0; col<BLOCK_SIZE; col++){
        const int pair = col & 3;
        const int diff = block[col + upperRow[pair]*stride]
                       - block[col + lowerRow[pair]*stride];

        /* unsigned compare: rejects both diff < -2*QP and diff > 2*QP */
        if((unsigned)(diff + bias) > limit)
            return 0;
    }
    return 1;
}

278 279 280 281 282 283 284 285 286 287
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat;
 *         otherwise 1 if isHorizMinMaxOk_C() accepts it, 0 if it does not
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}

290 291 292 293 294 295 296 297 298 299
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat;
 *         otherwise 1 if isVertMinMaxOk_C() accepts it, 0 if it does not
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}

302
/**
 * Default horizontal filter: for each of BLOCK_SIZE rows, adjust the two
 * samples on either side of the column 3/4 boundary.
 *
 * A row is only touched when |middleEnergy| < 8*QP. The correction is
 * derived from the middle energy minus the smaller of the two neighbouring
 * energies, and is clamped to lie between 0 and (dst[3]-dst[4])/2.
 *
 * @param dst    first sample of the first row (modified in place)
 * @param stride distance between the starts of two rows
 * @param c      context supplying QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    uint8_t *row = dst;
    int line;

    for(line=0; line<BLOCK_SIZE; line++, row+= stride){
        const int middleEnergy= 5*(row[4] - row[3]) + 2*(row[2] - row[5]);
        int leftEnergy, rightEnergy, limit, delta;

        if(FFABS(middleEnergy) >= 8*c->QP)
            continue;

        limit      = (row[3] - row[4])/2;
        leftEnergy = 5*(row[2] - row[1]) + 2*(row[0] - row[3]);
        rightEnergy= 5*(row[6] - row[5]) + 2*(row[4] - row[7]);

        delta= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
        delta= FFMAX(delta, 0);

        delta= (5*delta + 32) >> 6;
        delta*= FFSIGN(-middleEnergy);

        /* keep the correction between 0 and limit, whatever limit's sign */
        if(limit>0)
            delta= FFMIN(FFMAX(delta, 0), limit);
        else
            delta= FFMAX(FFMIN(delta, 0), limit);

        row[3]-= delta;
        row[4]+= delta;
    }
}

/**
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 */
341
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
342
{
343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
    int y;
    for(y=0; y<BLOCK_SIZE; y++){
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];

        int sums[10];
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
        sums[1] = sums[0] - first  + dst[3];
        sums[2] = sums[1] - first  + dst[4];
        sums[3] = sums[2] - first  + dst[5];
        sums[4] = sums[3] - first  + dst[6];
        sums[5] = sums[4] - dst[0] + dst[7];
        sums[6] = sums[5] - dst[1] + last;
        sums[7] = sums[6] - dst[2] + last;
        sums[8] = sums[7] - dst[3] + last;
        sums[9] = sums[8] - dst[4] + last;

        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;

        dst+= stride;
    }
371 372
}

373
/**
374 375
 * Experimental Filter 1 (Horizontal)
 * will not damage linear gradients
Diego Biurrun's avatar
Diego Biurrun committed
376
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
Diego Biurrun's avatar
Diego Biurrun committed
377 378
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 * MMX2 version does correct clipping C version does not
379
 * not identical with the vertical one
380
 */
381 382
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
{
383 384 385 386 387 388 389
    int y;
    static uint64_t *lut= NULL;
    if(lut==NULL)
    {
        int i;
        lut = av_malloc(256*8);
        for(i=0; i<256; i++)
390
        {
391
            int v= i < 128 ? 2*i : 2*(i-256);
392
/*
393
//Simulate 112242211 9-Tap filter
394 395 396 397
            uint64_t a= (v/16)  & 0xFF;
            uint64_t b= (v/8)   & 0xFF;
            uint64_t c= (v/4)   & 0xFF;
            uint64_t d= (3*v/8) & 0xFF;
398
*/
399
//Simulate piecewise linear interpolation
400 401 402 403 404 405 406 407 408 409 410 411
            uint64_t a= (v/16)   & 0xFF;
            uint64_t b= (v*3/16) & 0xFF;
            uint64_t c= (v*5/16) & 0xFF;
            uint64_t d= (7*v/16) & 0xFF;
            uint64_t A= (0x100 - a)&0xFF;
            uint64_t B= (0x100 - b)&0xFF;
            uint64_t C= (0x100 - c)&0xFF;
            uint64_t D= (0x100 - c)&0xFF;

            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
                       (D<<24) | (C<<16) | (B<<8)  | (A);
            //lut[i] = (v<<32) | (v<<24);
412
        }
413
    }
414

415 416 417 418
    for(y=0; y<BLOCK_SIZE; y++){
        int a= src[1] - src[2];
        int b= src[3] - src[4];
        int c= src[5] - src[6];
419

420
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
421

422 423
        if(d < QP){
            int v = d * FFSIGN(-b);
424

425 426 427 428 429 430
            src[1] +=v/8;
            src[2] +=v/4;
            src[3] +=3*v/8;
            src[4] -=3*v/8;
            src[5] -=v/4;
            src[6] -=v/8;
431
        }
432 433
        src+=stride;
    }
434 435
}

436 437 438
/**
 * accurate deblock filter
 */
439
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
440 441 442 443
    int y;
    const int QP= c->QP;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;
444
//START_TIMER
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
    src+= step*4; // src points to begin of the 8x8 Block
    for(y=0; y<8; y++){
        int numEq= 0;

        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
        if(numEq > c->ppMode.flatnessThreshold){
            int min, max, x;

            if(src[0] > src[step]){
                max= src[0];
                min= src[step];
            }else{
                max= src[step];
                min= src[0];
            }
            for(x=2; x<8; x+=2){
                if(src[x*step] > src[(x+1)*step]){
                        if(src[x    *step] > max) max= src[ x   *step];
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
472
                }else{
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
                        if(src[ x   *step] < min) min= src[ x   *step];
                }
            }
            if(max-min < 2*QP){
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];

                int sums[10];
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
                sums[1] = sums[0] - first       + src[3*step];
                sums[2] = sums[1] - first       + src[4*step];
                sums[3] = sums[2] - first       + src[5*step];
                sums[4] = sums[3] - first       + src[6*step];
                sums[5] = sums[4] - src[0*step] + src[7*step];
                sums[6] = sums[5] - src[1*step] + last;
                sums[7] = sums[6] - src[2*step] + last;
                sums[8] = sums[7] - src[3*step] + last;
                sums[9] = sums[8] - src[4*step] + last;

                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
            }
        }else{
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);

            if(FFABS(middleEnergy) < 8*QP){
                const int q=(src[3*step] - src[4*step])/2;
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);

                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                d= FFMAX(d, 0);

                d= (5*d + 32) >> 6;
                d*= FFSIGN(-middleEnergy);

                if(q>0){
                    d= d<0 ? 0 : d;
                    d= d>q ? q : d;
                }else{
                    d= d>0 ? 0 : d;
                    d= d<q ? q : d;
522 523
                }

524 525 526
                src[3*step]-= d;
                src[4*step]+= d;
            }
527
        }
528 529 530

        src += stride;
    }
531 532 533 534 535 536
/*if(step==16){
    STOP_TIMER("step16")
}else{
    STOP_TIMER("stepX")
}*/
}
537

538
//Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one
539
//Plain C versions
540
//we always compile C for testing which needs bitexactness
541 542
/* Step 1: decide which variants of the template get built. The C variant is
 * unconditional; the others depend on the HAVE_* feature macros and on
 * CONFIG_RUNTIME_CPUDETECT (which enables every x86 variant at once). */
#define COMPILE_C

543
#if HAVE_ALTIVEC
544 545 546
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

547
#if ARCH_X86
548

549
/* Plain MMX is only built when neither 3DNow nor MMX2 is available. */
#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
550 551 552
#define COMPILE_MMX
#endif

553
#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
554 555 556
#define COMPILE_MMX2
#endif

557
/* 3DNow is only built when MMX2 is not available. */
#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
558 559
#define COMPILE_3DNOW
#endif
560
#endif /* ARCH_X86 */
561 562

/* Step 2: force every feature macro to 0 before the first template include.
 * From here on the HAVE_* macros describe the variant being instantiated,
 * not the build configuration. */
#undef HAVE_MMX
563
#define HAVE_MMX 0
564
#undef HAVE_MMX2
565
#define HAVE_MMX2 0
566 567
#undef HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 0
568
#undef HAVE_ALTIVEC
569
#define HAVE_ALTIVEC 0
570 571

/* Step 3: include the template once per variant. RENAME() pastes a
 * per-variant suffix onto an identifier (e.g. postProcess_C, which is
 * called further down in this file).
 * NOTE(review): presumably the template wraps its function names in
 * RENAME() and tests the HAVE_* macros -- confirm in postprocess_template.c. */
#ifdef COMPILE_C
572 573
#define RENAME(a) a ## _C
#include "postprocess_template.c"
574
#endif
575

576 577
#ifdef COMPILE_ALTIVEC
#undef RENAME
578 579
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
580 581 582 583 584
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

585
//MMX versions
586
#ifdef COMPILE_MMX
587
#undef RENAME
588 589
#undef HAVE_MMX
#define HAVE_MMX 1
590 591
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
592
#endif
593 594

//MMX2 versions
595
#ifdef COMPILE_MMX2
596
#undef RENAME
597 598 599 600
#undef HAVE_MMX
#undef HAVE_MMX2
#define HAVE_MMX 1
#define HAVE_MMX2 1
601 602
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
603
#endif
604 605

//3DNOW versions
606
#ifdef COMPILE_3DNOW
607
#undef RENAME
608
#undef HAVE_MMX
609
#undef HAVE_MMX2
610
#undef HAVE_AMD3DNOW
611 612
/* MMX2 is explicitly switched back off for the 3DNow variant. */
#define HAVE_MMX 1
#define HAVE_MMX2 0
613
#define HAVE_AMD3DNOW 1
614 615
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
616
#endif
617

Diego Biurrun's avatar
Diego Biurrun committed
618
// minor note: the HAVE_xyz macros were redefined by the template instantiations above, so do not rely on them after this line.
619

Michael Niedermayer's avatar
Michael Niedermayer committed
620
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
621
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
622
{
623 624 625
    PPContext *c= (PPContext *)vc;
    PPMode *ppMode= (PPMode *)vm;
    c->ppMode= *ppMode; //FIXME
626

627 628 629
    if(ppMode->lumMode & BITEXACT)
        return postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);

630 631 632
    // Using ifs here as they are faster than function pointers although the
    // difference would not be measurable here but it is much better because
    // someone might exchange the CPU whithout restarting MPlayer ;)
633
#if CONFIG_RUNTIME_CPUDETECT
634
#if ARCH_X86
635 636 637 638 639 640 641 642 643
    // ordered per speed fastest first
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
644
#else
645
#if HAVE_ALTIVEC
646 647 648
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
649
#endif
650
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
651
#endif
652
#else /* CONFIG_RUNTIME_CPUDETECT */
653
#if   HAVE_MMX2
654
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
655
#elif HAVE_AMD3DNOW
656
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657
#elif HAVE_MMX
658
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
659
#elif HAVE_ALTIVEC
660
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
661
#else
662
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
663
#endif
664
#endif /* !CONFIG_RUNTIME_CPUDETECT */
665 666
}

667
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
668
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
669

670 671
/* -pp Command line Help
 *
 * The filter names listed here must be spelled exactly as in the filters[]
 * table: names are matched with strcmp(), which is case sensitive.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
719

720
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
721
{
722 723 724 725 726 727 728
    char temp[GET_MODE_BUFFER_SIZE];
    char *p= temp;
    static const char filterDelimiters[] = ",/";
    static const char optionDelimiters[] = ":";
    struct PPMode *ppMode;
    char *filterToken;

729 730 731 732 733 734 735 736 737
    if (!strcmp(name, "help")) {
        const char *p;
        for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
            av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
            av_log(NULL, AV_LOG_INFO, "%s", temp);
        }
        return NULL;
    }

738 739 740 741 742 743 744 745 746 747 748 749 750 751
    ppMode= av_malloc(sizeof(PPMode));

    ppMode->lumMode= 0;
    ppMode->chromMode= 0;
    ppMode->maxTmpNoise[0]= 700;
    ppMode->maxTmpNoise[1]= 1500;
    ppMode->maxTmpNoise[2]= 3000;
    ppMode->maxAllowedY= 234;
    ppMode->minAllowedY= 16;
    ppMode->baseDcDiff= 256/8;
    ppMode->flatnessThreshold= 56-16-1;
    ppMode->maxClippedThreshold= 0.01;
    ppMode->error=0;

752 753
    memset(temp, 0, GET_MODE_BUFFER_SIZE);
    av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778

    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);

    for(;;){
        char *filterName;
        int q= 1000000; //PP_QUALITY_MAX;
        int chrom=-1;
        int luma=-1;
        char *option;
        char *options[OPTIONS_ARRAY_SIZE];
        int i;
        int filterNameOk=0;
        int numOfUnknownOptions=0;
        int enable=1; //does the user want us to enabled or disabled the filter

        filterToken= strtok(p, filterDelimiters);
        if(filterToken == NULL) break;
        p+= strlen(filterToken) + 1; // p points to next filterToken
        filterName= strtok(filterToken, optionDelimiters);
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);

        if(*filterName == '-'){
            enable=0;
            filterName++;
        }
779

780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
        for(;;){ //for all options
            option= strtok(NULL, optionDelimiters);
            if(option == NULL) break;

            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
            else{
                options[numOfUnknownOptions] = option;
                numOfUnknownOptions++;
            }
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
        }
        options[numOfUnknownOptions] = NULL;

        /* replace stuff from the replace Table */
        for(i=0; replaceTable[2*i]!=NULL; i++){
            if(!strcmp(replaceTable[2*i], filterName)){
                int newlen= strlen(replaceTable[2*i + 1]);
                int plen;
                int spaceLeft;

804
                p--, *p=',';
805 806 807

                plen= strlen(p);
                spaceLeft= p - temp + plen;
808
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
809 810
                    ppMode->error++;
                    break;
811
                }
812 813 814 815 816
                memmove(p + newlen, p, plen+1);
                memcpy(p, replaceTable[2*i + 1], newlen);
                filterNameOk=1;
            }
        }
817

818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842
        for(i=0; filters[i].shortName!=NULL; i++){
            if(   !strcmp(filters[i].longName, filterName)
               || !strcmp(filters[i].shortName, filterName)){
                ppMode->lumMode &= ~filters[i].mask;
                ppMode->chromMode &= ~filters[i].mask;

                filterNameOk=1;
                if(!enable) break; // user wants to disable it

                if(q >= filters[i].minLumQuality && luma)
                    ppMode->lumMode|= filters[i].mask;
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
                    if(q >= filters[i].minChromQuality)
                            ppMode->chromMode|= filters[i].mask;

                if(filters[i].mask == LEVEL_FIX){
                    int o;
                    ppMode->minAllowedY= 16;
                    ppMode->maxAllowedY= 234;
                    for(o=0; options[o]!=NULL; o++){
                        if(  !strcmp(options[o],"fullyrange")
                           ||!strcmp(options[o],"f")){
                            ppMode->minAllowedY= 0;
                            ppMode->maxAllowedY= 255;
                            numOfUnknownOptions--;
843
                        }
844
                    }
845
                }
846
                else if(filters[i].mask == TEMP_NOISE_FILTER)
847
                {
848 849 850 851 852 853 854 855 856 857 858
                    int o;
                    int numOfNoises=0;

                    for(o=0; options[o]!=NULL; o++){
                        char *tail;
                        ppMode->maxTmpNoise[numOfNoises]=
                            strtol(options[o], &tail, 0);
                        if(tail!=options[o]){
                            numOfNoises++;
                            numOfUnknownOptions--;
                            if(numOfNoises >= 3) break;
859
                        }
860
                    }
861
                }
862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
                    int o;

                    for(o=0; options[o]!=NULL && o<2; o++){
                        char *tail;
                        int val= strtol(options[o], &tail, 0);
                        if(tail==options[o]) break;

                        numOfUnknownOptions--;
                        if(o==0) ppMode->baseDcDiff= val;
                        else ppMode->flatnessThreshold= val;
                    }
                }
                else if(filters[i].mask == FORCE_QUANT){
                    int o;
                    ppMode->forcedQuant= 15;

                    for(o=0; options[o]!=NULL && o<1; o++){
                        char *tail;
                        int val= strtol(options[o], &tail, 0);
                        if(tail==options[o]) break;

                        numOfUnknownOptions--;
                        ppMode->forcedQuant= val;
                    }
                }
            }
890
        }
891 892 893 894 895 896 897 898 899 900 901
        if(!filterNameOk) ppMode->error++;
        ppMode->error += numOfUnknownOptions;
    }

    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
    if(ppMode->error){
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
        av_free(ppMode);
        return NULL;
    }
    return ppMode;
902 903
}

904
/**
 * Release a postprocessing mode object.
 *
 * The pointer is handed straight to av_free(); nothing else is torn down
 * here.
 *
 * @param mode mode object to release
 */
void pp_free_mode(pp_mode *mode)
{
    av_free(mode);
}

908
/**
 * Throw away the buffer referenced by *p and install a new one.
 *
 * The old buffer is released with av_free() first, then *p receives the
 * result of av_mallocz(size). The previous contents are not copied over.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment accepted for the callers' benefit but not used here
 * @param size      size handed to av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}

Michael Niedermayer's avatar
Michael Niedermayer committed
913
/**
 * (Re)allocate all work buffers of a context for the given geometry.
 *
 * Stores stride and qpStride in the context, then replaces every buffer via
 * reallocAlign(), which frees the old buffer and installs a new one obtained
 * from av_mallocz(). Every histogram entry is seeded with
 * width*height/64*15/256.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width, used for the macroblock count, the
 *                 histogram seed and the deintTemp size
 * @param height   picture height, used for the macroblock count, the
 *                 histogram seed and the tempBlurredPast size
 * @param stride   line stride the temporary picture buffers are sized for
 * @param qpStride stride the QP tables are sized for
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, 8, stride*24);
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
    // The allocation above may have failed; do not write through a NULL pointer.
    if(c->yHistogram){
        // Computed in 64 bit so that width*height cannot overflow int.
        const int64_t seed= (int64_t)width*height/64*15/256;
        for(i=0; i<256; i++)
            c->yHistogram[i]= seed;
    }

    for(i=0; i<3; i++){
        //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
        reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}

940 941 942 943
/**
 * Name callback referenced by the AVClass initializer in this file.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char name[] = "postproc";

    (void)ptr;
    return name;
}

944
/* Class descriptor that pp_get_context() stores in PPContext.av_class; the
 * context is later passed as the first argument to av_log().
 * The initializer is positional: "Postproc", the context_to_name callback,
 * then NULL -- presumably class name, item-name callback and option table;
 * confirm against the AVClass declaration. */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
945

946
pp_context *pp_get_context(int width, int height, int cpuCaps){
947
    PPContext *c= av_malloc(sizeof(PPContext));
948
    int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed

    memset(c, 0, sizeof(PPContext));
    c->av_class = &av_codec_context_class;
    c->cpuCaps= cpuCaps;
    if(cpuCaps&PP_FORMAT){
        c->hChromaSubSample= cpuCaps&0x3;
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
    }else{
        c->hChromaSubSample= 1;
        c->vChromaSubSample= 1;
    }

    reallocBuffers(c, width, height, stride, qpStride);

    c->frameNum=-1;

    return c;
967 968
}

Michael Niedermayer's avatar
Michael Niedermayer committed
969
void pp_free_context(void *vc){
970 971
    PPContext *c = (PPContext*)vc;
    int i;
972

973 974
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
975

976 977 978 979 980 981 982 983
    av_free(c->tempBlocks);
    av_free(c->yHistogram);
    av_free(c->tempDst);
    av_free(c->tempSrc);
    av_free(c->deintTemp);
    av_free(c->stdQPTable);
    av_free(c->nonBQPTable);
    av_free(c->forcedQPTable);
984

985
    memset(c, 0, sizeof(PPContext));
986

987
    av_free(c);
988 989
}

Michael Niedermayer's avatar
Michael Niedermayer committed
990
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
991 992 993
                     uint8_t * dst[3], const int dstStride[3],
                     int width, int height,
                     const QP_STORE_T *QP_store,  int QPStride,
994
                     pp_mode *vm,  void *vc, int pict_type)
995
{
996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    PPMode *mode = (PPMode*)vm;
    PPContext *c = (PPContext*)vc;
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
    int absQPStride = FFABS(QPStride);

    // c->stride and c->QPStride are always positive
    if(c->stride < minStride || c->qpStride < absQPStride)
        reallocBuffers(c, width, height,
                       FFMAX(minStride, c->stride),
                       FFMAX(c->qpStride, absQPStride));

    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
        int i;
        QP_store= c->forcedQPTable;
        absQPStride = QPStride = 0;
        if(mode->lumMode & FORCE_QUANT)
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
        else
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
    }
Michael Niedermayer's avatar
Michael Niedermayer committed
1018

1019 1020 1021 1022 1023
    if(pict_type & PP_PICT_TYPE_QP2){
        int i;
        const int count= mbHeight * absQPStride;
        for(i=0; i<(count>>2); i++){
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1024
        }
1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
        for(i<<=2; i<count; i++){
            c->stdQPTable[i] = QP_store[i]>>1;
        }
        QP_store= c->stdQPTable;
        QPStride= absQPStride;
    }

    if(0){
        int x,y;
        for(y=0; y<mbHeight; y++){
            for(x=0; x<mbWidth; x++){
1036
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1037 1038
            }
            av_log(c, AV_LOG_INFO, "\n");
1039
        }
1040
        av_log(c, AV_LOG_INFO, "\n");
1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057
    }

    if((pict_type&7)!=3){
        if (QPStride >= 0){
            int i;
            const int count= mbHeight * QPStride;
            for(i=0; i<(count>>2); i++){
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
            }
            for(i<<=2; i<count; i++){
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
            }
        } else {
            int i,j;
            for(i=0; i<mbHeight; i++) {
                for(j=0; j<absQPStride; j++) {
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1058
                }
1059
            }
1060
        }
1061
    }
1062

1063 1064
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
           mode->lumMode, mode->chromMode);
1065

1066
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1067 1068
                width, height, QP_store, QPStride, 0, mode, c);

1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085
    width  = (width )>>c->hChromaSubSample;
    height = (height)>>c->vChromaSubSample;

    if(mode->chromMode){
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
                    width, height, QP_store, QPStride, 1, mode, c);
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
                    width, height, QP_store, QPStride, 2, mode, c);
    }
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
        linecpy(dst[1], src[1], height, srcStride[1]);
        linecpy(dst[2], src[2], height, srcStride[2]);
    }else{
        int y;
        for(y=0; y<height; y++){
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1086
        }
1087
    }
1088
}