/*
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
 *
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22

/**
 * @file
 * postprocessing.
 */
27

/*
                        C       MMX     MMX2    3DNow   AltiVec
isVertDC                Ec      Ec                      Ec
isVertMinMaxOk          Ec      Ec                      Ec
doVertLowPass           E               e       e       Ec
doVertDefFilter         Ec      Ec      e       e       Ec
isHorizDC               Ec      Ec                      Ec
isHorizMinMaxOk         a       E                       Ec
doHorizLowPass          E               e       e       Ec
doHorizDefFilter        Ec      Ec      e       e       Ec
do_a_deblock            Ec      E       Ec      E
deRing                  E               e       e*      Ecp
Vertical RKAlgo1        E               a       a
Horizontal RKAlgo1                      a       a
Vertical X1#            a               E       E
Horizontal X1#          a               E       E
LinIpolDeinterlace      e               E       E*
CubicIpolDeinterlace    a               e       e*
LinBlendDeinterlace     e               E       E*
MedianDeinterlace#      E       Ec      Ec
TempDeNoiser#           E               e       e       Ec

* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
# more or less selfinvented filters so the exactness is not too meaningful
E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
a = alternative / approximate impl
c = checked against the other implementations (-vo md5)
p = partially optimized, still some work to do
*/

/*
TODO:
reduce the time wasted on the mem transfer
unroll stuff if instructions depend too much on the prior one
move YScale thing to the end instead of fixing QP
write a faster and higher quality deblocking filter :)
make the mainloop more flexible (variable number of blocks at once)
        (the if/else stuff per block is slowing things down)
compare the quality & speed of all filters
split this huge file
optimize c versions
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
...
*/

//Changelog: use the Subversion log
75

76
#include "config.h"
77
#include "libavutil/avutil.h"
78 79
#include <inttypes.h>
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82
//#undef HAVE_MMX2
83
//#define HAVE_AMD3DNOW
84
//#undef HAVE_MMX
85
//#undef ARCH_X86
86
//#define DEBUG_BRIGHTNESS
87
#include "postprocess.h"
Michael Niedermayer's avatar
Michael Niedermayer committed
88
#include "postprocess_internal.h"
89

90 91 92 93 94
/**
 * Return the LIBPOSTPROC_VERSION_INT constant this file was compiled with.
 */
unsigned postproc_version(void)
{
    return LIBPOSTPROC_VERSION_INT;
}

95
const char *postproc_configuration(void)
96 97 98 99
{
    return FFMPEG_CONFIGURATION;
}

100
const char *postproc_license(void)
101 102 103 104 105
{
#define LICENSE_PREFIX "libpostproc license: "
    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
}

106
#if HAVE_ALTIVEC_H
107 108 109
#include <altivec.h>
#endif

110 111
#define GET_MODE_BUFFER_SIZE 500
#define OPTIONS_ARRAY_SIZE 10
#define BLOCK_SIZE 8 // loop bound of the C block filters in this file
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
115

116
#if ARCH_X86
/* 64-bit constants holding one value replicated across the register:
 * wNN repeats the 16-bit word 0x00NN four times,
 * bNN repeats the byte 0xNN eight times.
 * NOTE(review): presumably consumed by the inline-asm code in the included
 * templates; they are not referenced in this part of the file. */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif
126

127
// NOTE(review): not referenced in this part of the file; presumably read by the dering code in the included templates
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
128

129

130 131
static struct PPFilter filters[]=
{
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",                1, 5, 6, DERING},
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
    {NULL, NULL,0,0,0,0} //End Marker
151 152
};

153
/**
 * Flat list of string pairs terminated by a single NULL.
 * In each pair the first string is a preset name and the second a filter
 * description written in terms of the short filter names.
 * NOTE(review): presumably the first string is replaced by the second when a
 * mode string is parsed; the code doing so is not in this part of the file.
 */
static const char *replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};

163

164
#if ARCH_X86
165 166
/** Emit the x86 prefetchnta instruction for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit the x86 prefetcht0 instruction for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit the x86 prefetcht1 instruction for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit the x86 prefetcht2 instruction for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
192
#endif
193

/* The horizontal functions exist only in C because the MMX
 * code is faster with vertical filters and transposing. */
196

197 198 199
/**
 * Check if the given 8x8 Block is mostly "flat"
 */
200
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
201
{
202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
    int numEq= 0;
    int y;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;

    for(y=0; y<BLOCK_SIZE; y++){
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
        src+= stride;
    }
    return numEq > c->ppMode.flatnessThreshold;
218 219 220 221 222
}

/**
 * Check if the middle 8x8 Block in the given 8x16 block is flat
 */
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
{
    int numEq= 0;
    int y;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;

    src+= stride*4; // src points to begin of the 8x8 Block
    for(y=0; y<BLOCK_SIZE-1; y++){
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
        src+= stride;
    }
    return numEq > c->ppMode.flatnessThreshold;
243 244
}

245
/**
 * Check that sampled horizontal pixel differences of an 8x8 block stay
 * within +-2*QP.
 *
 * Only one pixel pair is tested per row; the tested columns rotate through
 * (0,5), (2,7), (4,1), (6,3) over rows 0-3 and again over rows 4-7.
 *
 * @param src    top-left pixel of the block
 * @param stride distance between vertically adjacent pixels
 * @param QP     quantizer the differences are compared against
 * @return 1 if every sampled difference d satisfies -2*QP <= d <= 2*QP, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    int i;
#if 1
    for(i=0; i<2; i++){
        // the unsigned cast makes a difference below -2*QP wrap to a huge value and fail too
        if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
#else
    for(i=0; i<8; i++){
        if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
#endif
    return 1;
}
267

268 269 270 271
/**
 * Check that sampled vertical pixel differences stay within +-2*QP.
 *
 * The active variant tests one pixel pair per column, starting 4 rows below
 * src; the tested rows rotate through (0,5), (2,7), (4,1), (6,3) over
 * columns 0-3 and again over columns 4-7. The two disabled variants are
 * kept as alternatives.
 *
 * @param src    top-left pixel of the area (checking starts 4 rows below)
 * @param stride distance between vertically adjacent pixels
 * @param QP     quantizer the differences are compared against
 * @return 1 if every sampled difference d satisfies -2*QP <= d <= 2*QP, 0 otherwise
 */
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
{
#if 1
#if 1
    int x;
    src+= stride*4;
    for(x=0; x<BLOCK_SIZE; x+=4){
        // the unsigned cast makes a difference below -2*QP wrap to a huge value and fail too
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
    }
#else
    int x;
    src+= stride*3;
    for(x=0; x<BLOCK_SIZE; x++){
        if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
    }
#endif
    return 1;
#else
    int x;
    src+= stride*4;
    for(x=0; x<BLOCK_SIZE; x++){
        int min=255;
        int max=0;
        int y;
        for(y=0; y<8; y++){
            int v= src[x + y*stride];
            if(v>max) max=v;
            if(v<min) min=v;
        }
        if(max-min > 2*QP) return 0;
    }
    return 1;
#endif
}

306 307 308 309 310 311 312 313 314 315
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() passes for c->QP,
 *         0 if it is flat but that check fails
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}

318 319 320 321 322 323 324 325 326 327
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() passes for c->QP,
 *         0 if it is flat but that check fails
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}

330
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of BLOCK_SIZE rows, looks at dst[0]..dst[7] and, when the
 * "middle energy" across the dst[3]/dst[4] boundary is below 8*QP, moves
 * dst[3] and dst[4] towards each other by a correction d. Only dst[3] and
 * dst[4] are written.
 *
 * @param dst    first pixel of the first row
 * @param stride distance between vertically adjacent pixels
 * @param c      supplies QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int y;
    for(y=0; y<BLOCK_SIZE; y++){
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);

        if(FFABS(middleEnergy) < 8*c->QP){
            const int q=(dst[3] - dst[4])/2;
            const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
            const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

            int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
            d= FFMAX(d, 0);

            d= (5*d + 32) >> 6;
            d*= FFSIGN(-middleEnergy);

            // clamp d to the range between 0 and q (half the step across the boundary)
            if(q>0)
            {
                d= d<0 ? 0 : d;
                d= d>q ? q : d;
            }
            else
            {
                d= d>0 ? 0 : d;
                d= d<q ? q : d;
            }

            dst[3]-= d;
            dst[4]+= d;
        }
        dst+= stride;
    }
}

/**
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 */
369
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
370
{
371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
    int y;
    for(y=0; y<BLOCK_SIZE; y++){
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];

        int sums[10];
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
        sums[1] = sums[0] - first  + dst[3];
        sums[2] = sums[1] - first  + dst[4];
        sums[3] = sums[2] - first  + dst[5];
        sums[4] = sums[3] - first  + dst[6];
        sums[5] = sums[4] - dst[0] + dst[7];
        sums[6] = sums[5] - dst[1] + last;
        sums[7] = sums[6] - dst[2] + last;
        sums[8] = sums[7] - dst[3] + last;
        sums[9] = sums[8] - dst[4] + last;

        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;

        dst+= stride;
    }
399 400
}

401
/**
402 403
 * Experimental Filter 1 (Horizontal)
 * will not damage linear gradients
Diego Biurrun's avatar
Diego Biurrun committed
404
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
Diego Biurrun's avatar
Diego Biurrun committed
405 406
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 * MMX2 version does correct clipping C version does not
407
 * not identical with the vertical one
408
 */
409 410
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
{
411 412 413 414 415 416 417
    int y;
    static uint64_t *lut= NULL;
    if(lut==NULL)
    {
        int i;
        lut = av_malloc(256*8);
        for(i=0; i<256; i++)
418
        {
419
            int v= i < 128 ? 2*i : 2*(i-256);
420
/*
421
//Simulate 112242211 9-Tap filter
422 423 424 425
            uint64_t a= (v/16)  & 0xFF;
            uint64_t b= (v/8)   & 0xFF;
            uint64_t c= (v/4)   & 0xFF;
            uint64_t d= (3*v/8) & 0xFF;
426
*/
427
//Simulate piecewise linear interpolation
428 429 430 431 432 433 434 435 436 437 438 439
            uint64_t a= (v/16)   & 0xFF;
            uint64_t b= (v*3/16) & 0xFF;
            uint64_t c= (v*5/16) & 0xFF;
            uint64_t d= (7*v/16) & 0xFF;
            uint64_t A= (0x100 - a)&0xFF;
            uint64_t B= (0x100 - b)&0xFF;
            uint64_t C= (0x100 - c)&0xFF;
            uint64_t D= (0x100 - c)&0xFF;

            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
                       (D<<24) | (C<<16) | (B<<8)  | (A);
            //lut[i] = (v<<32) | (v<<24);
440
        }
441
    }
442

443 444 445 446
    for(y=0; y<BLOCK_SIZE; y++){
        int a= src[1] - src[2];
        int b= src[3] - src[4];
        int c= src[5] - src[6];
447

448
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
449

450 451
        if(d < QP){
            int v = d * FFSIGN(-b);
452

453 454 455 456 457 458
            src[1] +=v/8;
            src[2] +=v/4;
            src[3] +=3*v/8;
            src[4] -=3*v/8;
            src[5] -=v/4;
            src[6] -=v/8;
459
        }
460 461
        src+=stride;
    }
462 463
}

464 465 466
/**
 * accurate deblock filter
 */
467
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
468 469 470 471
    int y;
    const int QP= c->QP;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;
472
//START_TIMER
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
    src+= step*4; // src points to begin of the 8x8 Block
    for(y=0; y<8; y++){
        int numEq= 0;

        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
        if(numEq > c->ppMode.flatnessThreshold){
            int min, max, x;

            if(src[0] > src[step]){
                max= src[0];
                min= src[step];
            }else{
                max= src[step];
                min= src[0];
            }
            for(x=2; x<8; x+=2){
                if(src[x*step] > src[(x+1)*step]){
                        if(src[x    *step] > max) max= src[ x   *step];
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
500
                }else{
501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
                        if(src[ x   *step] < min) min= src[ x   *step];
                }
            }
            if(max-min < 2*QP){
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];

                int sums[10];
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
                sums[1] = sums[0] - first       + src[3*step];
                sums[2] = sums[1] - first       + src[4*step];
                sums[3] = sums[2] - first       + src[5*step];
                sums[4] = sums[3] - first       + src[6*step];
                sums[5] = sums[4] - src[0*step] + src[7*step];
                sums[6] = sums[5] - src[1*step] + last;
                sums[7] = sums[6] - src[2*step] + last;
                sums[8] = sums[7] - src[3*step] + last;
                sums[9] = sums[8] - src[4*step] + last;

                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
            }
        }else{
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);

            if(FFABS(middleEnergy) < 8*QP){
                const int q=(src[3*step] - src[4*step])/2;
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);

                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                d= FFMAX(d, 0);

                d= (5*d + 32) >> 6;
                d*= FFSIGN(-middleEnergy);

                if(q>0){
                    d= d<0 ? 0 : d;
                    d= d>q ? q : d;
                }else{
                    d= d>0 ? 0 : d;
                    d= d<q ? q : d;
550 551
                }

552 553 554
                src[3*step]-= d;
                src[4*step]+= d;
            }
555
        }
556 557 558

        src += stride;
    }
559 560 561 562 563 564
/*if(step==16){
    STOP_TIMER("step16")
}else{
    STOP_TIMER("stepX")
}*/
}
565

566
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one

/* Step 1: decide from the build configuration which variants get compiled.
 * With CONFIG_RUNTIME_CPUDETECT the C variant and, on x86, all x86 variants are built. */
//Plain C versions
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_C
#endif

#if HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if ARCH_X86

#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX
#endif

#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX2
#endif

#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_3DNOW
#endif
#endif /* ARCH_X86 */

/* Step 2: clear all HAVE_* feature macros; each section below re-enables
 * only the ones that belong to the variant it instantiates. */
#undef HAVE_MMX
#define HAVE_MMX 0
#undef HAVE_MMX2
#define HAVE_MMX2 0
#undef HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 0
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 0

/* Step 3: include the template once per variant; RENAME() appends the
 * variant suffix to the names the template passes through it. */
#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef COMPILE_ALTIVEC
#undef RENAME
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#undef HAVE_MMX
#define HAVE_MMX 1
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#define HAVE_MMX 1
#define HAVE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// minor note: the HAVE_xyz is messed up after that line so do not use it.
648

Michael Niedermayer's avatar
Michael Niedermayer committed
649
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
650
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
651
{
652 653 654
    PPContext *c= (PPContext *)vc;
    PPMode *ppMode= (PPMode *)vm;
    c->ppMode= *ppMode; //FIXME
655

656 657 658
    // Using ifs here as they are faster than function pointers although the
    // difference would not be measurable here but it is much better because
    // someone might exchange the CPU whithout restarting MPlayer ;)
659
#if CONFIG_RUNTIME_CPUDETECT
660
#if ARCH_X86
661 662 663 664 665 666 667 668 669
    // ordered per speed fastest first
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670
#else
671
#if HAVE_ALTIVEC
672 673 674
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
675
#endif
676
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
677
#endif
678
#else //CONFIG_RUNTIME_CPUDETECT
679
#if   HAVE_MMX2
680
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
681
#elif HAVE_AMD3DNOW
682
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
683
#elif HAVE_MMX
684
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685
#elif HAVE_ALTIVEC
686
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687
#else
688
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689
#endif
690
#endif //!CONFIG_RUNTIME_CPUDETECT
691 692
}

693
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
694
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
695

696 697
/**
 * Help text for the -pp command line option: lists the available filters,
 * their short and long names, their options and a few usage examples.
 *
 * Depending on LIBPOSTPROC_VERSION_INT the symbol is defined either as a
 * constant pointer to the text or as a character array holding it.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forceQuant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
747
{
748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794
    char temp[GET_MODE_BUFFER_SIZE];
    char *p= temp;
    static const char filterDelimiters[] = ",/";
    static const char optionDelimiters[] = ":";
    struct PPMode *ppMode;
    char *filterToken;

    ppMode= av_malloc(sizeof(PPMode));

    ppMode->lumMode= 0;
    ppMode->chromMode= 0;
    ppMode->maxTmpNoise[0]= 700;
    ppMode->maxTmpNoise[1]= 1500;
    ppMode->maxTmpNoise[2]= 3000;
    ppMode->maxAllowedY= 234;
    ppMode->minAllowedY= 16;
    ppMode->baseDcDiff= 256/8;
    ppMode->flatnessThreshold= 56-16-1;
    ppMode->maxClippedThreshold= 0.01;
    ppMode->error=0;

    strncpy(temp, name, GET_MODE_BUFFER_SIZE);

    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);

    for(;;){
        char *filterName;
        int q= 1000000; //PP_QUALITY_MAX;
        int chrom=-1;
        int luma=-1;
        char *option;
        char *options[OPTIONS_ARRAY_SIZE];
        int i;
        int filterNameOk=0;
        int numOfUnknownOptions=0;
        int enable=1; //does the user want us to enabled or disabled the filter

        filterToken= strtok(p, filterDelimiters);
        if(filterToken == NULL) break;
        p+= strlen(filterToken) + 1; // p points to next filterToken
        filterName= strtok(filterToken, optionDelimiters);
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);

        if(*filterName == '-'){
            enable=0;
            filterName++;
        }
795

796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827
        for(;;){ //for all options
            option= strtok(NULL, optionDelimiters);
            if(option == NULL) break;

            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
            else{
                options[numOfUnknownOptions] = option;
                numOfUnknownOptions++;
            }
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
        }
        options[numOfUnknownOptions] = NULL;

        /* replace stuff from the replace Table */
        for(i=0; replaceTable[2*i]!=NULL; i++){
            if(!strcmp(replaceTable[2*i], filterName)){
                int newlen= strlen(replaceTable[2*i + 1]);
                int plen;
                int spaceLeft;

                if(p==NULL) p= temp, *p=0;      //last filter
                else p--, *p=',';               //not last filter

                plen= strlen(p);
                spaceLeft= p - temp + plen;
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
                    ppMode->error++;
                    break;
828
                }
829 830 831 832 833
                memmove(p + newlen, p, plen+1);
                memcpy(p, replaceTable[2*i + 1], newlen);
                filterNameOk=1;
            }
        }
834

835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859
        for(i=0; filters[i].shortName!=NULL; i++){
            if(   !strcmp(filters[i].longName, filterName)
               || !strcmp(filters[i].shortName, filterName)){
                ppMode->lumMode &= ~filters[i].mask;
                ppMode->chromMode &= ~filters[i].mask;

                filterNameOk=1;
                if(!enable) break; // user wants to disable it

                if(q >= filters[i].minLumQuality && luma)
                    ppMode->lumMode|= filters[i].mask;
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
                    if(q >= filters[i].minChromQuality)
                            ppMode->chromMode|= filters[i].mask;

                if(filters[i].mask == LEVEL_FIX){
                    int o;
                    ppMode->minAllowedY= 16;
                    ppMode->maxAllowedY= 234;
                    for(o=0; options[o]!=NULL; o++){
                        if(  !strcmp(options[o],"fullyrange")
                           ||!strcmp(options[o],"f")){
                            ppMode->minAllowedY= 0;
                            ppMode->maxAllowedY= 255;
                            numOfUnknownOptions--;
860
                        }
861
                    }
862
                }
863
                else if(filters[i].mask == TEMP_NOISE_FILTER)
864
                {
865 866 867 868 869 870 871 872 873 874 875
                    int o;
                    int numOfNoises=0;

                    for(o=0; options[o]!=NULL; o++){
                        char *tail;
                        ppMode->maxTmpNoise[numOfNoises]=
                            strtol(options[o], &tail, 0);
                        if(tail!=options[o]){
                            numOfNoises++;
                            numOfUnknownOptions--;
                            if(numOfNoises >= 3) break;
876
                        }
877
                    }
878
                }
879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
                    int o;

                    for(o=0; options[o]!=NULL && o<2; o++){
                        char *tail;
                        int val= strtol(options[o], &tail, 0);
                        if(tail==options[o]) break;

                        numOfUnknownOptions--;
                        if(o==0) ppMode->baseDcDiff= val;
                        else ppMode->flatnessThreshold= val;
                    }
                }
                else if(filters[i].mask == FORCE_QUANT){
                    int o;
                    ppMode->forcedQuant= 15;

                    for(o=0; options[o]!=NULL && o<1; o++){
                        char *tail;
                        int val= strtol(options[o], &tail, 0);
                        if(tail==options[o]) break;

                        numOfUnknownOptions--;
                        ppMode->forcedQuant= val;
                    }
                }
            }
907
        }
908 909 910 911 912 913 914 915 916 917 918
        if(!filterNameOk) ppMode->error++;
        ppMode->error += numOfUnknownOptions;
    }

    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
    if(ppMode->error){
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
        av_free(ppMode);
        return NULL;
    }
    return ppMode;
919 920
}

/**
 * Release a mode returned by pp_get_mode_by_name_and_quality().
 *
 * @param mode mode to free; handed directly to av_free()
 */
void pp_free_mode(pp_mode *mode){
    av_free(mode);
}

/**
 * Replace the buffer *p with a new zero-initialized buffer of size bytes.
 * The old contents are discarded, not copied.
 *
 * @param p         address of the buffer pointer; the old buffer is freed
 * @param alignment not used by this implementation
 *                  (NOTE(review): presumably the alignment provided by
 *                  av_mallocz() is relied on -- confirm)
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
    av_free(*p);
    *p= av_mallocz(size);
}

/**
 * (Re)allocate all per-context work buffers for the given picture geometry
 * and record the strides they were sized for.
 *
 * Every buffer is replaced through reallocAlign(), so previous contents are
 * lost. The luma histogram is seeded with a uniform value derived from the
 * picture area.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size the temporary picture buffers are sized for
 * @param qpStride line size the QP tables are sized for
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
    int mbWidth = (width+15)>>4;   // picture size in 16x16 blocks, rounded up
    int mbHeight= (height+15)>>4;
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, 8, stride*24);
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
    for(i=0; i<256; i++)
            c->yHistogram[i]= width*height/64*15/256;

    for(i=0; i<3; i++){
        //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
        reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}

/**
 * Name callback referenced by av_codec_context_class.
 * The argument is ignored; the result is always the literal "postproc".
 */
static const char *context_to_name(void *ptr)
{
    (void)ptr;
    return "postproc";
}

/* Class descriptor stored in PPContext.av_class by pp_get_context(). */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
pp_context *pp_get_context(int width, int height, int cpuCaps){
964
    PPContext *c= av_malloc(sizeof(PPContext));
965
    int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed

    memset(c, 0, sizeof(PPContext));
    c->av_class = &av_codec_context_class;
    c->cpuCaps= cpuCaps;
    if(cpuCaps&PP_FORMAT){
        c->hChromaSubSample= cpuCaps&0x3;
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
    }else{
        c->hChromaSubSample= 1;
        c->vChromaSubSample= 1;
    }

    reallocBuffers(c, width, height, stride, qpStride);

    c->frameNum=-1;

    return c;
984 985
}

void pp_free_context(void *vc){
987 988
    PPContext *c = (PPContext*)vc;
    int i;
989

990 991
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
992

993 994 995 996 997 998 999 1000
    av_free(c->tempBlocks);
    av_free(c->yHistogram);
    av_free(c->tempDst);
    av_free(c->tempSrc);
    av_free(c->deintTemp);
    av_free(c->stdQPTable);
    av_free(c->nonBQPTable);
    av_free(c->forcedQPTable);
1001

1002
    memset(c, 0, sizeof(PPContext));
1003

1004
    av_free(c);
1005 1006
}

void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1008 1009 1010
                     uint8_t * dst[3], const int dstStride[3],
                     int width, int height,
                     const QP_STORE_T *QP_store,  int QPStride,
1011
                     pp_mode *vm,  void *vc, int pict_type)
1012
{
1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    PPMode *mode = (PPMode*)vm;
    PPContext *c = (PPContext*)vc;
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
    int absQPStride = FFABS(QPStride);

    // c->stride and c->QPStride are always positive
    if(c->stride < minStride || c->qpStride < absQPStride)
        reallocBuffers(c, width, height,
                       FFMAX(minStride, c->stride),
                       FFMAX(c->qpStride, absQPStride));

    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
        int i;
        QP_store= c->forcedQPTable;
        absQPStride = QPStride = 0;
        if(mode->lumMode & FORCE_QUANT)
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
        else
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
    }
Michael Niedermayer's avatar
Michael Niedermayer committed
1035

1036 1037 1038 1039 1040
    if(pict_type & PP_PICT_TYPE_QP2){
        int i;
        const int count= mbHeight * absQPStride;
        for(i=0; i<(count>>2); i++){
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1041
        }
1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052
        for(i<<=2; i<count; i++){
            c->stdQPTable[i] = QP_store[i]>>1;
        }
        QP_store= c->stdQPTable;
        QPStride= absQPStride;
    }

    if(0){
        int x,y;
        for(y=0; y<mbHeight; y++){
            for(x=0; x<mbWidth; x++){
1053
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1054 1055
            }
            av_log(c, AV_LOG_INFO, "\n");
1056
        }
1057
        av_log(c, AV_LOG_INFO, "\n");
1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074
    }

    if((pict_type&7)!=3){
        if (QPStride >= 0){
            int i;
            const int count= mbHeight * QPStride;
            for(i=0; i<(count>>2); i++){
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
            }
            for(i<<=2; i<count; i++){
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
            }
        } else {
            int i,j;
            for(i=0; i<mbHeight; i++) {
                for(j=0; j<absQPStride; j++) {
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1075
                }
1076
            }
1077
        }
1078
    }
1079

1080 1081
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
           mode->lumMode, mode->chromMode);
1082

1083
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1084 1085
                width, height, QP_store, QPStride, 0, mode, c);

1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102
    width  = (width )>>c->hChromaSubSample;
    height = (height)>>c->vChromaSubSample;

    if(mode->chromMode){
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
                    width, height, QP_store, QPStride, 1, mode, c);
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
                    width, height, QP_store, QPStride, 2, mode, c);
    }
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
        linecpy(dst[1], src[1], height, srcStride[1]);
        linecpy(dst[2], src[2], height, srcStride[2]);
    }else{
        int y;
        for(y=0; y<height; y++){
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1103
        }
1104
    }
1105 1106
}