postprocess.c 37.4 KB
Newer Older
1
/*
2 3 4 5
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
 *
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
6
 * This file is part of FFmpeg.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22

Michael Niedermayer's avatar
Michael Niedermayer committed
23
/**
24
 * @file
Michael Niedermayer's avatar
Michael Niedermayer committed
25 26
 * postprocessing.
 */
27

28
/*
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
                        C       MMX     MMX2    3DNow   AltiVec
isVertDC                Ec      Ec                      Ec
isVertMinMaxOk          Ec      Ec                      Ec
doVertLowPass           E               e       e       Ec
doVertDefFilter         Ec      Ec      e       e       Ec
isHorizDC               Ec      Ec                      Ec
isHorizMinMaxOk         a       E                       Ec
doHorizLowPass          E               e       e       Ec
doHorizDefFilter        Ec      Ec      e       e       Ec
do_a_deblock            Ec      E       Ec      E
deRing                  E               e       e*      Ecp
Vertical RKAlgo1        E               a       a
Horizontal RKAlgo1                      a       a
Vertical X1#            a               E       E
Horizontal X1#          a               E       E
LinIpolDeinterlace      e               E       E*
CubicIpolDeinterlace    a               e       e*
LinBlendDeinterlace     e               E       E*
MedianDeinterlace#      E       Ec      Ec
TempDeNoiser#           E               e       e       Ec
49

Diego Biurrun's avatar
Diego Biurrun committed
50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
Diego Biurrun's avatar
Diego Biurrun committed
51
# more or less selfinvented filters so the exactness is not too meaningful
52
E = Exact implementation
Diego Biurrun's avatar
Diego Biurrun committed
53
e = almost exact implementation (slightly different rounding,...)
54 55
a = alternative / approximate impl
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57 58 59 60 61 62 63
*/

/*
TODO:
reduce the time wasted on the mem transfer
unroll stuff if instructions depend too much on the prior one
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67 68
compare the quality & speed of all filters
split this huge file
Michael Niedermayer's avatar
Michael Niedermayer committed
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72 73
*/

74
//Changelog: use git log
75

76
#include "config.h"
77
#include "libavutil/avutil.h"
78 79
#include <inttypes.h>
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82
//#undef HAVE_MMX2
83
//#define HAVE_AMD3DNOW
84
//#undef HAVE_MMX
85
//#undef ARCH_X86
86
//#define DEBUG_BRIGHTNESS
87
#include "postprocess.h"
Michael Niedermayer's avatar
Michael Niedermayer committed
88
#include "postprocess_internal.h"
89
#include "libavutil/avstring.h"
90

91 92 93 94 95
unsigned postproc_version(void)
{
    return LIBPOSTPROC_VERSION_INT;
}

96
const char *postproc_configuration(void)
97
{
98
    return FFMPEG_CONFIGURATION;
99 100
}

101
const char *postproc_license(void)
102 103
{
#define LICENSE_PREFIX "libpostproc license: "
104
    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
105 106
}

107
#if HAVE_ALTIVEC_H
108 109 110
#include <altivec.h>
#endif

111 112
#define GET_MODE_BUFFER_SIZE 500
#define OPTIONS_ARRAY_SIZE 10
113 114 115
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
116

117
#if ARCH_X86
118 119 120 121 122 123 124 125
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
126
#endif
127

128
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
129

130

131 132
static struct PPFilter filters[]=
{
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",                1, 5, 6, DERING},
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
    {NULL, NULL,0,0,0,0} //End Marker
152 153
};

154
static const char *replaceTable[]=
155
{
156 157 158 159 160 161
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
162 163
};

164

165
#if ARCH_X86
166 167
static inline void prefetchnta(void *p)
{
168
    __asm__ volatile(   "prefetchnta (%0)\n\t"
169 170
        : : "r" (p)
    );
171 172 173 174
}

static inline void prefetcht0(void *p)
{
175
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
176 177
        : : "r" (p)
    );
178 179 180 181
}

static inline void prefetcht1(void *p)
{
182
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
183 184
        : : "r" (p)
    );
185 186 187 188
}

static inline void prefetcht2(void *p)
{
189
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
190 191
        : : "r" (p)
    );
192
}
193
#endif
194

Diego Biurrun's avatar
Diego Biurrun committed
195 196
/* The horizontal functions exist only in C because the MMX
 * code is faster with vertical filters and transposing. */
197

198 199 200
/**
 * Check if the given 8x8 Block is mostly "flat"
 */
201
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
202
{
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
    int numEq= 0;
    int y;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;

    for(y=0; y<BLOCK_SIZE; y++){
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
        src+= stride;
    }
    return numEq > c->ppMode.flatnessThreshold;
219 220 221 222 223
}

/**
 * Check if the middle 8x8 Block in the given 8x16 block is flat
 */
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
{
    int numEq= 0;
    int y;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;

    src+= stride*4; // src points to begin of the 8x8 Block
    for(y=0; y<BLOCK_SIZE-1; y++){
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
        src+= stride;
    }
    return numEq > c->ppMode.flatnessThreshold;
244 245
}

246
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
247
{
248
    int i;
249
#if 1
250 251 252 253 254 255 256 257 258 259
    for(i=0; i<2; i++){
        if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
260
#else
261 262 263 264
    for(i=0; i<8; i++){
        if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
265
#endif
266
    return 1;
267
}
268

269 270 271 272
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
{
#if 1
#if 1
273 274 275 276 277 278 279 280
    int x;
    src+= stride*4;
    for(x=0; x<BLOCK_SIZE; x+=4){
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
    }
281
#else
282 283 284 285 286
    int x;
    src+= stride*3;
    for(x=0; x<BLOCK_SIZE; x++){
        if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
    }
287
#endif
288
    return 1;
289
#else
290 291 292 293 294 295 296 297 298 299
    int x;
    src+= stride*4;
    for(x=0; x<BLOCK_SIZE; x++){
        int min=255;
        int max=0;
        int y;
        for(y=0; y<8; y++){
            int v= src[x + y*stride];
            if(v>max) max=v;
            if(v<min) min=v;
300
        }
301 302 303
        if(max-min > 2*QP) return 0;
    }
    return 1;
304 305 306
#endif
}

307 308 309 310 311 312 313 314 315 316
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( isHorizDC_C(src, stride, c) ){
        if( isHorizMinMaxOk_C(src, stride, c->QP) )
            return 1;
        else
            return 0;
    }else{
        return 2;
    }
317 318
}

319 320 321 322 323 324 325 326 327 328
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( isVertDC_C(src, stride, c) ){
        if( isVertMinMaxOk_C(src, stride, c->QP) )
            return 1;
        else
            return 0;
    }else{
        return 2;
    }
329 330
}

331
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
332
{
333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
    int y;
    for(y=0; y<BLOCK_SIZE; y++){
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);

        if(FFABS(middleEnergy) < 8*c->QP){
            const int q=(dst[3] - dst[4])/2;
            const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
            const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

            int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
            d= FFMAX(d, 0);

            d= (5*d + 32) >> 6;
            d*= FFSIGN(-middleEnergy);

            if(q>0)
            {
                d= d<0 ? 0 : d;
                d= d>q ? q : d;
            }
            else
            {
                d= d>0 ? 0 : d;
                d= d<q ? q : d;
            }

            dst[3]-= d;
            dst[4]+= d;
361
        }
362 363
        dst+= stride;
    }
364 365 366 367 368 369
}

/**
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 */
370
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
371
{
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399
    int y;
    for(y=0; y<BLOCK_SIZE; y++){
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];

        int sums[10];
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
        sums[1] = sums[0] - first  + dst[3];
        sums[2] = sums[1] - first  + dst[4];
        sums[3] = sums[2] - first  + dst[5];
        sums[4] = sums[3] - first  + dst[6];
        sums[5] = sums[4] - dst[0] + dst[7];
        sums[6] = sums[5] - dst[1] + last;
        sums[7] = sums[6] - dst[2] + last;
        sums[8] = sums[7] - dst[3] + last;
        sums[9] = sums[8] - dst[4] + last;

        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;

        dst+= stride;
    }
400 401
}

402
/**
403 404
 * Experimental Filter 1 (Horizontal)
 * will not damage linear gradients
Diego Biurrun's avatar
Diego Biurrun committed
405
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
Diego Biurrun's avatar
Diego Biurrun committed
406 407
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 * MMX2 version does correct clipping C version does not
408
 * not identical with the vertical one
409
 */
410 411
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
{
412 413 414 415 416 417 418
    int y;
    static uint64_t *lut= NULL;
    if(lut==NULL)
    {
        int i;
        lut = av_malloc(256*8);
        for(i=0; i<256; i++)
419
        {
420
            int v= i < 128 ? 2*i : 2*(i-256);
421
/*
422
//Simulate 112242211 9-Tap filter
423 424 425 426
            uint64_t a= (v/16)  & 0xFF;
            uint64_t b= (v/8)   & 0xFF;
            uint64_t c= (v/4)   & 0xFF;
            uint64_t d= (3*v/8) & 0xFF;
427
*/
428
//Simulate piecewise linear interpolation
429 430 431 432 433 434 435 436 437 438 439 440
            uint64_t a= (v/16)   & 0xFF;
            uint64_t b= (v*3/16) & 0xFF;
            uint64_t c= (v*5/16) & 0xFF;
            uint64_t d= (7*v/16) & 0xFF;
            uint64_t A= (0x100 - a)&0xFF;
            uint64_t B= (0x100 - b)&0xFF;
            uint64_t C= (0x100 - c)&0xFF;
            uint64_t D= (0x100 - c)&0xFF;

            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
                       (D<<24) | (C<<16) | (B<<8)  | (A);
            //lut[i] = (v<<32) | (v<<24);
441
        }
442
    }
443

444 445 446 447
    for(y=0; y<BLOCK_SIZE; y++){
        int a= src[1] - src[2];
        int b= src[3] - src[4];
        int c= src[5] - src[6];
448

449
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
450

451 452
        if(d < QP){
            int v = d * FFSIGN(-b);
453

454 455 456 457 458 459
            src[1] +=v/8;
            src[2] +=v/4;
            src[3] +=3*v/8;
            src[4] -=3*v/8;
            src[5] -=v/4;
            src[6] -=v/8;
460
        }
461 462
        src+=stride;
    }
463 464
}

465 466 467
/**
 * accurate deblock filter
 */
468
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
469 470 471 472
    int y;
    const int QP= c->QP;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;
473
//START_TIMER
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
    src+= step*4; // src points to begin of the 8x8 Block
    for(y=0; y<8; y++){
        int numEq= 0;

        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
        if(numEq > c->ppMode.flatnessThreshold){
            int min, max, x;

            if(src[0] > src[step]){
                max= src[0];
                min= src[step];
            }else{
                max= src[step];
                min= src[0];
            }
            for(x=2; x<8; x+=2){
                if(src[x*step] > src[(x+1)*step]){
                        if(src[x    *step] > max) max= src[ x   *step];
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
501
                }else{
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
                        if(src[ x   *step] < min) min= src[ x   *step];
                }
            }
            if(max-min < 2*QP){
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];

                int sums[10];
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
                sums[1] = sums[0] - first       + src[3*step];
                sums[2] = sums[1] - first       + src[4*step];
                sums[3] = sums[2] - first       + src[5*step];
                sums[4] = sums[3] - first       + src[6*step];
                sums[5] = sums[4] - src[0*step] + src[7*step];
                sums[6] = sums[5] - src[1*step] + last;
                sums[7] = sums[6] - src[2*step] + last;
                sums[8] = sums[7] - src[3*step] + last;
                sums[9] = sums[8] - src[4*step] + last;

                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
            }
        }else{
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);

            if(FFABS(middleEnergy) < 8*QP){
                const int q=(src[3*step] - src[4*step])/2;
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);

                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                d= FFMAX(d, 0);

                d= (5*d + 32) >> 6;
                d*= FFSIGN(-middleEnergy);

                if(q>0){
                    d= d<0 ? 0 : d;
                    d= d>q ? q : d;
                }else{
                    d= d>0 ? 0 : d;
                    d= d<q ? q : d;
551 552
                }

553 554 555
                src[3*step]-= d;
                src[4*step]+= d;
            }
556
        }
557 558 559

        src += stride;
    }
560 561 562 563 564 565
/*if(step==16){
    STOP_TIMER("step16")
}else{
    STOP_TIMER("stepX")
}*/
}
566

567
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
568
//Plain C versions
569
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
570 571 572
#define COMPILE_C
#endif

573
#if HAVE_ALTIVEC
574 575 576
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

577
#if ARCH_X86
578

579
#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
580 581 582
#define COMPILE_MMX
#endif

583
#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
584 585 586
#define COMPILE_MMX2
#endif

587
#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
588 589
#define COMPILE_3DNOW
#endif
590
#endif /* ARCH_X86 */
591 592

#undef HAVE_MMX
593
#define HAVE_MMX 0
594
#undef HAVE_MMX2
595
#define HAVE_MMX2 0
596 597
#undef HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 0
598
#undef HAVE_ALTIVEC
599
#define HAVE_ALTIVEC 0
600 601

#ifdef COMPILE_C
602 603
#define RENAME(a) a ## _C
#include "postprocess_template.c"
604
#endif
605

606 607
#ifdef COMPILE_ALTIVEC
#undef RENAME
608 609
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
610 611 612 613 614
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

615
//MMX versions
616
#ifdef COMPILE_MMX
617
#undef RENAME
618 619
#undef HAVE_MMX
#define HAVE_MMX 1
620 621
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
622
#endif
623 624

//MMX2 versions
625
#ifdef COMPILE_MMX2
626
#undef RENAME
627 628 629 630
#undef HAVE_MMX
#undef HAVE_MMX2
#define HAVE_MMX 1
#define HAVE_MMX2 1
631 632
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
633
#endif
634 635

//3DNOW versions
636
#ifdef COMPILE_3DNOW
637
#undef RENAME
638
#undef HAVE_MMX
639
#undef HAVE_MMX2
640
#undef HAVE_AMD3DNOW
641 642
#define HAVE_MMX 1
#define HAVE_MMX2 0
643
#define HAVE_AMD3DNOW 1
644 645
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
646
#endif
647

Diego Biurrun's avatar
Diego Biurrun committed
648
// minor note: the HAVE_xyz is messed up after that line so do not use it.
649

Michael Niedermayer's avatar
Michael Niedermayer committed
650
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
651
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
652
{
653 654 655
    PPContext *c= (PPContext *)vc;
    PPMode *ppMode= (PPMode *)vm;
    c->ppMode= *ppMode; //FIXME
656

657 658 659
    // Using ifs here as they are faster than function pointers although the
    // difference would not be measurable here but it is much better because
    // someone might exchange the CPU whithout restarting MPlayer ;)
660
#if CONFIG_RUNTIME_CPUDETECT
661
#if ARCH_X86
662 663 664 665 666 667 668 669 670
    // ordered per speed fastest first
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
671
#else
672
#if HAVE_ALTIVEC
673 674 675
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
676
#endif
677
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
678
#endif
679
#else //CONFIG_RUNTIME_CPUDETECT
680
#if   HAVE_MMX2
681
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
682
#elif HAVE_AMD3DNOW
683
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
684
#elif HAVE_MMX
685
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
686
#elif HAVE_ALTIVEC
687
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
688
#else
689
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
690
#endif
691
#endif //!CONFIG_RUNTIME_CPUDETECT
692 693
}

694
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
695
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
696

697 698
/* -pp Command line Help
*/
699
#if LIBPOSTPROC_VERSION_INT < (52<<16)
700
const char *const pp_help=
701 702 703
#else
const char pp_help[] =
#endif
704
"Available postprocessing filters:\n"
705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forceQuant      <quantizer>             force quantizer\n"
736 737 738
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
739
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
740
"short form example:\n"
741
"vb:a/hb:a/lb                                   de,-vb\n"
742 743
"more examples:\n"
"tn:64:128:256\n"
744
"\n"
Michael Niedermayer's avatar
Michael Niedermayer committed
745
;
746

747
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
748
{
749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769
    char temp[GET_MODE_BUFFER_SIZE];
    char *p= temp;
    static const char filterDelimiters[] = ",/";
    static const char optionDelimiters[] = ":";
    struct PPMode *ppMode;
    char *filterToken;

    ppMode= av_malloc(sizeof(PPMode));

    ppMode->lumMode= 0;
    ppMode->chromMode= 0;
    ppMode->maxTmpNoise[0]= 700;
    ppMode->maxTmpNoise[1]= 1500;
    ppMode->maxTmpNoise[2]= 3000;
    ppMode->maxAllowedY= 234;
    ppMode->minAllowedY= 16;
    ppMode->baseDcDiff= 256/8;
    ppMode->flatnessThreshold= 56-16-1;
    ppMode->maxClippedThreshold= 0.01;
    ppMode->error=0;

770 771
    memset(temp, 0, GET_MODE_BUFFER_SIZE);
    av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796

    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);

    for(;;){
        char *filterName;
        int q= 1000000; //PP_QUALITY_MAX;
        int chrom=-1;
        int luma=-1;
        char *option;
        char *options[OPTIONS_ARRAY_SIZE];
        int i;
        int filterNameOk=0;
        int numOfUnknownOptions=0;
        int enable=1; //does the user want us to enabled or disabled the filter

        filterToken= strtok(p, filterDelimiters);
        if(filterToken == NULL) break;
        p+= strlen(filterToken) + 1; // p points to next filterToken
        filterName= strtok(filterToken, optionDelimiters);
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);

        if(*filterName == '-'){
            enable=0;
            filterName++;
        }
797

798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821
        for(;;){ //for all options
            option= strtok(NULL, optionDelimiters);
            if(option == NULL) break;

            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
            else{
                options[numOfUnknownOptions] = option;
                numOfUnknownOptions++;
            }
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
        }
        options[numOfUnknownOptions] = NULL;

        /* replace stuff from the replace Table */
        for(i=0; replaceTable[2*i]!=NULL; i++){
            if(!strcmp(replaceTable[2*i], filterName)){
                int newlen= strlen(replaceTable[2*i + 1]);
                int plen;
                int spaceLeft;

822
                p--, *p=',';
823 824 825

                plen= strlen(p);
                spaceLeft= p - temp + plen;
826
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
827 828
                    ppMode->error++;
                    break;
829
                }
830 831 832 833 834
                memmove(p + newlen, p, plen+1);
                memcpy(p, replaceTable[2*i + 1], newlen);
                filterNameOk=1;
            }
        }
835

836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860
        for(i=0; filters[i].shortName!=NULL; i++){
            if(   !strcmp(filters[i].longName, filterName)
               || !strcmp(filters[i].shortName, filterName)){
                ppMode->lumMode &= ~filters[i].mask;
                ppMode->chromMode &= ~filters[i].mask;

                filterNameOk=1;
                if(!enable) break; // user wants to disable it

                if(q >= filters[i].minLumQuality && luma)
                    ppMode->lumMode|= filters[i].mask;
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
                    if(q >= filters[i].minChromQuality)
                            ppMode->chromMode|= filters[i].mask;

                if(filters[i].mask == LEVEL_FIX){
                    int o;
                    ppMode->minAllowedY= 16;
                    ppMode->maxAllowedY= 234;
                    for(o=0; options[o]!=NULL; o++){
                        if(  !strcmp(options[o],"fullyrange")
                           ||!strcmp(options[o],"f")){
                            ppMode->minAllowedY= 0;
                            ppMode->maxAllowedY= 255;
                            numOfUnknownOptions--;
861
                        }
862
                    }
863
                }
864
                else if(filters[i].mask == TEMP_NOISE_FILTER)
865
                {
866 867 868 869 870 871 872 873 874 875 876
                    int o;
                    int numOfNoises=0;

                    for(o=0; options[o]!=NULL; o++){
                        char *tail;
                        ppMode->maxTmpNoise[numOfNoises]=
                            strtol(options[o], &tail, 0);
                        if(tail!=options[o]){
                            numOfNoises++;
                            numOfUnknownOptions--;
                            if(numOfNoises >= 3) break;
877
                        }
878
                    }
879
                }
880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
                    int o;

                    for(o=0; options[o]!=NULL && o<2; o++){
                        char *tail;
                        int val= strtol(options[o], &tail, 0);
                        if(tail==options[o]) break;

                        numOfUnknownOptions--;
                        if(o==0) ppMode->baseDcDiff= val;
                        else ppMode->flatnessThreshold= val;
                    }
                }
                else if(filters[i].mask == FORCE_QUANT){
                    int o;
                    ppMode->forcedQuant= 15;

                    for(o=0; options[o]!=NULL && o<1; o++){
                        char *tail;
                        int val= strtol(options[o], &tail, 0);
                        if(tail==options[o]) break;

                        numOfUnknownOptions--;
                        ppMode->forcedQuant= val;
                    }
                }
            }
908
        }
909 910 911 912 913 914 915 916 917 918 919
        if(!filterNameOk) ppMode->error++;
        ppMode->error += numOfUnknownOptions;
    }

    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
    if(ppMode->error){
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
        av_free(ppMode);
        return NULL;
    }
    return ppMode;
920 921
}

922
void pp_free_mode(pp_mode *mode){
Luca Barbato's avatar
Luca Barbato committed
923
    av_free(mode);
Michael Niedermayer's avatar
Michael Niedermayer committed
924 925
}

926
static void reallocAlign(void **p, int alignment, int size){
927 928
    av_free(*p);
    *p= av_mallocz(size);
929 930
}

Michael Niedermayer's avatar
Michael Niedermayer committed
931
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, 8, stride*24);
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
    for(i=0; i<256; i++)
            c->yHistogram[i]= width*height/64*15/256;

    for(i=0; i<3; i++){
        //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
948 949
        reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
950 951 952 953 954 955
    }

    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
956 957
}

958 959 960 961
static const char * context_to_name(void * ptr) {
    return "postproc";
}

962
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
963

964
pp_context *pp_get_context(int width, int height, int cpuCaps){
965
    PPContext *c= av_malloc(sizeof(PPContext));
966
    int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed

    memset(c, 0, sizeof(PPContext));
    c->av_class = &av_codec_context_class;
    c->cpuCaps= cpuCaps;
    if(cpuCaps&PP_FORMAT){
        c->hChromaSubSample= cpuCaps&0x3;
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
    }else{
        c->hChromaSubSample= 1;
        c->vChromaSubSample= 1;
    }

    reallocBuffers(c, width, height, stride, qpStride);

    c->frameNum=-1;

    return c;
985 986
}

Michael Niedermayer's avatar
Michael Niedermayer committed
987
void pp_free_context(void *vc){
988 989
    PPContext *c = (PPContext*)vc;
    int i;
990

991 992
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
993

994 995 996 997 998 999 1000 1001
    av_free(c->tempBlocks);
    av_free(c->yHistogram);
    av_free(c->tempDst);
    av_free(c->tempSrc);
    av_free(c->deintTemp);
    av_free(c->stdQPTable);
    av_free(c->nonBQPTable);
    av_free(c->forcedQPTable);
1002

1003
    memset(c, 0, sizeof(PPContext));
1004

1005
    av_free(c);
1006 1007
}

Michael Niedermayer's avatar
Michael Niedermayer committed
1008
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1009 1010 1011
                     uint8_t * dst[3], const int dstStride[3],
                     int width, int height,
                     const QP_STORE_T *QP_store,  int QPStride,
1012
                     pp_mode *vm,  void *vc, int pict_type)
1013
{
1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    PPMode *mode = (PPMode*)vm;
    PPContext *c = (PPContext*)vc;
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
    int absQPStride = FFABS(QPStride);

    // c->stride and c->QPStride are always positive
    if(c->stride < minStride || c->qpStride < absQPStride)
        reallocBuffers(c, width, height,
                       FFMAX(minStride, c->stride),
                       FFMAX(c->qpStride, absQPStride));

    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
        int i;
        QP_store= c->forcedQPTable;
        absQPStride = QPStride = 0;
        if(mode->lumMode & FORCE_QUANT)
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
        else
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
    }
Michael Niedermayer's avatar
Michael Niedermayer committed
1036

1037 1038 1039 1040 1041
    if(pict_type & PP_PICT_TYPE_QP2){
        int i;
        const int count= mbHeight * absQPStride;
        for(i=0; i<(count>>2); i++){
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1042
        }
1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053
        for(i<<=2; i<count; i++){
            c->stdQPTable[i] = QP_store[i]>>1;
        }
        QP_store= c->stdQPTable;
        QPStride= absQPStride;
    }

    if(0){
        int x,y;
        for(y=0; y<mbHeight; y++){
            for(x=0; x<mbWidth; x++){
1054
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1055 1056
            }
            av_log(c, AV_LOG_INFO, "\n");
1057
        }
1058
        av_log(c, AV_LOG_INFO, "\n");
1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075
    }

    if((pict_type&7)!=3){
        if (QPStride >= 0){
            int i;
            const int count= mbHeight * QPStride;
            for(i=0; i<(count>>2); i++){
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
            }
            for(i<<=2; i<count; i++){
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
            }
        } else {
            int i,j;
            for(i=0; i<mbHeight; i++) {
                for(j=0; j<absQPStride; j++) {
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1076
                }
1077
            }
1078
        }
1079
    }
1080

1081 1082
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
           mode->lumMode, mode->chromMode);
1083

1084
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1085 1086
                width, height, QP_store, QPStride, 0, mode, c);

1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103
    width  = (width )>>c->hChromaSubSample;
    height = (height)>>c->vChromaSubSample;

    if(mode->chromMode){
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
                    width, height, QP_store, QPStride, 1, mode, c);
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
                    width, height, QP_store, QPStride, 2, mode, c);
    }
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
        linecpy(dst[1], src[1], height, srcStride[1]);
        linecpy(dst[2], src[2], height, srcStride[2]);
    }else{
        int y;
        for(y=0; y<height; y++){
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1104
        }
1105
    }
1106
}