postprocess.c 37.2 KB
Newer Older
1
/*
2 3 4 5
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
 *
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
6
 * This file is part of FFmpeg.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22

Michael Niedermayer's avatar
Michael Niedermayer committed
23 24 25 26
/**
 * @file postprocess.c
 * postprocessing.
 */
27

28
/*
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
                        C       MMX     MMX2    3DNow   AltiVec
isVertDC                Ec      Ec                      Ec
isVertMinMaxOk          Ec      Ec                      Ec
doVertLowPass           E               e       e       Ec
doVertDefFilter         Ec      Ec      e       e       Ec
isHorizDC               Ec      Ec                      Ec
isHorizMinMaxOk         a       E                       Ec
doHorizLowPass          E               e       e       Ec
doHorizDefFilter        Ec      Ec      e       e       Ec
do_a_deblock            Ec      E       Ec      E
deRing                  E               e       e*      Ecp
Vertical RKAlgo1        E               a       a
Horizontal RKAlgo1                      a       a
Vertical X1#            a               E       E
Horizontal X1#          a               E       E
LinIpolDeinterlace      e               E       E*
CubicIpolDeinterlace    a               e       e*
LinBlendDeinterlace     e               E       E*
MedianDeinterlace#      E       Ec      Ec
TempDeNoiser#           E               e       e       Ec

* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
# more or less self-invented filters so the exactness is not too meaningful
E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
a = alternative / approximate impl
c = checked against the other implementations (-vo md5)
p = partially optimized, still some work to do
*/

/*
TODO:
reduce the time wasted on the mem transfer
unroll stuff if instructions depend too much on the prior one
move YScale thing to the end instead of fixing QP
write a faster and higher quality deblocking filter :)
make the mainloop more flexible (variable number of blocks at once
        (the if/else stuff per block is slowing things down)
compare the quality & speed of all filters
split this huge file
optimize c versions
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
...
*/

Diego Biurrun's avatar
Diego Biurrun committed
74
//Changelog: use the Subversion log
75

76
#include "config.h"
77
#include "libavutil/avutil.h"
78 79
#include <inttypes.h>
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82 83 84
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
85
//#undef HAVE_MMX2
86
//#define HAVE_3DNOW
87
//#undef HAVE_MMX
88
//#undef ARCH_X86
89
//#define DEBUG_BRIGHTNESS
90
#include "postprocess.h"
Michael Niedermayer's avatar
Michael Niedermayer committed
91
#include "postprocess_internal.h"
92

93 94 95 96
#ifdef HAVE_ALTIVEC_H
#include <altivec.h>
#endif

97 98
/* Size in bytes of the scratch copy of the mode string made by
 * pp_get_mode_by_name_and_quality(). */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter options[] array (one slot is kept for the
 * terminating NULL). */
#define OPTIONS_ARRAY_SIZE 10
/* Edge length, in pixels, of the blocks the filters operate on. */
#define BLOCK_SIZE 8
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the stride of a temporary block buffer used by the templates - confirm. */
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
102

103
#if defined(ARCH_X86)
104 105 106 107 108 109 110 111
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
112
#endif
113

114
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
115

116

117 118
static struct PPFilter filters[]=
{
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",                1, 5, 6, DERING},
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
    {NULL, NULL,0,0,0,0} //End Marker
138 139
};

140
/**
 * Alias table: a flat list of (name, expansion) string pairs terminated by a
 * single NULL. Entry 2*i is the alias, entry 2*i+1 the filter string that
 * pp_get_mode_by_name_and_quality() splices into the mode string in its place.
 */
static const char *replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};

150

151
#if defined(ARCH_X86)
152 153
static inline void prefetchnta(void *p)
{
154 155 156
    asm volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
157 158 159 160
}

static inline void prefetcht0(void *p)
{
161 162 163
    asm volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
164 165 166 167
}

static inline void prefetcht1(void *p)
{
168 169 170
    asm volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
171 172 173 174
}

static inline void prefetcht2(void *p)
{
175 176 177
    asm volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
178
}
179
#endif
180

Diego Biurrun's avatar
Diego Biurrun committed
181 182
/* The horizontal functions exist only in C because the MMX
 * code is faster with vertical filters and transposing. */
183

184 185 186
/**
 * Check if the given 8x8 Block is mostly "flat"
 */
187
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
188
{
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
    int numEq= 0;
    int y;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;

    for(y=0; y<BLOCK_SIZE; y++){
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
        src+= stride;
    }
    return numEq > c->ppMode.flatnessThreshold;
205 206 207 208 209
}

/**
 * Check if the middle 8x8 Block in the given 8x16 block is flat
 */
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
{
    int numEq= 0;
    int y;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;

    src+= stride*4; // src points to begin of the 8x8 Block
    for(y=0; y<BLOCK_SIZE-1; y++){
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
        src+= stride;
    }
    return numEq > c->ppMode.flatnessThreshold;
230 231
}

232
/**
 * Cheap horizontal range test on an 8x8 block.
 *
 * The active variant samples one pixel pair per row, the pairs being 5
 * columns apart (wrapping inside the row), and rejects the block as soon as
 * one sampled difference lies outside [-2*QP, 2*QP].
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the allowed range
 * @return 1 if every sampled difference is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    int i;
#if 1
    for(i=0; i<2; i++){
        // unsigned compare: true when the difference is < -2*QP or > 2*QP
        if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
#else
    // alternative: compare the first and last pixel of every row
    for(i=0; i<8; i++){
        if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
#endif
    return 1;
}
254

255 256 257 258
/**
 * Cheap vertical range test on the 8x8 block that starts 4 rows below src.
 *
 * The active variant samples one pixel pair per column, the rows of a pair
 * being 5 apart (wrapping inside the block), and rejects the block as soon
 * as one sampled difference lies outside [-2*QP, 2*QP]. The disabled
 * variants are kept for reference.
 *
 * @param src    pixel 4 rows above the block to test
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the allowed range
 * @return 1 if every sampled difference is within range, 0 otherwise
 */
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
{
#if 1
#if 1
    int x;
    src+= stride*4;
    for(x=0; x<BLOCK_SIZE; x+=4){
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
    }
#else
    int x;
    src+= stride*3;
    for(x=0; x<BLOCK_SIZE; x++){
        if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
    }
#endif
    return 1;
#else
    // exact variant: true per-column min/max over the 8 rows
    int x;
    src+= stride*4;
    for(x=0; x<BLOCK_SIZE; x++){
        int min=255;
        int max=0;
        int y;
        for(y=0; y<8; y++){
            int v= src[x + y*stride];
            if(v>max) max=v;
            if(v<min) min=v;
        }
        if(max-min > 2*QP) return 0;
    }
    return 1;
#endif
}

293 294 295 296 297 298 299 300 301 302
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() accepts it,
 *         0 if it is flat but isHorizMinMaxOk_C() rejects it
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}

305 306 307 308 309 310 311 312 313 314
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() accepts it,
 *         0 if it is flat but isVertMinMaxOk_C() rejects it
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}

317
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of the BLOCK_SIZE rows, the two pixels dst[3] and dst[4] are
 * corrected by an amount derived from the "energy" at that position and the
 * energies on either side. Rows whose middle energy is not below 8*QP are
 * left untouched.
 *
 * @param dst    first pixel of the 8 pixel wide row span, modified in place
 * @param stride distance in bytes between two rows
 * @param c      context; only c->QP is read
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int y;
    for(y=0; y<BLOCK_SIZE; y++){
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);

        if(FFABS(middleEnergy) < 8*c->QP){
            const int q=(dst[3] - dst[4])/2;
            const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
            const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

            int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
            d= FFMAX(d, 0);

            d= (5*d + 32) >> 6;
            d*= FFSIGN(-middleEnergy);

            // clamp d to the interval between 0 and q (half the step
            // between dst[3] and dst[4])
            if(q>0)
            {
                d= d<0 ? 0 : d;
                d= d>q ? q : d;
            }
            else
            {
                d= d>0 ? 0 : d;
                d= d<q ? q : d;
            }

            dst[3]-= d;
            dst[4]+= d;
        }
        dst+= stride;
    }
}

/**
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 */
356
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
357
{
358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385
    int y;
    for(y=0; y<BLOCK_SIZE; y++){
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];

        int sums[10];
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
        sums[1] = sums[0] - first  + dst[3];
        sums[2] = sums[1] - first  + dst[4];
        sums[3] = sums[2] - first  + dst[5];
        sums[4] = sums[3] - first  + dst[6];
        sums[5] = sums[4] - dst[0] + dst[7];
        sums[6] = sums[5] - dst[1] + last;
        sums[7] = sums[6] - dst[2] + last;
        sums[8] = sums[7] - dst[3] + last;
        sums[9] = sums[8] - dst[4] + last;

        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;

        dst+= stride;
    }
386 387
}

388
/**
389 390
 * Experimental Filter 1 (Horizontal)
 * will not damage linear gradients
Diego Biurrun's avatar
Diego Biurrun committed
391
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
Diego Biurrun's avatar
Diego Biurrun committed
392 393
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 * MMX2 version does correct clipping C version does not
394
 * not identical with the vertical one
395
 */
396 397
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
{
398 399 400 401 402 403 404
    int y;
    static uint64_t *lut= NULL;
    if(lut==NULL)
    {
        int i;
        lut = av_malloc(256*8);
        for(i=0; i<256; i++)
405
        {
406
            int v= i < 128 ? 2*i : 2*(i-256);
407
/*
408
//Simulate 112242211 9-Tap filter
409 410 411 412
            uint64_t a= (v/16)  & 0xFF;
            uint64_t b= (v/8)   & 0xFF;
            uint64_t c= (v/4)   & 0xFF;
            uint64_t d= (3*v/8) & 0xFF;
413
*/
414
//Simulate piecewise linear interpolation
415 416 417 418 419 420 421 422 423 424 425 426
            uint64_t a= (v/16)   & 0xFF;
            uint64_t b= (v*3/16) & 0xFF;
            uint64_t c= (v*5/16) & 0xFF;
            uint64_t d= (7*v/16) & 0xFF;
            uint64_t A= (0x100 - a)&0xFF;
            uint64_t B= (0x100 - b)&0xFF;
            uint64_t C= (0x100 - c)&0xFF;
            uint64_t D= (0x100 - c)&0xFF;

            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
                       (D<<24) | (C<<16) | (B<<8)  | (A);
            //lut[i] = (v<<32) | (v<<24);
427
        }
428
    }
429

430 431 432 433
    for(y=0; y<BLOCK_SIZE; y++){
        int a= src[1] - src[2];
        int b= src[3] - src[4];
        int c= src[5] - src[6];
434

435
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
436

437 438
        if(d < QP){
            int v = d * FFSIGN(-b);
439

440 441 442 443 444 445
            src[1] +=v/8;
            src[2] +=v/4;
            src[3] +=3*v/8;
            src[4] -=3*v/8;
            src[5] -=v/4;
            src[6] -=v/8;
446
        }
447 448
        src+=stride;
    }
449 450
}

451 452 453
/**
 * accurate deblock filter
 */
454
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
455 456 457 458
    int y;
    const int QP= c->QP;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;
459
//START_TIMER
460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
    src+= step*4; // src points to begin of the 8x8 Block
    for(y=0; y<8; y++){
        int numEq= 0;

        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
        if(numEq > c->ppMode.flatnessThreshold){
            int min, max, x;

            if(src[0] > src[step]){
                max= src[0];
                min= src[step];
            }else{
                max= src[step];
                min= src[0];
            }
            for(x=2; x<8; x+=2){
                if(src[x*step] > src[(x+1)*step]){
                        if(src[x    *step] > max) max= src[ x   *step];
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
487
                }else{
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
                        if(src[ x   *step] < min) min= src[ x   *step];
                }
            }
            if(max-min < 2*QP){
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];

                int sums[10];
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
                sums[1] = sums[0] - first       + src[3*step];
                sums[2] = sums[1] - first       + src[4*step];
                sums[3] = sums[2] - first       + src[5*step];
                sums[4] = sums[3] - first       + src[6*step];
                sums[5] = sums[4] - src[0*step] + src[7*step];
                sums[6] = sums[5] - src[1*step] + last;
                sums[7] = sums[6] - src[2*step] + last;
                sums[8] = sums[7] - src[3*step] + last;
                sums[9] = sums[8] - src[4*step] + last;

                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
            }
        }else{
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);

            if(FFABS(middleEnergy) < 8*QP){
                const int q=(src[3*step] - src[4*step])/2;
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);

                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                d= FFMAX(d, 0);

                d= (5*d + 32) >> 6;
                d*= FFSIGN(-middleEnergy);

                if(q>0){
                    d= d<0 ? 0 : d;
                    d= d>q ? q : d;
                }else{
                    d= d>0 ? 0 : d;
                    d= d<q ? q : d;
537 538
                }

539 540 541
                src[3*step]-= d;
                src[4*step]+= d;
            }
542
        }
543 544 545

        src += stride;
    }
546 547 548 549 550 551
/*if(step==16){
    STOP_TIMER("step16")
}else{
    STOP_TIMER("stepX")
}*/
}
552

553
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one

/* Decide which variants of the template get compiled. With
 * RUNTIME_CPUDETECT every variant available for the architecture is built;
 * otherwise only the one matching the configured HAVE_* macros. */

//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if defined(ARCH_X86)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* defined(ARCH_X86) */

/* From here on the HAVE_* macros no longer describe the build configuration:
 * they are redefined before each inclusion of the template so that it emits
 * the matching variant, and RENAME() appends the variant suffix to the
 * names it is applied to. */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

Diego Biurrun's avatar
Diego Biurrun committed
629
// minor note: the HAVE_xyz is messed up after that line so do not use it.
630

Michael Niedermayer's avatar
Michael Niedermayer committed
631 632
/**
 * Copy the requested mode into the context and forward the call to one of
 * the postProcess_* variants generated from the template.
 *
 * With RUNTIME_CPUDETECT the variant is chosen from c->cpuCaps on every
 * call; otherwise it is fixed at compile time by whichever HAVE_* macro is
 * defined at this point of the file.
 *
 * @param vm mode, treated as a PPMode; copied into the context
 * @param vc context, treated as a PPContext
 * All other arguments are passed through unchanged.
 */
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
{
    PPContext *c= (PPContext *)vc;
    PPMode *ppMode= (PPMode *)vm;
    c->ppMode= *ppMode; //FIXME

    // Using ifs here as they are faster than function pointers although the
    // difference would not be measurable here but it is much better because
    // someone might exchange the CPU without restarting MPlayer ;)
#ifdef RUNTIME_CPUDETECT
#if defined(ARCH_X86)
    // ordered per speed fastest first
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
#ifdef HAVE_ALTIVEC
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
#endif
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_3DNOW)
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_MMX)
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_ALTIVEC)
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#endif //!RUNTIME_CPUDETECT
}

675
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
676
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
677

678 679
/* -pp Command line Help
 *
 * The filter names listed here must match the entries of filters[] and
 * replaceTable[] exactly: name matching is done with a case sensitive
 * strcmp().
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
727

728
pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
729
{
730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776
    char temp[GET_MODE_BUFFER_SIZE];
    char *p= temp;
    static const char filterDelimiters[] = ",/";
    static const char optionDelimiters[] = ":";
    struct PPMode *ppMode;
    char *filterToken;

    ppMode= av_malloc(sizeof(PPMode));

    ppMode->lumMode= 0;
    ppMode->chromMode= 0;
    ppMode->maxTmpNoise[0]= 700;
    ppMode->maxTmpNoise[1]= 1500;
    ppMode->maxTmpNoise[2]= 3000;
    ppMode->maxAllowedY= 234;
    ppMode->minAllowedY= 16;
    ppMode->baseDcDiff= 256/8;
    ppMode->flatnessThreshold= 56-16-1;
    ppMode->maxClippedThreshold= 0.01;
    ppMode->error=0;

    strncpy(temp, name, GET_MODE_BUFFER_SIZE);

    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);

    for(;;){
        char *filterName;
        int q= 1000000; //PP_QUALITY_MAX;
        int chrom=-1;
        int luma=-1;
        char *option;
        char *options[OPTIONS_ARRAY_SIZE];
        int i;
        int filterNameOk=0;
        int numOfUnknownOptions=0;
        int enable=1; //does the user want us to enabled or disabled the filter

        filterToken= strtok(p, filterDelimiters);
        if(filterToken == NULL) break;
        p+= strlen(filterToken) + 1; // p points to next filterToken
        filterName= strtok(filterToken, optionDelimiters);
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);

        if(*filterName == '-'){
            enable=0;
            filterName++;
        }
777

778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809
        for(;;){ //for all options
            option= strtok(NULL, optionDelimiters);
            if(option == NULL) break;

            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
            else{
                options[numOfUnknownOptions] = option;
                numOfUnknownOptions++;
            }
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
        }
        options[numOfUnknownOptions] = NULL;

        /* replace stuff from the replace Table */
        for(i=0; replaceTable[2*i]!=NULL; i++){
            if(!strcmp(replaceTable[2*i], filterName)){
                int newlen= strlen(replaceTable[2*i + 1]);
                int plen;
                int spaceLeft;

                if(p==NULL) p= temp, *p=0;      //last filter
                else p--, *p=',';               //not last filter

                plen= strlen(p);
                spaceLeft= p - temp + plen;
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
                    ppMode->error++;
                    break;
810
                }
811 812 813 814 815
                memmove(p + newlen, p, plen+1);
                memcpy(p, replaceTable[2*i + 1], newlen);
                filterNameOk=1;
            }
        }
816

817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841
        for(i=0; filters[i].shortName!=NULL; i++){
            if(   !strcmp(filters[i].longName, filterName)
               || !strcmp(filters[i].shortName, filterName)){
                ppMode->lumMode &= ~filters[i].mask;
                ppMode->chromMode &= ~filters[i].mask;

                filterNameOk=1;
                if(!enable) break; // user wants to disable it

                if(q >= filters[i].minLumQuality && luma)
                    ppMode->lumMode|= filters[i].mask;
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
                    if(q >= filters[i].minChromQuality)
                            ppMode->chromMode|= filters[i].mask;

                if(filters[i].mask == LEVEL_FIX){
                    int o;
                    ppMode->minAllowedY= 16;
                    ppMode->maxAllowedY= 234;
                    for(o=0; options[o]!=NULL; o++){
                        if(  !strcmp(options[o],"fullyrange")
                           ||!strcmp(options[o],"f")){
                            ppMode->minAllowedY= 0;
                            ppMode->maxAllowedY= 255;
                            numOfUnknownOptions--;
842
                        }
843
                    }
844
                }
845
                else if(filters[i].mask == TEMP_NOISE_FILTER)
846
                {
847 848 849 850 851 852 853 854 855 856 857
                    int o;
                    int numOfNoises=0;

                    for(o=0; options[o]!=NULL; o++){
                        char *tail;
                        ppMode->maxTmpNoise[numOfNoises]=
                            strtol(options[o], &tail, 0);
                        if(tail!=options[o]){
                            numOfNoises++;
                            numOfUnknownOptions--;
                            if(numOfNoises >= 3) break;
858
                        }
859
                    }
860
                }
861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
                    int o;

                    for(o=0; options[o]!=NULL && o<2; o++){
                        char *tail;
                        int val= strtol(options[o], &tail, 0);
                        if(tail==options[o]) break;

                        numOfUnknownOptions--;
                        if(o==0) ppMode->baseDcDiff= val;
                        else ppMode->flatnessThreshold= val;
                    }
                }
                else if(filters[i].mask == FORCE_QUANT){
                    int o;
                    ppMode->forcedQuant= 15;

                    for(o=0; options[o]!=NULL && o<1; o++){
                        char *tail;
                        int val= strtol(options[o], &tail, 0);
                        if(tail==options[o]) break;

                        numOfUnknownOptions--;
                        ppMode->forcedQuant= val;
                    }
                }
            }
889
        }
890 891 892 893 894 895 896 897 898 899 900
        if(!filterNameOk) ppMode->error++;
        ppMode->error += numOfUnknownOptions;
    }

    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
    if(ppMode->error){
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
        av_free(ppMode);
        return NULL;
    }
    return ppMode;
901 902
}

Michael Niedermayer's avatar
Michael Niedermayer committed
903
/**
 * Releases a postprocessing mode object.
 *
 * The pointer is handed straight to av_free(); nothing else is torn down.
 *
 * @param mode mode to release (presumably one obtained from the mode parser
 *             of this file -- confirm against the public header)
 */
void pp_free_mode(pp_mode_t *mode)
{
    av_free(mode);
}

907
/**
 * Replaces the buffer referenced by *p with a fresh zero-filled one.
 *
 * The previous contents are discarded, not copied: the old buffer is freed
 * first and a new block of size bytes is obtained from av_mallocz().
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      number of bytes to allocate
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *previous = *p;

    (void)alignment;

    av_free(previous);
    *p = av_mallocz(size);
}

Michael Niedermayer's avatar
Michael Niedermayer committed
912
/**
 * (Re)allocates all work buffers of a context for the given geometry.
 *
 * Every buffer is replaced through reallocAlign(), i.e. the old contents are
 * dropped and the new buffers start out zeroed. The luma histogram is then
 * seeded with a uniform value derived from the picture area.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size to record in the context and to size buffers with
 * @param qpStride QP table line size to record in the context
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride)
{
    const int mbWidth  = (width  + 15) >> 4;
    const int mbHeight = (height + 15) >> 4;
    const int histogramSeed = width*height/64*15/256;
    int bin;
    int plane;

    c->stride   = stride;
    c->qpStride = qpStride;

    reallocAlign((void **)&c->tempDst,    8, stride*24);
    reallocAlign((void **)&c->tempSrc,    8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

    for (bin = 0; bin < 256; bin++) {
        c->yHistogram[bin] = histogramSeed;
    }

    for (plane = 0; plane < 3; plane++) {
        // The extra 17*1024 bytes are slack so that reads/writes slightly
        // past the nominal end of these buffers stay inside the allocation.
        reallocAlign((void **)&c->tempBlurred[plane], 8,
                     stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[plane], 8,
                     256*((height+7)&(~7))/2 + 17*1024); //FIXME size
    }

    reallocAlign((void **)&c->deintTemp,     8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable,   8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable,    8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}

939 940 941 942
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored; every context reports the same name
 * @return    the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char contextName[] = "postproc";

    (void)ptr;
    return contextName;
}

943
/* Class descriptor that pp_get_context() stores in PPContext.av_class.
 * It is initialised with the string "Postproc", the context_to_name()
 * callback and a NULL third member.
 * NOTE(review): presumably consumed by av_log() when a PPContext is passed
 * as the logging context -- confirm against the AVClass definition. */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
944

945
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965
    PPContext *c= av_malloc(sizeof(PPContext));
    int stride= (width+15)&(~15);    //assumed / will realloc if needed
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed

    memset(c, 0, sizeof(PPContext));
    c->av_class = &av_codec_context_class;
    c->cpuCaps= cpuCaps;
    if(cpuCaps&PP_FORMAT){
        c->hChromaSubSample= cpuCaps&0x3;
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
    }else{
        c->hChromaSubSample= 1;
        c->vChromaSubSample= 1;
    }

    reallocBuffers(c, width, height, stride, qpStride);

    c->frameNum=-1;

    return c;
966 967
}

Michael Niedermayer's avatar
Michael Niedermayer committed
968
void pp_free_context(void *vc){
969 970
    PPContext *c = (PPContext*)vc;
    int i;
971

972 973
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
974

975 976 977 978 979 980 981 982
    av_free(c->tempBlocks);
    av_free(c->yHistogram);
    av_free(c->tempDst);
    av_free(c->tempSrc);
    av_free(c->deintTemp);
    av_free(c->stdQPTable);
    av_free(c->nonBQPTable);
    av_free(c->forcedQPTable);
983

984
    memset(c, 0, sizeof(PPContext));
985

986
    av_free(c);
987 988
}

Michael Niedermayer's avatar
Michael Niedermayer committed
989
/**
 * Postprocesses one picture.
 *
 * The luma plane is always run through postProcess(). The two chroma planes
 * are filtered as well when mode->chromMode is non-zero and are otherwise
 * copied unchanged from src to dst.
 *
 * @param src       source planes
 * @param srcStride line sizes of the source planes
 * @param dst       destination planes
 * @param dstStride line sizes of the destination planes
 * @param width     luma width in pixels
 * @param height    luma height in pixels
 * @param QP_store  per-macroblock quantiser table, or NULL to use a constant
 *                  table (value 1, or mode->forcedQuant with FORCE_QUANT)
 * @param QPStride  line size of QP_store in entries; may be negative, in
 *                  which case row i starts at QP_store + i*QPStride
 * @param vm        mode (PPMode) selecting the filters
 * @param vc        context (PPContext); its buffers are reallocated here if
 *                  the given strides exceed the ones it was sized for
 * @param pict_type picture type flags; PP_PICT_TYPE_QP2 requests halving of
 *                  the QP values, and nonBQPTable is refreshed unless
 *                  (pict_type&7)==3
 */
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
                     uint8_t * dst[3], const int dstStride[3],
                     int width, int height,
                     const QP_STORE_T *QP_store,  int QPStride,
                     pp_mode_t *vm,  void *vc, int pict_type)
{
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    PPMode *mode = (PPMode*)vm;
    PPContext *c = (PPContext*)vc;
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
    int absQPStride = FFABS(QPStride);
    int qpCount;

    // c->stride and c->QPStride are always positive
    if(c->stride < minStride || c->qpStride < absQPStride)
        reallocBuffers(c, width, height,
                       FFMAX(minStride, c->stride),
                       FFMAX(c->qpStride, absQPStride));

    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
        int i;
        QP_store= c->forcedQPTable;
        absQPStride = QPStride = 0;
        if(mode->lumMode & FORCE_QUANT)
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
        else
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
    }

    /* Number of QP entries that have to be converted/copied below.
     * With a stride of 0 every index x + y*QPStride lands in the single row
     * QP_store points to, so that one row (mbWidth entries) still has to be
     * processed; mbHeight*stride alone would be 0 and leave the destination
     * tables unfilled. */
    if(absQPStride)   qpCount= mbHeight * absQPStride;
    else if(mbHeight) qpCount= mbWidth;
    else              qpCount= 0;

    if(pict_type & PP_PICT_TYPE_QP2){
        int i;
        if(QPStride >= 0){
            /* Four entries at a time; memcpy is used for the word transfers
             * because a caller supplied QP_store need not be 4-byte aligned. */
            for(i=0; i<(qpCount>>2); i++){
                uint32_t v;
                memcpy(&v, (const uint8_t*)QP_store + 4*i, sizeof(v));
                v= (v>>1) & 0x7F7F7F7F;
                memcpy((uint8_t*)c->stdQPTable + 4*i, &v, sizeof(v));
            }
            for(i<<=2; i<qpCount; i++){
                c->stdQPTable[i] = QP_store[i]>>1;
            }
        }else{
            /* Negative stride: rows lie at decreasing addresses, so they are
             * gathered one by one into the positively strided stdQPTable. */
            int j;
            for(i=0; i<mbHeight; i++){
                for(j=0; j<absQPStride; j++){
                    c->stdQPTable[i*absQPStride+j] = QP_store[i*QPStride+j]>>1;
                }
            }
        }
        QP_store= c->stdQPTable;
        QPStride= absQPStride;
    }

    if(0){
        int x,y;
        for(y=0; y<mbHeight; y++){
            for(x=0; x<mbWidth; x++){
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
            }
            av_log(c, AV_LOG_INFO, "\n");
        }
        av_log(c, AV_LOG_INFO, "\n");
    }

    if((pict_type&7)!=3){
        if (QPStride >= 0){
            int i;
            // QPStride == absQPStride here, so qpCount is the right length.
            for(i=0; i<(qpCount>>2); i++){
                uint32_t v;
                memcpy(&v, (const uint8_t*)QP_store + 4*i, sizeof(v));
                v&= 0x3F3F3F3F;
                memcpy((uint8_t*)c->nonBQPTable + 4*i, &v, sizeof(v));
            }
            for(i<<=2; i<qpCount; i++){
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
            }
        } else {
            int i,j;
            for(i=0; i<mbHeight; i++) {
                for(j=0; j<absQPStride; j++) {
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
                }
            }
        }
    }

    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
           mode->lumMode, mode->chromMode);

    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
                width, height, QP_store, QPStride, 0, mode, c);

    // from here on width/height describe the chroma planes
    width  = (width )>>c->hChromaSubSample;
    height = (height)>>c->vChromaSubSample;

    if(mode->chromMode){
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
                    width, height, QP_store, QPStride, 1, mode, c);
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
                    width, height, QP_store, QPStride, 2, mode, c);
    }
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
        // identical layout: each plane can be copied in one go
        linecpy(dst[1], src[1], height, srcStride[1]);
        linecpy(dst[2], src[2], height, srcStride[2]);
    }else{
        int y;
        for(y=0; y<height; y++){
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
        }
    }
}