/*
 * software RGB to RGB converter
 * pluralize by software PAL8 to RGB converter
 *              software YUV to YUV converter
 *              software YUV to RGB converter
 * Written by Nick Kurshev.
 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
 * lot of big-endian byte order fixes by Alex Beregszaszi
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

Arpi's avatar
Arpi committed
27 28
#include <stddef.h>

29 30
#include "libavutil/attributes.h"

31 32
/**
 * Expand packed 3-byte pixels to 4-byte pixels with a constant 255 byte.
 *
 * Without HAVE_BIGENDIAN the three source bytes are copied in order and 255
 * is appended; with HAVE_BIGENDIAN 255 is written first, followed by the
 * three source bytes in reverse order.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output buffer; 4 bytes are written per complete source pixel
 * @param src_size size of src in bytes; a trailing fragment shorter than one
 *                 pixel is ignored rather than read past the end of src
 */
static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint8_t *out             = dst;

    for (; end - in >= 3; in += 3, out += 4) {
#if HAVE_BIGENDIAN
        /* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */
        out[0] = 255;
        out[1] = in[2];
        out[2] = in[1];
        out[3] = in[0];
#else
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
        out[3] = 255;
#endif
    }
}
54

55 56
/**
 * Reduce packed 4-byte pixels to 3-byte pixels by dropping one byte.
 *
 * Without HAVE_BIGENDIAN the first three bytes of every pixel are copied in
 * order and the fourth is skipped; with HAVE_BIGENDIAN the first byte is
 * skipped and the remaining three are written in reverse order.
 *
 * @param src      packed 4-byte pixels
 * @param dst      output buffer; 3 bytes are written per complete source pixel
 * @param src_size size of src in bytes; a trailing fragment shorter than one
 *                 pixel is ignored rather than read past the end of src
 */
static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint8_t *out             = dst;

    for (; end - in >= 4; in += 4, out += 3) {
#if HAVE_BIGENDIAN
        /* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */
        out[0] = in[3];
        out[1] = in[2];
        out[2] = in[1];
#else
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
#endif
    }
}
78

79
/*
 * original by Strepto/Astral
 * ported to gcc & bugfixed: A'rpi
 * MMXEXT, 3DNOW optimization by Nick Kurshev
 * 32-bit C version, and and&add trick by Michael Niedermayer
 */
/**
 * Convert 16-bit words by keeping bits 0-4 and moving bits 5-14 up by one.
 *
 * Adding (x & 0x7FE0) to (x & 0x7FFF) doubles the bits 5-14 field, which
 * shifts it left by one position and leaves bit 5 zero; bit 15 of the input
 * is discarded.
 *
 * @param src      input words, read through uint32_t / uint16_t pointers
 *                 (NOTE(review): presumably suitably aligned by the caller)
 * @param dst      output buffer of at least src_size bytes
 * @param src_size size of src in bytes; an odd trailing byte is ignored
 */
static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint8_t *out             = dst;

    /* Two words per iteration; the sum of a masked 16-bit half never carries
     * into the neighbouring half (maximum 0x7FFF + 0x7FE0 = 0xFFDF). */
    while (end - in >= 4) {
        const uint32_t pair = *(const uint32_t *)in;

        *(uint32_t *)out = (pair & 0x7FFF7FFF) + (pair & 0x7FE07FE0);
        in  += 4;
        out += 4;
    }
    /* At most one complete word is left. */
    if (end - in >= 2) {
        const uint16_t px = *(const uint16_t *)in;

        *(uint16_t *)out = (px & 0x7FFF) + (px & 0x7FE0);
    }
}
103

104
/**
 * Convert 16-bit words by keeping bits 0-4 and moving bits 6-15 down by one.
 *
 * Bit 5 of every input word is dropped and bit 15 of every output word is
 * zero.
 *
 * @param src      input words, read through uint32_t / uint16_t pointers
 *                 (NOTE(review): presumably suitably aligned by the caller)
 * @param dst      output buffer of at least src_size bytes
 * @param src_size size of src in bytes; an odd trailing byte is ignored
 */
static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint8_t *out             = dst;

    /* Two words per iteration; the mask removes the bit that the shift drags
     * across the boundary between the two 16-bit halves. */
    while (end - in >= 4) {
        const uint32_t pair = *(const uint32_t *)in;

        *(uint32_t *)out = ((pair >> 1) & 0x7FE07FE0) | (pair & 0x001F001F);
        in  += 4;
        out += 4;
    }
    /* At most one complete word is left. */
    if (end - in >= 2) {
        const uint16_t px = *(const uint16_t *)in;

        *(uint16_t *)out = ((px >> 1) & 0x7FE0) | (px & 0x001F);
    }
}

123
/**
 * Pack 32-bit pixels into 16-bit 5-6-5 words.
 *
 * Each pixel is loaded as one host-order uint32_t. Output bits 0-4 come from
 * input bits 3-7, bits 5-10 from input bits 10-15 and bits 11-15 from input
 * bits 19-23; input bits 24-31 are ignored.
 *
 * @param src      input pixels, read through a uint32_t pointer
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size size of src in bytes; a trailing fragment shorter than
 *                 4 bytes is ignored
 */
static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; end - in >= 4; in += 4) {
        const uint32_t px = *(const uint32_t *)in;

        *out++ = ((px & 0xFF)     >> 3) +
                 ((px & 0xFC00)   >> 5) +
                 ((px & 0xF80000) >> 8);
    }
}

138 139
/**
 * Pack 32-bit pixels into 16-bit 5-6-5 words with the outer fields swapped.
 *
 * Each pixel is loaded as one host-order uint32_t. Output bits 11-15 come
 * from input bits 3-7, bits 5-10 from input bits 10-15 and bits 0-4 from
 * input bits 19-23; input bits 24-31 are ignored.
 *
 * @param src      input pixels, read through a uint32_t pointer
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size size of src in bytes; a trailing fragment shorter than
 *                 4 bytes is ignored
 */
static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; end - in >= 4; in += 4) {
        const uint32_t px = *(const uint32_t *)in;

        *out++ = ((px & 0xF8)     << 8) +
                 ((px & 0xFC00)   >> 5) +
                 ((px & 0xF80000) >> 19);
    }
}

154
/**
 * Pack 32-bit pixels into 16-bit 5-5-5 words.
 *
 * Each pixel is loaded as one host-order uint32_t. Output bits 0-4 come from
 * input bits 3-7, bits 5-9 from input bits 11-15 and bits 10-14 from input
 * bits 19-23; output bit 15 is zero and input bits 24-31 are ignored.
 *
 * @param src      input pixels, read through a uint32_t pointer
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size size of src in bytes; a trailing fragment shorter than
 *                 4 bytes is ignored
 */
static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; end - in >= 4; in += 4) {
        const uint32_t px = *(const uint32_t *)in;

        *out++ = ((px & 0xFF)     >> 3) +
                 ((px & 0xF800)   >> 6) +
                 ((px & 0xF80000) >> 9);
    }
}

169 170
/**
 * Pack 32-bit pixels into 16-bit 5-5-5 words with the outer fields swapped.
 *
 * Each pixel is loaded as one host-order uint32_t. Output bits 10-14 come
 * from input bits 3-7, bits 5-9 from input bits 11-15 and bits 0-4 from
 * input bits 19-23; output bit 15 is zero and input bits 24-31 are ignored.
 *
 * @param src      input pixels, read through a uint32_t pointer
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size size of src in bytes; a trailing fragment shorter than
 *                 4 bytes is ignored
 */
static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; end - in >= 4; in += 4) {
        const uint32_t px = *(const uint32_t *)in;

        *out++ = ((px & 0xF8)     <<  7) +
                 ((px & 0xF800)   >>  6) +
                 ((px & 0xF80000) >> 19);
    }
}

185 186
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 words.
 *
 * The top 5 bits of byte 0 go to output bits 0-4, the top 6 bits of byte 1
 * to bits 5-10 and the top 5 bits of byte 2 to bits 11-15.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size size of src in bytes; a trailing fragment shorter than one
 *                 pixel is ignored rather than read past the end of src
 */
static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; end - in >= 3; in += 3) {
        const int lo  = in[0];
        const int mid = in[1];
        const int hi  = in[2];

        *out++ = (lo >> 3) | ((mid & 0xFC) << 3) | ((hi & 0xF8) << 8);
    }
}

200
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 words, first byte in the top field.
 *
 * The top 5 bits of byte 0 go to output bits 11-15, the top 6 bits of byte 1
 * to bits 5-10 and the top 5 bits of byte 2 to bits 0-4.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size size of src in bytes; a trailing fragment shorter than one
 *                 pixel is ignored rather than read past the end of src
 */
static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; end - in >= 3; in += 3) {
        const int hi  = in[0];
        const int mid = in[1];
        const int lo  = in[2];

        *out++ = (lo >> 3) | ((mid & 0xFC) << 3) | ((hi & 0xF8) << 8);
    }
}

214 215
/**
 * Pack 3-byte pixels into 16-bit 5-5-5 words.
 *
 * The top 5 bits of byte 0 go to output bits 0-4, the top 5 bits of byte 1
 * to bits 5-9 and the top 5 bits of byte 2 to bits 10-14; bit 15 is zero.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size size of src in bytes; a trailing fragment shorter than one
 *                 pixel is ignored rather than read past the end of src
 */
static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; end - in >= 3; in += 3) {
        const int lo  = in[0];
        const int mid = in[1];
        const int hi  = in[2];

        *out++ = (lo >> 3) | ((mid & 0xF8) << 2) | ((hi & 0xF8) << 7);
    }
}

229
/**
 * Pack 3-byte pixels into 16-bit 5-5-5 words, first byte in the top field.
 *
 * The top 5 bits of byte 0 go to output bits 10-14, the top 5 bits of byte 1
 * to bits 5-9 and the top 5 bits of byte 2 to bits 0-4; bit 15 is zero.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size size of src in bytes; a trailing fragment shorter than one
 *                 pixel is ignored rather than read past the end of src
 */
static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in        = src;
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; end - in >= 3; in += 3) {
        const int hi  = in[0];
        const int mid = in[1];
        const int lo  = in[2];

        *out++ = (lo >> 3) | ((mid & 0xF8) << 2) | ((hi & 0xF8) << 7);
    }
}

Arpi's avatar
Arpi committed
243
/*
 * I use less accurate approximation here by simply left-shifting the input
 * value and filling the low order bits with zeroes. This method improves PNG
 * compression but this scheme cannot reproduce white exactly, since it does
 * not generate an all-ones maximum value; the net effect is to darken the
 * image slightly.
 *
 * The better method should be "left bit replication":
 *
 *  4 3 2 1 0
 *  ---------
 *  1 1 0 1 1
 *
 *  7 6 5 4 3  2 1 0
 *  ----------------
 *  1 1 0 1 1  1 1 0
 *  |=======|  |===|
 *      |      leftmost bits repeated to fill open bits
 *      |
 *  original bits
 */
/**
 * Unpack 16-bit 5-5-5 words into 3-byte pixels.
 *
 * Output byte 0 holds input bits 0-4, byte 1 bits 5-9 and byte 2 bits 10-14,
 * each placed in the top 5 bits of the byte with the low 3 bits zero.
 *
 * @param src      input words, read through a uint16_t pointer
 * @param dst      output buffer; 3 bytes are written per input word
 * @param src_size size of src in bytes; src_size / 2 words are converted
 */
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const int pixels   = src_size / 2;
    uint8_t *out       = dst;
    int i;

    for (i = 0; i < pixels; i++, out += 3) {
        const uint16_t px = in[i];

        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x3E0)  >> 2;
        out[2] = (px & 0x7C00) >> 7;
    }
}

279 280
/**
 * Unpack 16-bit 5-6-5 words into 3-byte pixels.
 *
 * Output byte 0 holds input bits 0-4 (in its top 5 bits), byte 1 holds input
 * bits 5-10 (top 6 bits) and byte 2 holds input bits 11-15 (top 5 bits); the
 * remaining low bits of each byte are zero.
 *
 * @param src      input words, read through a uint16_t pointer
 * @param dst      output buffer; 3 bytes are written per input word
 * @param src_size size of src in bytes; src_size / 2 words are converted
 */
static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const int pixels   = src_size / 2;
    uint8_t *out       = dst;
    int i;

    for (i = 0; i < pixels; i++, out += 3) {
        const uint16_t px = in[i];

        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x7E0)  >> 3;
        out[2] = (px & 0xF800) >> 8;
    }
}

294
/**
 * Unpack 16-bit 5-5-5 words into 4-byte pixels with a constant 255 byte.
 *
 * Without HAVE_BIGENDIAN the bytes are: input bits 0-4, bits 5-9, bits 10-14
 * (each in the top 5 bits of its byte), then 255. With HAVE_BIGENDIAN the
 * same four bytes are written in the opposite order.
 *
 * @param src      input words, read through a uint16_t pointer
 * @param dst      output buffer; 4 bytes are written per input word
 * @param src_size size of src in bytes; src_size / 2 words are converted
 */
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const int pixels   = src_size / 2;
    uint8_t *out       = dst;
    int i;

    for (i = 0; i < pixels; i++, out += 4) {
        const uint16_t px = in[i];

#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (px & 0x7C00) >> 7;
        out[2] = (px & 0x3E0)  >> 2;
        out[3] = (px & 0x1F)   << 3;
#else
        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x3E0)  >> 2;
        out[2] = (px & 0x7C00) >> 7;
        out[3] = 255;
#endif
    }
}
315

316
/**
 * Unpack 16-bit 5-6-5 words into 4-byte pixels with a constant 255 byte.
 *
 * Without HAVE_BIGENDIAN the bytes are: input bits 0-4 (top 5 bits of the
 * byte), bits 5-10 (top 6 bits), bits 11-15 (top 5 bits), then 255. With
 * HAVE_BIGENDIAN the same four bytes are written in the opposite order.
 *
 * @param src      input words, read through a uint16_t pointer
 * @param dst      output buffer; 4 bytes are written per input word
 * @param src_size size of src in bytes; src_size / 2 words are converted
 */
static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const int pixels   = src_size / 2;
    uint8_t *out       = dst;
    int i;

    for (i = 0; i < pixels; i++, out += 4) {
        const uint16_t px = in[i];

#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (px & 0xF800) >> 8;
        out[2] = (px & 0x7E0)  >> 3;
        out[3] = (px & 0x1F)   << 3;
#else
        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x7E0)  >> 3;
        out[2] = (px & 0xF800) >> 8;
        out[3] = 255;
#endif
    }
}
337

338 339
/**
 * Swap bits 0-7 with bits 16-23 in every 32-bit word; bits 8-15 and 24-31
 * stay in place.
 *
 * Words are loaded and stored in host byte order. All arithmetic is done on
 * unsigned 32-bit values: shifting the masked word left by 16 in a signed
 * int would overflow, which is undefined behaviour.
 *
 * @param src      input words, read through a uint32_t pointer
 * @param dst      output buffer, written through a uint32_t pointer
 * @param src_size size of src in bytes; a trailing fragment shorter than
 *                 4 bytes is ignored
 */
static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst,
                                        int src_size)
{
    int off;

    for (off = 0; src_size - off >= 4; off += 4) {
        const uint32_t v     = *(const uint32_t *)(src + off);
        const uint32_t fixed = v & 0xff00ff00u; /* bits that do not move */
        const uint32_t moved = v & 0x00ff00ffu; /* bits that trade places */

        *(uint32_t *)(dst + off) = (moved >> 16) + fixed + (moved << 16);
    }
}

352
/**
 * Swap the first and third byte of every packed 3-byte pixel.
 *
 * Both outer bytes are read before anything is written, so the conversion
 * also works when dst and src are the same buffer.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output buffer of at least src_size bytes
 * @param src_size size of src in bytes; zero or negative sizes convert
 *                 nothing, and a trailing fragment shorter than one pixel is
 *                 ignored
 */
static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    int i;

    /* Signed index: comparing an unsigned index with a negative src_size
     * would turn the bound into a huge positive value. */
    for (i = 0; src_size - i >= 3; i += 3) {
        const uint8_t first = src[i + 0];
        const uint8_t third = src[i + 2];

        dst[i + 1] = src[i + 1];
        dst[i + 2] = first;
        dst[i + 0] = third;
    }
}

364 365
/**
 * Interleave planar Y, U and V into packed Y0 U Y1 V groups.
 *
 * One U and one V sample are combined with two Y samples. The chroma row
 * pointers advance after every vertLumPerChroma-th luma row (the test is
 * done with a bit mask, so vertLumPerChroma is presumably a power of two).
 *
 * Every sample is widened to an unsigned 32-bit value before shifting: a
 * uint8_t of 128 or more shifted left by 24 overflows a signed int, and in
 * the 64-bit path the sign-extended result would corrupt the upper word.
 *
 * @param ysrc             luma plane
 * @param usrc             first chroma plane (second byte of each group)
 * @param vsrc             second chroma plane (fourth byte of each group)
 * @param dst              output buffer, written through 32- or 64-bit
 *                         pointers
 * @param width            luma width in samples; width >> 1 groups per row
 * @param height           number of luma rows
 * @param lumStride        byte distance between luma rows
 * @param chromStride      byte distance between chroma rows
 * @param dstStride        byte distance between output rows
 * @param vertLumPerChroma number of luma rows sharing one chroma row
 */
static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    int y, i;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
#if HAVE_FAST_64BIT
        /* Two groups (four luma samples) per 64-bit store.
         * NOTE(review): this path has no HAVE_BIGENDIAN variant, unlike the
         * 32-bit path below -- confirm the resulting byte order on
         * big-endian 64-bit targets. */
        uint64_t *ldst = (uint64_t *)dst;

        for (i = 0; i < chromWidth; i += 2) {
            const uint64_t k = ((uint32_t)yc[0])       | ((uint32_t)uc[0] <<  8) |
                               ((uint32_t)yc[1] << 16) | ((uint32_t)vc[0] << 24);
            const uint64_t l = ((uint32_t)yc[2])       | ((uint32_t)uc[1] <<  8) |
                               ((uint32_t)yc[3] << 16) | ((uint32_t)vc[1] << 24);

            *ldst++ = k | (l << 32);
            yc     += 4;
            uc     += 2;
            vc     += 2;
        }
#else
        uint32_t *idst = (uint32_t *)dst;

        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = ((uint32_t)yc[0] << 24) | ((uint32_t)uc[0] << 16) |
                      ((uint32_t)yc[1] <<  8) | ((uint32_t)vc[0] <<  0);
#else
            *idst++ = ((uint32_t)yc[0])       | ((uint32_t)uc[0] <<  8) |
                      ((uint32_t)yc[1] << 16) | ((uint32_t)vc[0] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        /* step to the next chroma row once its group of luma rows is done */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}

414
/**
 * Interleave planar Y, U and V into packed YUY2, sharing each chroma row
 * between two luma rows (calls yuvPlanartoyuy2_c with vertLumPerChroma = 2).
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height, int lumStride,
                                int chromStride, int dstStride)
{
    //FIXME interpolate chroma
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, 2);
}

428 429
/**
 * Interleave planar Y, U and V into packed U Y0 V Y1 groups.
 *
 * One U and one V sample are combined with two Y samples. The chroma row
 * pointers advance after every vertLumPerChroma-th luma row (the test is
 * done with a bit mask, so vertLumPerChroma is presumably a power of two).
 *
 * Every sample is widened to an unsigned 32-bit value before shifting: a
 * uint8_t of 128 or more shifted left by 24 overflows a signed int, and in
 * the 64-bit path the sign-extended result would corrupt the upper word.
 *
 * @param ysrc             luma plane
 * @param usrc             first chroma plane (first byte of each group)
 * @param vsrc             second chroma plane (third byte of each group)
 * @param dst              output buffer, written through 32- or 64-bit
 *                         pointers
 * @param width            luma width in samples; width >> 1 groups per row
 * @param height           number of luma rows
 * @param lumStride        byte distance between luma rows
 * @param chromStride      byte distance between chroma rows
 * @param dstStride        byte distance between output rows
 * @param vertLumPerChroma number of luma rows sharing one chroma row
 */
static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    int y, i;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
#if HAVE_FAST_64BIT
        /* Two groups (four luma samples) per 64-bit store.
         * NOTE(review): this path has no HAVE_BIGENDIAN variant, unlike the
         * 32-bit path below -- confirm the resulting byte order on
         * big-endian 64-bit targets. */
        uint64_t *ldst = (uint64_t *)dst;

        for (i = 0; i < chromWidth; i += 2) {
            const uint64_t k = ((uint32_t)uc[0])       | ((uint32_t)yc[0] <<  8) |
                               ((uint32_t)vc[0] << 16) | ((uint32_t)yc[1] << 24);
            const uint64_t l = ((uint32_t)uc[1])       | ((uint32_t)yc[2] <<  8) |
                               ((uint32_t)vc[1] << 16) | ((uint32_t)yc[3] << 24);

            *ldst++ = k | (l << 32);
            yc     += 4;
            uc     += 2;
            vc     += 2;
        }
#else
        uint32_t *idst = (uint32_t *)dst;

        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = ((uint32_t)uc[0] << 24) | ((uint32_t)yc[0] << 16) |
                      ((uint32_t)vc[0] <<  8) | ((uint32_t)yc[1] <<  0);
#else
            *idst++ = ((uint32_t)uc[0])       | ((uint32_t)yc[0] <<  8) |
                      ((uint32_t)vc[0] << 16) | ((uint32_t)yc[1] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        /* step to the next chroma row once its group of luma rows is done */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}

/**
 * Interleave planar Y, U and V into packed UYVY, sharing each chroma row
 * between two luma rows (calls yuvPlanartouyvy_c with vertLumPerChroma = 2).
 *
 * Height should be a multiple of 2 and width should be a multiple of 16
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height, int lumStride,
                                int chromStride, int dstStride)
{
    //FIXME interpolate chroma
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, 2);
}

492 493 494
/**
 * Interleave planar Y, U and V into packed UYVY with one chroma row per luma
 * row (calls yuvPlanartouyvy_c with vertLumPerChroma = 1).
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height, int lumStride,
                                   int chromStride, int dstStride)
{
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, 1);
}

504
/**
 * Interleave planar Y, U and V into packed YUY2 with one chroma row per luma
 * row (calls yuvPlanartoyuy2_c with vertLumPerChroma = 1).
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height, int lumStride,
                                   int chromStride, int dstStride)
{
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, 1);
}

516
/**
 * Split packed Y0 U Y1 V groups into separate Y, U and V planes.
 *
 * Rows are handled in pairs: the first row of each pair supplies luma and
 * chroma, the second row supplies luma only, so one chroma row is written
 * for every two input rows.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 *
 * @param src         packed input, 4 bytes per pair of luma samples
 * @param ydst        luma output plane
 * @param udst        output plane for the second byte of each group
 * @param vdst        output plane for the fourth byte of each group
 * @param width       luma width in samples; width >> 1 groups per row
 * @param height      number of input rows
 * @param lumStride   byte distance between luma output rows
 * @param chromStride byte distance between chroma output rows
 * @param srcStride   byte distance between input rows
 */
static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height, int lumStride,
                                int chromStride, int srcStride)
{
    const int chromWidth = width >> 1;
    int y, i;

    for (y = 0; y < height; y += 2) {
        /* first row of the pair: luma and chroma */
        for (i = 0; i < chromWidth; i++) {
            const uint8_t *grp = src + 4 * i;

            ydst[2 * i + 0] = grp[0];
            udst[i]         = grp[1];
            ydst[2 * i + 1] = grp[2];
            vdst[i]         = grp[3];
        }
        ydst += lumStride;
        src  += srcStride;

        /* second row of the pair: luma only */
        for (i = 0; i < chromWidth; i++) {
            ydst[2 * i + 0] = src[4 * i + 0];
            ydst[2 * i + 1] = src[4 * i + 2];
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
}
Arpi's avatar
Arpi committed
549

550 551
/**
 * Upscale one plane by a factor of two in both directions.
 *
 * Output samples are 3:1 weighted averages of neighbouring source samples
 * ((3 * a + b) >> 2). The first and last output rows interpolate only
 * horizontally; the rows in between mix each source row with the row below
 * it. The first and last sample of the first and last output rows are copied
 * unchanged.
 *
 * @param src       source plane
 * @param dst       destination plane, 2 * srcWidth samples per row and
 *                  2 * srcHeight rows
 * @param srcWidth  source width in samples
 * @param srcHeight source height in rows
 * @param srcStride byte distance between source rows
 * @param dstStride byte distance between destination rows
 */
static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
                              int srcHeight, int srcStride, int dstStride)
{
    const int last = srcWidth - 1; /* index of the last source sample */
    int x, y;

    // first line
    dst[0] = src[0];
    for (x = 0; x < last; x++) {
        dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2;
        dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
    }
    dst[2 * last + 1] = src[last];

    dst += dstStride;

    /* each iteration produces two output rows from source rows y-1 and y */
    for (y = 1; y < srcHeight; y++) {
        const uint8_t *below = src + srcStride;
        uint8_t *lower       = dst + dstStride;

        dst[0]   = (src[0] * 3 + below[0]) >> 2;
        lower[0] = (src[0] + 3 * below[0]) >> 2;

        for (x = 0; x < last; x++) {
            dst[2 * x + 1]   = (src[x + 0] * 3 + below[x + 1]) >> 2;
            lower[2 * x + 2] = (src[x + 0] + 3 * below[x + 1]) >> 2;
            lower[2 * x + 1] = (src[x + 1] + 3 * below[x])     >> 2;
            dst[2 * x + 2]   = (src[x + 1] * 3 + below[x])     >> 2;
        }
        dst[2 * last + 1]   = (src[last] * 3 + below[last]) >> 2;
        lower[2 * last + 1] = (src[last] + 3 * below[last]) >> 2;

        dst += dstStride * 2;
        src += srcStride;
    }

    // last line
    dst[0] = src[0];
    for (x = 0; x < last; x++) {
        dst[2 * x + 1] = (src[x] * 3 + src[x + 1]) >> 2;
        dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
    }
    dst[2 * last + 1] = src[last];
}

Arpi's avatar
Arpi committed
595
/**
 * Split packed U Y0 V Y1 groups into separate Y, U and V planes.
 *
 * Rows are handled in pairs: the first row of each pair supplies luma and
 * chroma, the second row supplies luma only.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 * Chrominance data is only taken from every second line, others are ignored.
 * FIXME: Write HQ version.
 *
 * @param src         packed input, 4 bytes per pair of luma samples
 * @param ydst        luma output plane
 * @param udst        output plane for the first byte of each group
 * @param vdst        output plane for the third byte of each group
 * @param width       luma width in samples; width >> 1 groups per row
 * @param height      number of input rows
 * @param lumStride   byte distance between luma output rows
 * @param chromStride byte distance between chroma output rows
 * @param srcStride   byte distance between input rows
 */
static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height, int lumStride,
                                int chromStride, int srcStride)
{
    const int chromWidth = width >> 1;
    int y, i;

    for (y = 0; y < height; y += 2) {
        /* first row of the pair: luma and chroma */
        for (i = 0; i < chromWidth; i++) {
            const uint8_t *grp = src + 4 * i;

            udst[i]         = grp[0];
            ydst[2 * i + 0] = grp[1];
            vdst[i]         = grp[2];
            ydst[2 * i + 1] = grp[3];
        }
        ydst += lumStride;
        src  += srcStride;

        /* second row of the pair: luma only */
        for (i = 0; i < chromWidth; i++) {
            ydst[2 * i + 0] = src[4 * i + 1];
            ydst[2 * i + 1] = src[4 * i + 3];
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
}

631
/**
632 633 634
 * Height should be a multiple of 2 and width should be a multiple of 2.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 * Chrominance data is only taken from every second line,
635 636
 * others are ignored in the C version.
 * FIXME: Write HQ version.
637
 */
638
void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
639 640
                   uint8_t *vdst, int width, int height, int lumStride,
                   int chromStride, int srcStride)
641
{
642
    int y;
643
    const int chromWidth = width >> 1;
644 645

    for (y = 0; y < height; y += 2) {
646
        int i;
647 648 649 650
        for (i = 0; i < chromWidth; i++) {
            unsigned int b = src[6 * i + 0];
            unsigned int g = src[6 * i + 1];
            unsigned int r = src[6 * i + 2];
651

652 653 654
            unsigned int Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) +  16;
            unsigned int V = ((RV * r + GV * g + BV * b) >> RGB2YUV_SHIFT) + 128;
            unsigned int U = ((RU * r + GU * g + BU * b) >> RGB2YUV_SHIFT) + 128;
655 656 657

            udst[i]     = U;
            vdst[i]     = V;
658
            ydst[2 * i] = Y;
659

660 661 662
            b = src[6 * i + 3];
            g = src[6 * i + 4];
            r = src[6 * i + 5];
663

664 665
            Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
            ydst[2 * i + 1] = Y;
666 667 668 669
        }
        ydst += lumStride;
        src  += srcStride;

670 671 672 673
        for (i = 0; i < chromWidth; i++) {
            unsigned int b = src[6 * i + 0];
            unsigned int g = src[6 * i + 1];
            unsigned int r = src[6 * i + 2];
674

675
            unsigned int Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
676

677
            ydst[2 * i] = Y;
678

679 680 681
            b = src[6 * i + 3];
            g = src[6 * i + 4];
            r = src[6 * i + 5];
682

683 684
            Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
            ydst[2 * i + 1] = Y;
685 686 687 688 689 690
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
691
}
692

693
/**
 * Interleave two byte planes row by row: output byte 2*w comes from src1[w]
 * and output byte 2*w + 1 from src2[w].
 *
 * @param src1       plane supplying the even output bytes
 * @param src2       plane supplying the odd output bytes
 * @param dest       output plane, 2 * width bytes written per row
 * @param width      number of bytes taken from each source per row
 * @param height     number of rows
 * @param src1Stride byte distance between rows of src1
 * @param src2Stride byte distance between rows of src2
 * @param dstStride  byte distance between rows of dest
 */
static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                              uint8_t *dest, int width, int height,
                              int src1Stride, int src2Stride, int dstStride)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dest[2 * col + 0] = src1[col];
            dest[2 * col + 1] = src2[col];
        }
        dest += dstStride;
        src1 += src1Stride;
        src2 += src2Stride;
    }
}
710

711 712
/**
 * Upscale two planes by two in both directions using sample repetition.
 *
 * For each plane, width / 2 source samples per row are each written twice,
 * and every source row is used for two consecutive output rows; height / 2
 * output rows are produced.
 *
 * @param src1       first source plane
 * @param src2       second source plane
 * @param dst1       output plane for src1
 * @param dst2       output plane for src2
 * @param width      twice the number of source samples read per row
 * @param height     twice the number of output rows
 * @param srcStride1 byte distance between rows of src1
 * @param srcStride2 byte distance between rows of src2
 * @param dstStride1 byte distance between rows of dst1
 * @param dstStride2 byte distance between rows of dst2
 */
static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                 uint8_t *dst1, uint8_t *dst2,
                                 int width, int height,
                                 int srcStride1, int srcStride2,
                                 int dstStride1, int dstStride2)
{
    const int cols = width  / 2;
    const int rows = height / 2;
    int x, y;

    for (y = 0; y < rows; y++) {
        const uint8_t *in = src1 + srcStride1 * (y >> 1);
        uint8_t *out      = dst1 + dstStride1 *  y;

        for (x = 0; x < cols; x++) {
            out[2 * x + 1] = in[x];
            out[2 * x]     = in[x];
        }
    }
    for (y = 0; y < rows; y++) {
        const uint8_t *in = src2 + srcStride2 * (y >> 1);
        uint8_t *out      = dst2 + dstStride2 *  y;

        for (x = 0; x < cols; x++) {
            out[2 * x + 1] = in[x];
            out[2 * x]     = in[x];
        }
    }
}

735 736
/**
 * Interleave three planes into packed Y U Y V groups, with one chroma sample
 * shared by four luma samples horizontally and one chroma row shared by four
 * luma rows.
 *
 * Each inner iteration reads four samples from src1 and one sample each from
 * src2 and src3, and writes eight output bytes.
 *
 * NOTE(review): the inner loop runs width / 2 times and consumes four src1
 * samples per iteration, i.e. 2 * width src1 samples per row -- confirm that
 * this matches what callers mean by "width".
 *
 * @param src1       plane supplying the even output bytes
 * @param src2       plane supplying output bytes 1 and 5 of each group
 * @param src3       plane supplying output bytes 3 and 7 of each group
 * @param dst        packed output
 * @param width      twice the number of 8-byte groups written per row
 * @param height     number of rows
 * @param srcStride1 byte distance between rows of src1
 * @param srcStride2 byte distance between rows of src2
 * @param srcStride3 byte distance between rows of src3
 * @param dstStride  byte distance between rows of dst
 */
static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                  const uint8_t *src3, uint8_t *dst,
                                  int width, int height,
                                  int srcStride1, int srcStride2,
                                  int srcStride3, int dstStride)
{
    const int groups = width / 2;
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *yp = src1 + srcStride1 *  y;
        const uint8_t *up = src2 + srcStride2 * (y >> 2);
        const uint8_t *vp = src3 + srcStride3 * (y >> 2);
        uint8_t *d        = dst  + dstStride  *  y;

        for (x = 0; x < groups; x++) {
            const uint8_t *luma = yp + (x << 2);
            uint8_t *out        = d + 8 * x;

            out[0] = luma[0];
            out[1] = up[x];
            out[2] = luma[1];
            out[3] = vp[x];
            out[4] = luma[2];
            out[5] = up[x];
            out[6] = luma[3];
            out[7] = vp[x];
        }
    }
}
763

764
/**
 * Copy every second byte of src, starting with byte 0, to dst.
 *
 * @param src   input, 2 * count bytes are spanned
 * @param dst   output, count bytes are written
 * @param count number of bytes to produce; nothing is done if it is <= 0
 */
static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
{
    int i;

    for (i = 0; i < count; i++)
        dst[i] = src[2 * i];
}

775 776
/**
 * De-interleave bytes 0 and 2 of every 4-byte group into two outputs.
 *
 * @param src   input, 4 bytes per group
 * @param dst0  receives byte 0 of each group
 * @param dst1  receives byte 2 of each group
 * @param count number of groups; nothing is done if it is <= 0
 */
static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                            int count)
{
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = src[4 * i + 0];
        dst1[i] = src[4 * i + 2];
    }
}

789 790
/**
 * De-interleave bytes 0 and 2 of every 4-byte group, averaging two inputs.
 *
 * Each output is (a + b) >> 1 of the corresponding bytes of src0 and src1,
 * i.e. the average rounded down.
 *
 * @param src0  first input, 4 bytes per group
 * @param src1  second input, 4 bytes per group
 * @param dst0  receives the averaged byte 0 of each group
 * @param dst1  receives the averaged byte 2 of each group
 * @param count number of groups; nothing is done if it is <= 0
 */
static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
                               uint8_t *dst0, uint8_t *dst1, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = (src0[4 * i + 0] + src1[4 * i + 0]) >> 1;
        dst1[i] = (src0[4 * i + 2] + src1[4 * i + 2]) >> 1;
    }
}

804 805
/**
 * De-interleave bytes 1 and 3 of every 4-byte group into two outputs.
 *
 * @param src   input, 4 bytes per group
 * @param dst0  receives byte 1 of each group
 * @param dst1  receives byte 3 of each group
 * @param count number of groups; nothing is done if it is <= 0
 */
static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                           int count)
{
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = src[4 * i + 1];
        dst1[i] = src[4 * i + 3];
    }
}

819 820
/**
 * De-interleave bytes 1 and 3 of every 4-byte group, averaging two inputs.
 *
 * Each output is (a + b) >> 1 of the corresponding bytes of src0 and src1,
 * i.e. the average rounded down.
 *
 * @param src0  first input, 4 bytes per group
 * @param src1  second input, 4 bytes per group
 * @param dst0  receives the averaged byte 1 of each group
 * @param dst1  receives the averaged byte 3 of each group
 * @param count number of groups; nothing is done if it is <= 0
 */
static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
                              uint8_t *dst0, uint8_t *dst1, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = (src0[4 * i + 1] + src1[4 * i + 1]) >> 1;
        dst1[i] = (src0[4 * i + 3] + src1[4 * i + 3]) >> 1;
    }
}

836
/**
 * Split packed input into planes, taking luma from the even bytes of every
 * row and chroma from the odd bytes, averaged over each pair of rows.
 *
 * Chroma is produced on odd rows only, from the current and the previous
 * input row, so one chroma row is written per two input rows.
 *
 * @param ydst        luma output plane
 * @param udst        output plane for byte 1 of each 4-byte group
 * @param vdst        output plane for byte 3 of each 4-byte group
 * @param src         packed input
 * @param width       number of luma samples per row
 * @param height      number of input rows
 * @param lumStride   byte distance between luma output rows
 * @param chromStride byte distance between chroma output rows
 * @param srcStride   byte distance between input rows
 */
static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    /* width / 2, rounded up */
    const int chromWidth = -((-width) >> 1);
    int row;

    for (row = 0; row < height; row++) {
        extract_even_c(src, ydst, width);
        if (row & 1) {
            extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
            udst += chromStride;
            vdst += chromStride;
        }

        src  += srcStride;
        ydst += lumStride;
    }
}

856
/**
 * Split packed input into planes, taking luma from the even bytes and chroma
 * from the odd bytes of every row; one chroma row is written per input row.
 *
 * @param ydst        luma output plane
 * @param udst        output plane for byte 1 of each 4-byte group
 * @param vdst        output plane for byte 3 of each 4-byte group
 * @param src         packed input
 * @param width       number of luma samples per row
 * @param height      number of input rows
 * @param lumStride   byte distance between luma output rows
 * @param chromStride byte distance between chroma output rows
 * @param srcStride   byte distance between input rows
 */
static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    /* width / 2, rounded up */
    const int chromWidth = -((-width) >> 1);
    int row;

    for (row = 0; row < height; row++) {
        extract_even_c(src, ydst, width);
        extract_odd2_c(src, udst, vdst, chromWidth);

        src  += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}

874
/**
 * Split packed input into planes, taking luma from the odd bytes of every
 * row and chroma from the even bytes, averaged over each pair of rows.
 *
 * Chroma is produced on odd rows only, from the current and the previous
 * input row, so one chroma row is written per two input rows.
 *
 * @param ydst        luma output plane
 * @param udst        output plane for byte 0 of each 4-byte group
 * @param vdst        output plane for byte 2 of each 4-byte group
 * @param src         packed input
 * @param width       number of luma samples per row
 * @param height      number of input rows
 * @param lumStride   byte distance between luma output rows
 * @param chromStride byte distance between chroma output rows
 * @param srcStride   byte distance between input rows
 */
static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    /* width / 2, rounded up */
    const int chromWidth = -((-width) >> 1);
    int row;

    for (row = 0; row < height; row++) {
        extract_even_c(src + 1, ydst, width);
        if (row & 1) {
            extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
            udst += chromStride;
            vdst += chromStride;
        }

        src  += srcStride;
        ydst += lumStride;
    }
}

894
/**
 * Split packed input into planes, taking luma from the odd bytes and chroma
 * from the even bytes of every row; one chroma row is written per input row.
 *
 * @param ydst        luma output plane
 * @param udst        output plane for byte 0 of each 4-byte group
 * @param vdst        output plane for byte 2 of each 4-byte group
 * @param src         packed input
 * @param width       number of luma samples per row
 * @param height      number of input rows
 * @param lumStride   byte distance between luma output rows
 * @param chromStride byte distance between chroma output rows
 * @param srcStride   byte distance between input rows
 */
static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    /* width / 2, rounded up */
    const int chromWidth = -((-width) >> 1);
    int row;

    for (row = 0; row < height; row++) {
        extract_even_c(src + 1, ydst, width);
        extract_even2_c(src, udst, vdst, chromWidth);

        src  += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}

912
static av_cold void rgb2rgb_init_c(void)
913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
{
    rgb15to16          = rgb15to16_c;
    rgb15tobgr24       = rgb15tobgr24_c;
    rgb15to32          = rgb15to32_c;
    rgb16tobgr24       = rgb16tobgr24_c;
    rgb16to32          = rgb16to32_c;
    rgb16to15          = rgb16to15_c;
    rgb24tobgr16       = rgb24tobgr16_c;
    rgb24tobgr15       = rgb24tobgr15_c;
    rgb24tobgr32       = rgb24tobgr32_c;
    rgb32to16          = rgb32to16_c;
    rgb32to15          = rgb32to15_c;
    rgb32tobgr24       = rgb32tobgr24_c;
    rgb24to15          = rgb24to15_c;
    rgb24to16          = rgb24to16_c;
    rgb24tobgr24       = rgb24tobgr24_c;
    shuffle_bytes_2103 = shuffle_bytes_2103_c;
    rgb32tobgr16       = rgb32tobgr16_c;
    rgb32tobgr15       = rgb32tobgr15_c;
    yv12toyuy2         = yv12toyuy2_c;
    yv12touyvy         = yv12touyvy_c;
    yuv422ptoyuy2      = yuv422ptoyuy2_c;
    yuv422ptouyvy      = yuv422ptouyvy_c;
    yuy2toyv12         = yuy2toyv12_c;
    planar2x           = planar2x_c;
    rgb24toyv12        = rgb24toyv12_c;
    interleaveBytes    = interleaveBytes_c;
    vu9_to_vu12        = vu9_to_vu12_c;
    yvu9_to_yuy2       = yvu9_to_yuy2_c;

    uyvytoyuv420       = uyvytoyuv420_c;
    uyvytoyuv422       = uyvytoyuv422_c;
    yuyvtoyuv420       = yuyvtoyuv420_c;
    yuyvtoyuv422       = yuyvtoyuv422_c;
947
}