rgb2rgb_template.c 30 KB
Newer Older
1
/*
2 3 4 5 6 7 8
 * software RGB to RGB converter
 * pluralize by software PAL8 to RGB converter
 *              software YUV to YUV converter
 *              software YUV to RGB converter
 * Written by Nick Kurshev.
 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
 * lot of big-endian byte order fixes by Alex Beregszaszi
9
 *
10 11
 * This file is part of FFmpeg.
 *
12 13 14 15
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
16
 *
17
 * FFmpeg is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
21
 *
22 23
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
24
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 26
 */

Arpi's avatar
Arpi committed
27 28
#include <stddef.h>

29 30
#include "libavutil/attributes.h"

31 32
/**
 * Expand packed 3-byte pixels to 4-byte pixels with a fully opaque extra byte.
 *
 * Without HAVE_BIGENDIAN the three source bytes are copied in order and 255
 * is appended; with HAVE_BIGENDIAN the output is 255 followed by the three
 * source bytes in reverse order.
 *
 * @param src      input, 3 bytes per pixel
 * @param dst      output, 4 bytes per pixel
 * @param src_size number of input bytes; whole pixels are consumed, so a
 *                 size that is not a multiple of 3 reads past the input
 */
static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *const end = src + src_size;

    for (; src < end; src += 3, dst += 4) {
#if HAVE_BIGENDIAN
        /* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */
        dst[0] = 255;
        dst[1] = src[2];
        dst[2] = src[1];
        dst[3] = src[0];
#else
        dst[0] = src[0];
        dst[1] = src[1];
        dst[2] = src[2];
        dst[3] = 255;
#endif
    }
}
54

55 56
/**
 * Reduce packed 4-byte pixels to 3-byte pixels by dropping one byte.
 *
 * Without HAVE_BIGENDIAN the first three bytes of each pixel are copied in
 * order and the fourth is skipped; with HAVE_BIGENDIAN the first byte is
 * skipped and the remaining three are written in reverse order.
 *
 * @param src      input, 4 bytes per pixel
 * @param dst      output, 3 bytes per pixel
 * @param src_size number of input bytes; whole pixels are consumed
 */
static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *const end = src + src_size;

    for (; src < end; src += 4, dst += 3) {
#if HAVE_BIGENDIAN
        /* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */
        dst[2] = src[1];
        dst[1] = src[2];
        dst[0] = src[3];
#else
        dst[0] = src[0];
        dst[1] = src[1];
        dst[2] = src[2];
#endif
    }
}
78

79
/*
 * original by Strepto/Astral
 * ported to gcc & bugfixed: A'rpi
 * MMXEXT, 3DNOW optimization by Nick Kurshev
 * 32-bit C version, and and&add trick by Michael Niedermayer
 */
/**
 * Rewrite every 16-bit word so that bits 5..14 move up by one position while
 * bits 0..4 stay in place (bit 5 of the result is always 0).
 *
 * Adding the masked upper field to the word a second time doubles that
 * field, which is the same as shifting it left by one.
 *
 * @param src      input, 2 bytes per pixel
 * @param dst      output, 2 bytes per pixel
 * @param src_size number of input bytes
 *
 * NOTE(review): src and dst are accessed through 32-bit and 16-bit pointers,
 * so suitably aligned buffers are presumably expected - confirm with callers.
 */
static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *s         = src;
    const uint8_t *const end = src + src_size;
    uint8_t *d               = dst;

    /* Two pixels per 32-bit word while at least four bytes remain. */
    while (end - s >= 4) {
        const uint32_t x = *(const uint32_t *)s;
        *(uint32_t *)d   = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0);
        s += 4;
        d += 4;
    }
    /* Trailing single pixel. */
    if (s < end) {
        const uint16_t x = *(const uint16_t *)s;
        *(uint16_t *)d   = (x & 0x7FFF) + (x & 0x7FE0);
    }
}
103

104
/**
 * Rewrite every 16-bit word so that bits 6..15 move down by one position
 * while bits 0..4 stay in place; the original bit 5 is discarded and bit 15
 * of the result is 0.
 *
 * @param src      input, 2 bytes per pixel
 * @param dst      output, 2 bytes per pixel
 * @param src_size number of input bytes
 *
 * NOTE(review): src and dst are accessed through 32-bit and 16-bit pointers,
 * so suitably aligned buffers are presumably expected - confirm with callers.
 */
static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *s         = src;
    const uint8_t *const end = src + src_size;
    uint8_t *d               = dst;

    /* Two pixels per 32-bit word while at least four bytes remain. */
    while (end - s >= 4) {
        const uint32_t x = *(const uint32_t *)s;
        *(uint32_t *)d   = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F);
        s += 4;
        d += 4;
    }
    /* Trailing single pixel. */
    if (s < end) {
        const uint16_t x = *(const uint16_t *)s;
        *(uint16_t *)d   = ((x >> 1) & 0x7FE0) | (x & 0x001F);
    }
}

123
/**
 * Pack each native 32-bit pixel into a 16-bit 5-6-5 word, keeping channel
 * order: bits 3..7 go to bits 0..4, bits 10..15 to bits 5..10 and
 * bits 19..23 to bits 11..15. The top byte of the input is ignored.
 *
 * @param src      input, read as native-endian 32-bit words
 * @param dst      output, written as native-endian 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; src < end; src += 4) {
        const uint32_t px = *(const uint32_t *)src;

        *out++ = ((px & 0xFF)     >> 3) +
                 ((px & 0xFC00)   >> 5) +
                 ((px & 0xF80000) >> 8);
    }
}

138 139
/**
 * Pack each native 32-bit pixel into a 16-bit 5-6-5 word with the outer
 * channels swapped: bits 3..7 go to bits 11..15, bits 10..15 to bits 5..10
 * and bits 19..23 to bits 0..4. The top byte of the input is ignored.
 *
 * @param src      input, read as native-endian 32-bit words
 * @param dst      output, written as native-endian 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; src < end; src += 4) {
        const uint32_t px = *(const uint32_t *)src;

        *out++ = ((px & 0xF8)     << 8) +
                 ((px & 0xFC00)   >> 5) +
                 ((px & 0xF80000) >> 19);
    }
}

154
/**
 * Pack each native 32-bit pixel into a 16-bit 5-5-5 word, keeping channel
 * order: bits 3..7 go to bits 0..4, bits 11..15 to bits 5..9 and
 * bits 19..23 to bits 10..14. The top byte of the input is ignored.
 *
 * @param src      input, read as native-endian 32-bit words
 * @param dst      output, written as native-endian 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; src < end; src += 4) {
        const uint32_t px = *(const uint32_t *)src;

        *out++ = ((px & 0xFF)     >> 3) +
                 ((px & 0xF800)   >> 6) +
                 ((px & 0xF80000) >> 9);
    }
}

169 170
/**
 * Pack each native 32-bit pixel into a 16-bit 5-5-5 word with the outer
 * channels swapped: bits 3..7 go to bits 10..14, bits 11..15 to bits 5..9
 * and bits 19..23 to bits 0..4. The top byte of the input is ignored.
 *
 * @param src      input, read as native-endian 32-bit words
 * @param dst      output, written as native-endian 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; src < end; src += 4) {
        const uint32_t px = *(const uint32_t *)src;

        *out++ = ((px & 0xF8)     <<  7) +
                 ((px & 0xF800)   >>  6) +
                 ((px & 0xF80000) >> 19);
    }
}

185 186
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 words: byte 0 supplies bits 0..4,
 * byte 1 bits 5..10 and byte 2 bits 11..15.
 *
 * @param src      input, 3 bytes per pixel
 * @param dst      output, written as native-endian 16-bit words
 * @param src_size number of input bytes; whole pixels are consumed
 */
static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; src < end; src += 3) {
        const int b = src[0];
        const int g = src[1];
        const int r = src[2];

        *out++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
    }
}

200
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 words: byte 0 supplies bits 11..15,
 * byte 1 bits 5..10 and byte 2 bits 0..4.
 *
 * @param src      input, 3 bytes per pixel
 * @param dst      output, written as native-endian 16-bit words
 * @param src_size number of input bytes; whole pixels are consumed
 */
static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; src < end; src += 3) {
        const int r = src[0];
        const int g = src[1];
        const int b = src[2];

        *out++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
    }
}

214 215
/**
 * Pack 3-byte pixels into 16-bit 5-5-5 words: byte 0 supplies bits 0..4,
 * byte 1 bits 5..9 and byte 2 bits 10..14.
 *
 * @param src      input, 3 bytes per pixel
 * @param dst      output, written as native-endian 16-bit words
 * @param src_size number of input bytes; whole pixels are consumed
 */
static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; src < end; src += 3) {
        const int b = src[0];
        const int g = src[1];
        const int r = src[2];

        *out++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
    }
}

229
/**
 * Pack 3-byte pixels into 16-bit 5-5-5 words: byte 0 supplies bits 10..14,
 * byte 1 bits 5..9 and byte 2 bits 0..4.
 *
 * @param src      input, 3 bytes per pixel
 * @param dst      output, written as native-endian 16-bit words
 * @param src_size number of input bytes; whole pixels are consumed
 */
static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const end = src + src_size;
    uint16_t *out            = (uint16_t *)dst;

    for (; src < end; src += 3) {
        const int r = src[0];
        const int g = src[1];
        const int b = src[2];

        *out++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
    }
}

243 244
/**
 * Expand 16-bit 5-5-5 words to 3 bytes per pixel.
 *
 * Output byte 0 comes from bits 0..4, byte 1 from bits 5..9 and byte 2 from
 * bits 10..14. Each 5-bit field is widened to 8 bits by placing it in the
 * top bits and repeating its three most significant bits at the bottom.
 *
 * @param src      input, read as native-endian 16-bit words
 * @param dst      output, 3 bytes per pixel
 * @param src_size number of input bytes (src_size / 2 pixels are converted)
 */
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint16_t *in        = (const uint16_t *)src;
    const uint16_t *const end = in + src_size / 2;

    for (; in < end; in++, dst += 3) {
        const uint16_t bgr = *in;

        dst[0] = ((bgr & 0x001F) << 3) | ((bgr & 0x001F) >>  2);
        dst[1] = ((bgr & 0x03E0) >> 2) | ((bgr & 0x03E0) >>  7);
        dst[2] = ((bgr & 0x7C00) >> 7) | ((bgr & 0x7C00) >> 12);
    }
}

258 259
/**
 * Expand 16-bit 5-6-5 words to 3 bytes per pixel.
 *
 * Output byte 0 comes from bits 0..4, byte 1 from bits 5..10 and byte 2 from
 * bits 11..15. Each field is widened to 8 bits by placing it in the top bits
 * and repeating its most significant bits at the bottom.
 *
 * @param src      input, read as native-endian 16-bit words
 * @param dst      output, 3 bytes per pixel
 * @param src_size number of input bytes (src_size / 2 pixels are converted)
 */
static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint16_t *in        = (const uint16_t *)src;
    const uint16_t *const end = in + src_size / 2;

    for (; in < end; in++, dst += 3) {
        const uint16_t bgr = *in;

        dst[0] = ((bgr & 0x001F) << 3) | ((bgr & 0x001F) >>  2);
        dst[1] = ((bgr & 0x07E0) >> 3) | ((bgr & 0x07E0) >>  9);
        dst[2] = ((bgr & 0xF800) >> 8) | ((bgr & 0xF800) >> 13);
    }
}

273
/**
 * Expand 16-bit 5-5-5 words to 4 bytes per pixel with an opaque extra byte.
 *
 * The three 5-bit fields are widened to 8 bits by bit replication. Without
 * HAVE_BIGENDIAN the bytes are emitted low field first followed by 255;
 * with HAVE_BIGENDIAN the order is 255 first, then high field to low field.
 *
 * @param src      input, read as native-endian 16-bit words
 * @param dst      output, 4 bytes per pixel
 * @param src_size number of input bytes (src_size / 2 pixels are converted)
 */
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in        = (const uint16_t *)src;
    const uint16_t *const end = in + src_size / 2;

    for (; in < end; in++, dst += 4) {
        const uint16_t bgr = *in;
        const uint8_t lo   = ((bgr & 0x001F) << 3) | ((bgr & 0x001F) >>  2);
        const uint8_t mid  = ((bgr & 0x03E0) >> 2) | ((bgr & 0x03E0) >>  7);
        const uint8_t hi   = ((bgr & 0x7C00) >> 7) | ((bgr & 0x7C00) >> 12);

#if HAVE_BIGENDIAN
        dst[0] = 255;
        dst[1] = hi;
        dst[2] = mid;
        dst[3] = lo;
#else
        dst[0] = lo;
        dst[1] = mid;
        dst[2] = hi;
        dst[3] = 255;
#endif
    }
}
294

295
/**
 * Expand 16-bit 5-6-5 words to 4 bytes per pixel with an opaque extra byte.
 *
 * The three fields are widened to 8 bits by bit replication. Without
 * HAVE_BIGENDIAN the bytes are emitted low field first followed by 255;
 * with HAVE_BIGENDIAN the order is 255 first, then high field to low field.
 *
 * @param src      input, read as native-endian 16-bit words
 * @param dst      output, 4 bytes per pixel
 * @param src_size number of input bytes (src_size / 2 pixels are converted)
 */
static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in        = (const uint16_t *)src;
    const uint16_t *const end = in + src_size / 2;

    for (; in < end; in++, dst += 4) {
        const uint16_t bgr = *in;
        const uint8_t lo   = ((bgr & 0x001F) << 3) | ((bgr & 0x001F) >>  2);
        const uint8_t mid  = ((bgr & 0x07E0) >> 3) | ((bgr & 0x07E0) >>  9);
        const uint8_t hi   = ((bgr & 0xF800) >> 8) | ((bgr & 0xF800) >> 13);

#if HAVE_BIGENDIAN
        dst[0] = 255;
        dst[1] = hi;
        dst[2] = mid;
        dst[3] = lo;
#else
        dst[0] = lo;
        dst[1] = mid;
        dst[2] = hi;
        dst[3] = 255;
#endif
    }
}
316

317 318
/**
 * For every native 32-bit word, exchange the bytes at bit positions 0..7 and
 * 16..23 and leave the bytes at 8..15 and 24..31 where they are.
 *
 * @param src      input, read as native-endian 32-bit words
 * @param dst      output, written as native-endian 32-bit words
 * @param src_size number of input bytes; a whole word is processed for every
 *                 started group of 4 bytes
 */
static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst,
                                        int src_size)
{
    int left;

    for (left = src_size; left > 0; left -= 4, src += 4, dst += 4) {
        const unsigned v    = *(const uint32_t *)src;
        const unsigned keep = v & 0xff00ff00;
        const unsigned swap = v & 0x00ff00ff;

        *(uint32_t *)dst = (swap >> 16) + keep + (swap << 16);
    }
}

331 332 333 334 335 336 337 338 339 340 341 342 343 344
/**
 * For every native 32-bit word, exchange the bytes at bit positions 8..15
 * and 24..31 and leave the bytes at 0..7 and 16..23 where they are.
 *
 * @param src      input, read as native-endian 32-bit words
 * @param dst      output, written as native-endian 32-bit words
 * @param src_size number of input bytes; a whole word is processed for every
 *                 started group of 4 bytes
 */
static inline void shuffle_bytes_0321_c(const uint8_t *src, uint8_t *dst,
                                        int src_size)
{
    int left;

    for (left = src_size; left > 0; left -= 4, src += 4, dst += 4) {
        const unsigned word = *(const uint32_t *)src;
        const unsigned keep = word & 0x00ff00ff;
        const unsigned swap = word & 0xff00ff00;

        *(uint32_t *)dst = (swap >> 16) + keep + (swap << 16);
    }
}

345
/**
 * Swap the first and third byte of every 3-byte pixel; the middle byte is
 * copied unchanged.
 *
 * Only complete pixels are converted: trailing bytes that do not form a full
 * pixel are left alone, and a non-positive size does nothing.
 *
 * @param src      input, 3 bytes per pixel
 * @param dst      output, 3 bytes per pixel
 * @param src_size number of input bytes
 */
static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    int i;

    /* Signed index: an unsigned one would turn a negative size into a huge bound. */
    for (i = 0; i <= src_size - 3; i += 3) {
        /* Read the third byte first so that src == dst still swaps correctly. */
        const uint8_t third = src[i + 2];

        dst[i + 1] = src[i + 1];
        dst[i + 2] = src[i + 0];
        dst[i + 0] = third;
    }
}

357 358
/**
 * Interleave three planes into packed words laid out as Y0 U Y1 V.
 *
 * Each output row consumes 2 luma samples and 1 sample from each chroma
 * plane per packed word. The chroma pointers advance only after every
 * vertLumPerChroma-th row, so vertLumPerChroma luma rows share one chroma row.
 *
 * @param ysrc             luma plane
 * @param usrc             first chroma plane (second byte of each word)
 * @param vsrc             second chroma plane (fourth byte of each word)
 * @param dst              packed output, written through 32- or 64-bit stores
 * @param width            luma width in samples; width >> 1 words per row
 * @param height           number of rows
 * @param lumStride        byte step between luma rows
 * @param chromStride      byte step between chroma rows
 * @param dstStride        byte step between output rows
 * @param vertLumPerChroma luma rows per chroma row; the mask test below
 *                         presumably expects a power of two
 *
 * NOTE(review): the HAVE_FAST_64BIT path packs in little-endian order with
 * no HAVE_BIGENDIAN variant and handles two chroma samples per iteration, so
 * an odd chroma width writes one extra word - confirm callers' guarantees.
 */
static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    const int chromWidth = width >> 1;
    int y, i;

    for (y = 0; y < height; y++) {
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
#if HAVE_FAST_64BIT
        uint64_t *ldst = (uint64_t *)dst;

        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k = yc[0] + (uc[0] << 8) +
                         (yc[1] << 16) + ((unsigned) vc[0] << 24);
            uint64_t l = yc[2] + (uc[1] << 8) +
                         (yc[3] << 16) + ((unsigned) vc[1] << 24);
            *ldst++ = k + (l << 32);
            yc     += 4;
            uc     += 2;
            vc     += 2;
        }
#else
        uint32_t *idst = (uint32_t *)dst;

        for (i = 0; i < chromWidth; i++) {
            /* The byte shifted into bits 24..31 is widened to unsigned first:
             * shifting a promoted int >= 128 by 24 overflows signed int. */
#if HAVE_BIGENDIAN
            *idst++ = ((uint32_t)yc[0] << 24) + (uc[0] << 16) +
                      (yc[1] <<  8) + (vc[0] <<  0);
#else
            *idst++ = yc[0] + (uc[0] << 8) +
                      (yc[1] << 16) + ((uint32_t)vc[0] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}

407
/**
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 *
 * Forwards to yuvPlanartoyuy2_c() with two luma rows per chroma row.
 */
static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height, int lumStride,
                                int chromStride, int dstStride)
{
    //FIXME interpolate chroma
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height,
                      lumStride, chromStride, dstStride, 2);
}

421 422
/**
 * Interleave three planes into packed words laid out as U Y0 V Y1.
 *
 * Each output row consumes 2 luma samples and 1 sample from each chroma
 * plane per packed word. The chroma pointers advance only after every
 * vertLumPerChroma-th row, so vertLumPerChroma luma rows share one chroma row.
 *
 * @param ysrc             luma plane
 * @param usrc             first chroma plane (first byte of each word)
 * @param vsrc             second chroma plane (third byte of each word)
 * @param dst              packed output, written through 32- or 64-bit stores
 * @param width            luma width in samples; width >> 1 words per row
 * @param height           number of rows
 * @param lumStride        byte step between luma rows
 * @param chromStride      byte step between chroma rows
 * @param dstStride        byte step between output rows
 * @param vertLumPerChroma luma rows per chroma row; the mask test below
 *                         presumably expects a power of two
 *
 * NOTE(review): the HAVE_FAST_64BIT path packs in little-endian order with
 * no HAVE_BIGENDIAN variant and handles two chroma samples per iteration, so
 * an odd chroma width writes one extra word - confirm callers' guarantees.
 */
static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    const int chromWidth = width >> 1;
    int y, i;

    for (y = 0; y < height; y++) {
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
#if HAVE_FAST_64BIT
        uint64_t *ldst = (uint64_t *)dst;

        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k = uc[0] + (yc[0] << 8) +
                         (vc[0] << 16) + ((unsigned) yc[1] << 24);
            uint64_t l = uc[1] + (yc[2] << 8) +
                         (vc[1] << 16) + ((unsigned) yc[3] << 24);
            *ldst++ = k + (l << 32);
            yc     += 4;
            uc     += 2;
            vc     += 2;
        }
#else
        uint32_t *idst = (uint32_t *)dst;

        for (i = 0; i < chromWidth; i++) {
            /* The byte shifted into bits 24..31 is widened to unsigned first:
             * shifting a promoted int >= 128 by 24 overflows signed int. */
#if HAVE_BIGENDIAN
            *idst++ = ((uint32_t)uc[0] << 24) + (yc[0] << 16) +
                      (vc[0] <<  8) + (yc[1] <<  0);
#else
            *idst++ = uc[0] + (yc[0] << 8) +
                      (vc[0] << 16) + ((uint32_t)yc[1] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}

/**
 * Height should be a multiple of 2 and width should be a multiple of 16
 * (If this is a problem for anyone then tell me, and I will fix it.)
 *
 * Forwards to yuvPlanartouyvy_c() with two luma rows per chroma row.
 */
static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height, int lumStride,
                                int chromStride, int dstStride)
{
    //FIXME interpolate chroma
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height,
                      lumStride, chromStride, dstStride, 2);
}

485 486 487
/**
 * Width should be a multiple of 16.
 *
 * Forwards to yuvPlanartouyvy_c() with one luma row per chroma row.
 */
static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height, int lumStride,
                                   int chromStride, int dstStride)
{
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height,
                      lumStride, chromStride, dstStride, 1);
}

497
/**
 * Width should be a multiple of 16.
 *
 * Forwards to yuvPlanartoyuy2_c() with one luma row per chroma row.
 */
static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height, int lumStride,
                                   int chromStride, int dstStride)
{
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height,
                      lumStride, chromStride, dstStride, 1);
}

509
/**
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 *
 * Splits packed Y0 U Y1 V words into three planes. Rows are handled in
 * pairs: the first row of a pair yields luma and chroma, the second row
 * yields luma only (its chroma bytes are ignored). If the height is odd, the
 * final unpaired row is treated as a first row and nothing is read or
 * written beyond it.
 *
 * @param src         packed input, 4 bytes per pair of luma samples
 * @param ydst        luma output plane
 * @param udst        output plane for byte 1 of each word
 * @param vdst        output plane for byte 3 of each word
 * @param width       luma width in samples (width >> 1 words per row)
 * @param height      number of input rows
 * @param lumStride   byte step between luma output rows
 * @param chromStride byte step between chroma output rows
 * @param srcStride   byte step between input rows
 */
static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height, int lumStride,
                                int chromStride, int srcStride)
{
    const int chromWidth = width >> 1;
    int y;

    for (y = 0; y < height; y += 2) {
        int i;

        /* First row of the pair: luma and chroma. */
        for (i = 0; i < chromWidth; i++) {
            ydst[2 * i + 0] = src[4 * i + 0];
            udst[i]         = src[4 * i + 1];
            ydst[2 * i + 1] = src[4 * i + 2];
            vdst[i]         = src[4 * i + 3];
        }
        ydst += lumStride;
        src  += srcStride;

        /* Odd height: there is no second row to pair with. */
        if (y + 1 == height)
            break;

        /* Second row of the pair: luma only. */
        for (i = 0; i < chromWidth; i++) {
            ydst[2 * i + 0] = src[4 * i + 0];
            ydst[2 * i + 1] = src[4 * i + 2];
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
}
Arpi's avatar
Arpi committed
542

543 544
/**
 * Horizontal-only 2x expansion of one row: the two border samples are
 * copied, interior outputs are 3:1 / 1:3 blends of neighbouring inputs.
 */
static inline void planar2x_edge_line_c(const uint8_t *src, uint8_t *dst,
                                        int srcWidth)
{
    int x;

    dst[0] = src[0];
    for (x = 0; x < srcWidth - 1; x++) {
        dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2;
        dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
    }
    dst[2 * srcWidth - 1] = src[srcWidth - 1];
}

/**
 * Upscale a plane by two in both directions.
 *
 * The first and last output rows are produced from the first and last input
 * row alone. Every pair of output rows in between is blended with 3:1 / 1:3
 * weights from two vertically adjacent input rows, with interior columns
 * taking their second tap from the diagonal neighbour.
 *
 * @param src       input plane
 * @param dst       output plane, 2 * srcWidth columns by 2 * srcHeight rows
 * @param srcWidth  input width in samples
 * @param srcHeight input height in rows
 * @param srcStride byte step between input rows
 * @param dstStride byte step between output rows
 */
static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
                              int srcHeight, int srcStride, int dstStride)
{
    const int last = srcWidth - 1;
    int x, y;

    // first line
    planar2x_edge_line_c(src, dst, srcWidth);
    dst += dstStride;

    for (y = 1; y < srcHeight; y++) {
        const uint8_t *below = src + srcStride;
        uint8_t *dst2        = dst + dstStride;

        /* left border column: vertical blend only */
        dst[0]  = (src[0] * 3 + below[0]) >> 2;
        dst2[0] = (src[0] + 3 * below[0]) >> 2;

        for (x = 0; x < last; x++) {
            dst[2 * x + 1]  = (src[x + 0] * 3 + below[x + 1]) >> 2;
            dst2[2 * x + 2] = (src[x + 0] + 3 * below[x + 1]) >> 2;
            dst2[2 * x + 1] = (src[x + 1] + 3 * below[x])     >> 2;
            dst[2 * x + 2]  = (src[x + 1] * 3 + below[x])     >> 2;
        }

        /* right border column: vertical blend only */
        dst[2 * srcWidth - 1]  = (src[last] * 3 + below[last]) >> 2;
        dst2[2 * srcWidth - 1] = (src[last] + 3 * below[last]) >> 2;

        dst += dstStride * 2;
        src += srcStride;
    }

    // last line
    planar2x_edge_line_c(src, dst, srcWidth);
}

Arpi's avatar
Arpi committed
588
/**
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 * Chrominance data is only taken from every second line, others are ignored.
 * FIXME: Write HQ version.
 *
 * Splits packed U Y0 V Y1 words into three planes. Rows are handled in
 * pairs: the first row of a pair yields luma and chroma, the second row
 * yields luma only. If the height is odd, the final unpaired row is treated
 * as a first row and nothing is read or written beyond it.
 *
 * @param src         packed input, 4 bytes per pair of luma samples
 * @param ydst        luma output plane
 * @param udst        output plane for byte 0 of each word
 * @param vdst        output plane for byte 2 of each word
 * @param width       luma width in samples (width >> 1 words per row)
 * @param height      number of input rows
 * @param lumStride   byte step between luma output rows
 * @param chromStride byte step between chroma output rows
 * @param srcStride   byte step between input rows
 */
static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height, int lumStride,
                                int chromStride, int srcStride)
{
    const int chromWidth = width >> 1;
    int y;

    for (y = 0; y < height; y += 2) {
        int i;

        /* First row of the pair: luma and chroma. */
        for (i = 0; i < chromWidth; i++) {
            udst[i]         = src[4 * i + 0];
            ydst[2 * i + 0] = src[4 * i + 1];
            vdst[i]         = src[4 * i + 2];
            ydst[2 * i + 1] = src[4 * i + 3];
        }
        ydst += lumStride;
        src  += srcStride;

        /* Odd height: there is no second row to pair with. */
        if (y + 1 == height)
            break;

        /* Second row of the pair: luma only. */
        for (i = 0; i < chromWidth; i++) {
            ydst[2 * i + 0] = src[4 * i + 1];
            ydst[2 * i + 1] = src[4 * i + 3];
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
}

624
/**
625 626 627
 * Height should be a multiple of 2 and width should be a multiple of 2.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 * Chrominance data is only taken from every second line,
628 629
 * others are ignored in the C version.
 * FIXME: Write HQ version.
630
 */
631
void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
632
                   uint8_t *vdst, int width, int height, int lumStride,
633
                   int chromStride, int srcStride, int32_t *rgb2yuv)
634
{
635 636 637
    int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
    int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
    int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
638
    int y;
639
    const int chromWidth = width >> 1;
640 641

    for (y = 0; y < height; y += 2) {
642
        int i;
643 644 645 646
        for (i = 0; i < chromWidth; i++) {
            unsigned int b = src[6 * i + 0];
            unsigned int g = src[6 * i + 1];
            unsigned int r = src[6 * i + 2];
647

648 649 650
            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
651 652 653

            udst[i]     = U;
            vdst[i]     = V;
654
            ydst[2 * i] = Y;
655

656 657 658
            b = src[6 * i + 3];
            g = src[6 * i + 4];
            r = src[6 * i + 5];
659

660
            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
661
            ydst[2 * i + 1] = Y;
662 663 664 665
        }
        ydst += lumStride;
        src  += srcStride;

666
        if (y+1 == height)
667 668
            break;

669 670 671 672
        for (i = 0; i < chromWidth; i++) {
            unsigned int b = src[6 * i + 0];
            unsigned int g = src[6 * i + 1];
            unsigned int r = src[6 * i + 2];
673

674
            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
675

676
            ydst[2 * i] = Y;
677

678 679 680
            b = src[6 * i + 3];
            g = src[6 * i + 4];
            r = src[6 * i + 5];
681

682
            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
683
            ydst[2 * i + 1] = Y;
684 685 686 687 688 689
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
690
}
691

692
/**
 * Interleave two byte planes row by row: output byte 2*w comes from src1 and
 * output byte 2*w + 1 from src2.
 *
 * @param src1       plane supplying the even output bytes
 * @param src2       plane supplying the odd output bytes
 * @param dest       output, 2 * width bytes per row
 * @param width      number of bytes taken from each source row
 * @param height     number of rows
 * @param src1Stride byte step between src1 rows
 * @param src2Stride byte step between src2 rows
 * @param dstStride  byte step between output rows
 */
static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                              uint8_t *dest, int width, int height,
                              int src1Stride, int src2Stride, int dstStride)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dest[2 * col + 0] = src1[col];
            dest[2 * col + 1] = src2[col];
        }
        dest += dstStride;
        src1 += src1Stride;
        src2 += src2Stride;
    }
}
709

710 711 712 713 714 715 716 717 718 719 720 721
/**
 * Split an interleaved byte plane row by row: even input bytes go to dst1,
 * odd input bytes go to dst2.
 *
 * @param src        interleaved input, 2 * width bytes per row
 * @param dst1       output for the even input bytes
 * @param dst2       output for the odd input bytes
 * @param width      number of bytes written to each output row
 * @param height     number of rows
 * @param srcStride  byte step between input rows
 * @param dst1Stride byte step between dst1 rows
 * @param dst2Stride byte step between dst2 rows
 */
static void deinterleaveBytes_c(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
                                int width, int height, int srcStride,
                                int dst1Stride, int dst2Stride)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst1[col] = src[2 * col + 0];
            dst2[col] = src[2 * col + 1];
        }
        src  += srcStride;
        dst1 += dst1Stride;
        dst2 += dst2Stride;
    }
}

728 729
/**
 * Upsample two planes by two in both directions using sample repetition.
 *
 * For each of the height / 2 output rows, source row (y >> 1) is read and
 * each of its first width / 2 samples is written twice in a row.
 *
 * @param src1       first input plane
 * @param src2       second input plane
 * @param dst1       output for src1
 * @param dst2       output for src2
 * @param width      width / 2 source samples are read per row
 * @param height     height / 2 output rows are written per plane
 * @param srcStride1 byte step between src1 rows
 * @param srcStride2 byte step between src2 rows
 * @param dstStride1 byte step between dst1 rows
 * @param dstStride2 byte step between dst2 rows
 */
static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                 uint8_t *dst1, uint8_t *dst2,
                                 int width, int height,
                                 int srcStride1, int srcStride2,
                                 int dstStride1, int dstStride2)
{
    const int w = width  / 2;
    const int h = height / 2;
    int x, y;

    for (y = 0; y < h; y++) {
        const uint8_t *in = src1 + srcStride1 * (y >> 1);
        uint8_t *out      = dst1 + dstStride1 *  y;

        for (x = 0; x < w; x++) {
            out[2 * x + 1] = in[x];
            out[2 * x]     = out[2 * x + 1];
        }
    }
    for (y = 0; y < h; y++) {
        const uint8_t *in = src2 + srcStride2 * (y >> 1);
        uint8_t *out      = dst2 + dstStride2 *  y;

        for (x = 0; x < w; x++) {
            out[2 * x + 1] = in[x];
            out[2 * x]     = out[2 * x + 1];
        }
    }
}

752 753
/**
 * Pack three planes into bytes ordered src1, src2, src1, src3, with each
 * src2 / src3 sample shared by four consecutive src1 samples and by four
 * consecutive rows (row index y >> 2).
 *
 * Each of the width / 2 inner iterations reads 4 samples from src1 and one
 * each from src2 and src3, and writes 8 output bytes.
 *
 * @param src1       plane read at full resolution
 * @param src2       plane supplying bytes 1 and 5 of each 8-byte group
 * @param src3       plane supplying bytes 3 and 7 of each 8-byte group
 * @param dst        packed output
 * @param width      width / 2 groups are produced per row
 * @param height     number of rows
 * @param srcStride1 byte step between src1 rows
 * @param srcStride2 byte step between src2 rows
 * @param srcStride3 byte step between src3 rows
 * @param dstStride  byte step between output rows
 */
static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                  const uint8_t *src3, uint8_t *dst,
                                  int width, int height,
                                  int srcStride1, int srcStride2,
                                  int srcStride3, int dstStride)
{
    const int w = width / 2;
    const int h = height;
    int x, y;

    for (y = 0; y < h; y++) {
        const uint8_t *yp = src1 + srcStride1 *  y;
        const uint8_t *up = src2 + srcStride2 * (y >> 2);
        const uint8_t *vp = src3 + srcStride3 * (y >> 2);
        uint8_t *d        = dst  + dstStride  *  y;

        for (x = 0; x < w; x++, d += 8) {
            const uint8_t *luma = yp + (x << 2);

            d[0] = luma[0];
            d[1] = up[x];
            d[2] = luma[1];
            d[3] = vp[x];
            d[4] = luma[2];
            d[5] = up[x];
            d[6] = luma[3];
            d[7] = vp[x];
        }
    }
}
780

781
/**
 * Copy every second byte of src, starting with byte 0, into dst.
 *
 * @param src   input, 2 * count bytes are spanned
 * @param dst   output, count bytes
 * @param count number of bytes to produce; nothing is done if not positive
 */
static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
{
    int i;

    for (i = 0; i < count; i++)
        dst[i] = src[2 * i];
}

792 793
/**
 * From every group of 4 input bytes, copy byte 0 to dst0 and byte 2 to dst1.
 *
 * @param src   input, 4 * count bytes are spanned
 * @param dst0  output for byte 0 of each group
 * @param dst1  output for byte 2 of each group
 * @param count number of groups; nothing is done if not positive
 */
static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                            int count)
{
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = src[4 * i + 0];
        dst1[i] = src[4 * i + 2];
    }
}

806 807
/**
 * From every group of 4 bytes, average bytes 0 and 2 of the two inputs
 * (sum shifted right by one, i.e. rounded down) into dst0 and dst1.
 *
 * @param src0  first input, 4 * count bytes are spanned
 * @param src1  second input, 4 * count bytes are spanned
 * @param dst0  output for the averaged byte 0 of each group
 * @param dst1  output for the averaged byte 2 of each group
 * @param count number of groups; nothing is done if not positive
 */
static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
                               uint8_t *dst0, uint8_t *dst1, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = (src0[4 * i + 0] + src1[4 * i + 0]) >> 1;
        dst1[i] = (src0[4 * i + 2] + src1[4 * i + 2]) >> 1;
    }
}

821 822
/**
 * From every group of 4 input bytes, copy byte 1 to dst0 and byte 3 to dst1.
 *
 * @param src   input, 4 * count bytes are spanned
 * @param dst0  output for byte 1 of each group
 * @param dst1  output for byte 3 of each group
 * @param count number of groups; nothing is done if not positive
 */
static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                           int count)
{
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = src[4 * i + 1];
        dst1[i] = src[4 * i + 3];
    }
}

836 837
/**
 * De-interleave bytes 1 and 3 of every 4-byte group, averaging two inputs.
 *
 * For 0 <= i < count:
 *   dst0[i] = (src0[4 * i + 1] + src1[4 * i + 1]) >> 1
 *   dst1[i] = (src0[4 * i + 3] + src1[4 * i + 3]) >> 1
 * The sum is formed in int, so the result is the average rounded down.
 * Nothing is read or written when count <= 0.
 *
 * @param src0  first packed input, read in groups of 4 bytes
 * @param src1  second packed input, same layout as src0
 * @param dst0  receives the averaged byte 1 of each group (count bytes)
 * @param dst1  receives the averaged byte 3 of each group (count bytes)
 * @param count number of 4-byte groups to process
 */
static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
                              uint8_t *dst0, uint8_t *dst1, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        dst0[i] = (src0[4 * i + 1] + src1[4 * i + 1]) >> 1;
        dst1[i] = (src0[4 * i + 3] + src1[4 * i + 3]) >> 1;
    }
}

/**
 * Split packed rows into one luma plane and two vertically subsampled
 * chroma planes.
 *
 * Per source row, the even bytes (0, 2, 4, ...) are copied to ydst.
 * On every odd row, bytes 1 and 3 of each 4-byte group are averaged
 * between the previous and the current source row and written to udst
 * and vdst respectively. Chroma is only emitted on odd rows, so
 * height / 2 (rounded down) chroma rows are written.
 *
 * @param ydst        luma output, advanced by lumStride per row
 * @param udst        first chroma output, advanced by chromStride per
 *                    chroma row
 * @param vdst        second chroma output, advanced by chromStride per
 *                    chroma row
 * @param src         packed input, advanced by srcStride per row
 * @param width       number of luma bytes copied per row
 * @param height      number of source rows
 * @param lumStride   byte distance between rows of ydst
 * @param chromStride byte distance between rows of udst and vdst
 * @param srcStride   byte distance between rows of src
 */
static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    int y;
    /* presumably width / 2 rounded up; FF_CEIL_RSHIFT is defined elsewhere */
    const int chromWidth = FF_CEIL_RSHIFT(width, 1);

    for (y = 0; y < height; y++) {
        extract_even_c(src, ydst, width);
        if (y & 1) {
            /* src - srcStride is the row processed in the previous pass */
            extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
            udst += chromStride;
            vdst += chromStride;
        }

        src  += srcStride;
        ydst += lumStride;
    }
}

/**
 * Split packed rows into one luma plane and two chroma planes, keeping
 * one chroma row per source row.
 *
 * Per source row, the even bytes (0, 2, 4, ...) are copied to ydst,
 * byte 1 of each 4-byte group to udst and byte 3 of each group to vdst.
 *
 * @param ydst        luma output, advanced by lumStride per row
 * @param udst        first chroma output, advanced by chromStride per row
 * @param vdst        second chroma output, advanced by chromStride per row
 * @param src         packed input, advanced by srcStride per row
 * @param width       number of luma bytes copied per row
 * @param height      number of rows
 * @param lumStride   byte distance between rows of ydst
 * @param chromStride byte distance between rows of udst and vdst
 * @param srcStride   byte distance between rows of src
 */
static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    int y;
    /* presumably width / 2 rounded up; FF_CEIL_RSHIFT is defined elsewhere */
    const int chromWidth = FF_CEIL_RSHIFT(width, 1);

    for (y = 0; y < height; y++) {
        extract_even_c(src, ydst, width);
        extract_odd2_c(src, udst, vdst, chromWidth);

        src  += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}

/**
 * Split packed rows into one luma plane and two vertically subsampled
 * chroma planes.
 *
 * Per source row, the odd bytes (1, 3, 5, ...) are copied to ydst.
 * On every odd row, bytes 0 and 2 of each 4-byte group are averaged
 * between the previous and the current source row and written to udst
 * and vdst respectively. Chroma is only emitted on odd rows, so
 * height / 2 (rounded down) chroma rows are written.
 *
 * @param ydst        luma output, advanced by lumStride per row
 * @param udst        first chroma output, advanced by chromStride per
 *                    chroma row
 * @param vdst        second chroma output, advanced by chromStride per
 *                    chroma row
 * @param src         packed input, advanced by srcStride per row
 * @param width       number of luma bytes copied per row
 * @param height      number of source rows
 * @param lumStride   byte distance between rows of ydst
 * @param chromStride byte distance between rows of udst and vdst
 * @param srcStride   byte distance between rows of src
 */
static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    int y;
    /* presumably width / 2 rounded up; FF_CEIL_RSHIFT is defined elsewhere */
    const int chromWidth = FF_CEIL_RSHIFT(width, 1);

    for (y = 0; y < height; y++) {
        /* luma sits at the odd byte offsets, hence src + 1 */
        extract_even_c(src + 1, ydst, width);
        if (y & 1) {
            /* src - srcStride is the row processed in the previous pass */
            extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
            udst += chromStride;
            vdst += chromStride;
        }

        src  += srcStride;
        ydst += lumStride;
    }
}

/**
 * Split packed rows into one luma plane and two chroma planes, keeping
 * one chroma row per source row.
 *
 * Per source row, the odd bytes (1, 3, 5, ...) are copied to ydst,
 * byte 0 of each 4-byte group to udst and byte 2 of each group to vdst.
 *
 * @param ydst        luma output, advanced by lumStride per row
 * @param udst        first chroma output, advanced by chromStride per row
 * @param vdst        second chroma output, advanced by chromStride per row
 * @param src         packed input, advanced by srcStride per row
 * @param width       number of luma bytes copied per row
 * @param height      number of rows
 * @param lumStride   byte distance between rows of ydst
 * @param chromStride byte distance between rows of udst and vdst
 * @param srcStride   byte distance between rows of src
 */
static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    int y;
    /* presumably width / 2 rounded up; FF_CEIL_RSHIFT is defined elsewhere */
    const int chromWidth = FF_CEIL_RSHIFT(width, 1);

    for (y = 0; y < height; y++) {
        /* luma sits at the odd byte offsets, hence src + 1 */
        extract_even_c(src + 1, ydst, width);
        extract_even2_c(src, udst, vdst, chromWidth);

        src  += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}

static av_cold void rgb2rgb_init_c(void)
930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945
{
    rgb15to16          = rgb15to16_c;
    rgb15tobgr24       = rgb15tobgr24_c;
    rgb15to32          = rgb15to32_c;
    rgb16tobgr24       = rgb16tobgr24_c;
    rgb16to32          = rgb16to32_c;
    rgb16to15          = rgb16to15_c;
    rgb24tobgr16       = rgb24tobgr16_c;
    rgb24tobgr15       = rgb24tobgr15_c;
    rgb24tobgr32       = rgb24tobgr32_c;
    rgb32to16          = rgb32to16_c;
    rgb32to15          = rgb32to15_c;
    rgb32tobgr24       = rgb32tobgr24_c;
    rgb24to15          = rgb24to15_c;
    rgb24to16          = rgb24to16_c;
    rgb24tobgr24       = rgb24tobgr24_c;
946 947 948 949 950
#if HAVE_BIGENDIAN
    shuffle_bytes_0321 = shuffle_bytes_2103_c;
    shuffle_bytes_2103 = shuffle_bytes_0321_c;
#else
    shuffle_bytes_0321 = shuffle_bytes_0321_c;
951
    shuffle_bytes_2103 = shuffle_bytes_2103_c;
952
#endif
953 954 955 956 957 958 959 960
    rgb32tobgr16       = rgb32tobgr16_c;
    rgb32tobgr15       = rgb32tobgr15_c;
    yv12toyuy2         = yv12toyuy2_c;
    yv12touyvy         = yv12touyvy_c;
    yuv422ptoyuy2      = yuv422ptoyuy2_c;
    yuv422ptouyvy      = yuv422ptouyvy_c;
    yuy2toyv12         = yuy2toyv12_c;
    planar2x           = planar2x_c;
961
    ff_rgb24toyv12     = ff_rgb24toyv12_c;
962
    interleaveBytes    = interleaveBytes_c;
963
    deinterleaveBytes  = deinterleaveBytes_c;
964 965 966 967 968 969 970
    vu9_to_vu12        = vu9_to_vu12_c;
    yvu9_to_yuy2       = yvu9_to_yuy2_c;

    uyvytoyuv420       = uyvytoyuv420_c;
    uyvytoyuv422       = uyvytoyuv422_c;
    yuyvtoyuv420       = yuyvtoyuv420_c;
    yuyvtoyuv422       = yuyvtoyuv422_c;
971
}