/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

21 22
#include "config.h"

23
#define _DEFAULT_SOURCE
24
#define _SVID_SOURCE // needed for MAP_ANONYMOUS
25
#define _DARWIN_C_SOURCE // needed for MAP_ANON
26 27 28
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
29
#include <string.h>
30
#if HAVE_MMAP
31 32 33 34 35 36 37 38 39
#include <sys/mman.h>
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif
#if HAVE_VIRTUALALLOC
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
40

41
#include "libavutil/attributes.h"
42
#include "libavutil/avassert.h"
43 44
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
45
#include "libavutil/cpu.h"
46
#include "libavutil/imgutils.h"
47
#include "libavutil/intreadwrite.h"
48
#include "libavutil/libm.h"
49
#include "libavutil/mathematics.h"
50
#include "libavutil/opt.h"
51
#include "libavutil/pixdesc.h"
52
#include "libavutil/aarch64/cpu.h"
53
#include "libavutil/ppc/cpu.h"
54
#include "libavutil/x86/asm.h"
55
#include "libavutil/x86/cpu.h"
56 57 58 59 60 61

// We have to implement deprecated functions until they are removed, this is the
// simplest way to prevent warnings
#undef attribute_deprecated
#define attribute_deprecated

62 63 64
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
65

66 67 68 69 70 71 72
/* When FF_API_SWS_VECTOR is 0, these SwsVector helpers are given static
 * (file-local) prototypes here.
 * NOTE(review): presumably they are otherwise declared by one of the included
 * swscale headers and are defined further down in this file -- confirm. */
#if !FF_API_SWS_VECTOR
static SwsVector *sws_getIdentityVec(void);
static void sws_addVec(SwsVector *a, SwsVector *b);
static void sws_shiftVec(SwsVector *a, int shift);
static void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level);
#endif

73 74
/* Forward declaration of a file-local helper so that it can be referenced
 * before its definition. */
static void handle_formats(SwsContext *c);

75 76
unsigned swscale_version(void)
{
77
    av_assert0(LIBSWSCALE_VERSION_MICRO >= 100);
78 79 80 81 82
    return LIBSWSCALE_VERSION_INT;
}

const char *swscale_configuration(void)
{
83
    return FFMPEG_CONFIGURATION;
84 85 86 87 88
}

const char *swscale_license(void)
{
#define LICENSE_PREFIX "libswscale license: "
89
    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
90 91
}

92
/**
 * Per-pixel-format capability flags, one bit each.
 *
 * The field order matters: format_entries[] uses positional initializers
 * ({ in, out } or { in, out, endianness }), and omitted trailing fields are
 * zero-initialized.
 */
typedef struct FormatEntry {
    uint8_t is_supported_in         :1; ///< reported by sws_isSupportedInput()
    uint8_t is_supported_out        :1; ///< reported by sws_isSupportedOutput()
    uint8_t is_supported_endianness :1; ///< reported by sws_isSupportedEndiannessConversion()
} FormatEntry;

98 99 100 101 102 103 104 105 106 107 108 109 110 111
static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
    [AV_PIX_FMT_YUV420P]     = { 1, 1 },
    [AV_PIX_FMT_YUYV422]     = { 1, 1 },
    [AV_PIX_FMT_RGB24]       = { 1, 1 },
    [AV_PIX_FMT_BGR24]       = { 1, 1 },
    [AV_PIX_FMT_YUV422P]     = { 1, 1 },
    [AV_PIX_FMT_YUV444P]     = { 1, 1 },
    [AV_PIX_FMT_YUV410P]     = { 1, 1 },
    [AV_PIX_FMT_YUV411P]     = { 1, 1 },
    [AV_PIX_FMT_GRAY8]       = { 1, 1 },
    [AV_PIX_FMT_MONOWHITE]   = { 1, 1 },
    [AV_PIX_FMT_MONOBLACK]   = { 1, 1 },
    [AV_PIX_FMT_PAL8]        = { 1, 0 },
    [AV_PIX_FMT_YUVJ420P]    = { 1, 1 },
Michael Niedermayer's avatar
Michael Niedermayer committed
112
    [AV_PIX_FMT_YUVJ411P]    = { 1, 1 },
113 114
    [AV_PIX_FMT_YUVJ422P]    = { 1, 1 },
    [AV_PIX_FMT_YUVJ444P]    = { 1, 1 },
115
    [AV_PIX_FMT_YVYU422]     = { 1, 1 },
116 117 118 119 120 121 122 123 124 125 126 127 128 129
    [AV_PIX_FMT_UYVY422]     = { 1, 1 },
    [AV_PIX_FMT_UYYVYY411]   = { 0, 0 },
    [AV_PIX_FMT_BGR8]        = { 1, 1 },
    [AV_PIX_FMT_BGR4]        = { 0, 1 },
    [AV_PIX_FMT_BGR4_BYTE]   = { 1, 1 },
    [AV_PIX_FMT_RGB8]        = { 1, 1 },
    [AV_PIX_FMT_RGB4]        = { 0, 1 },
    [AV_PIX_FMT_RGB4_BYTE]   = { 1, 1 },
    [AV_PIX_FMT_NV12]        = { 1, 1 },
    [AV_PIX_FMT_NV21]        = { 1, 1 },
    [AV_PIX_FMT_ARGB]        = { 1, 1 },
    [AV_PIX_FMT_RGBA]        = { 1, 1 },
    [AV_PIX_FMT_ABGR]        = { 1, 1 },
    [AV_PIX_FMT_BGRA]        = { 1, 1 },
130 131 132 133
    [AV_PIX_FMT_0RGB]        = { 1, 1 },
    [AV_PIX_FMT_RGB0]        = { 1, 1 },
    [AV_PIX_FMT_0BGR]        = { 1, 1 },
    [AV_PIX_FMT_BGR0]        = { 1, 1 },
134 135
    [AV_PIX_FMT_GRAY9BE]     = { 1, 1 },
    [AV_PIX_FMT_GRAY9LE]     = { 1, 1 },
136 137
    [AV_PIX_FMT_GRAY10BE]    = { 1, 1 },
    [AV_PIX_FMT_GRAY10LE]    = { 1, 1 },
Luca Barbato's avatar
Luca Barbato committed
138 139
    [AV_PIX_FMT_GRAY12BE]    = { 1, 1 },
    [AV_PIX_FMT_GRAY12LE]    = { 1, 1 },
140 141
    [AV_PIX_FMT_GRAY14BE]    = { 1, 1 },
    [AV_PIX_FMT_GRAY14LE]    = { 1, 1 },
142 143 144 145
    [AV_PIX_FMT_GRAY16BE]    = { 1, 1 },
    [AV_PIX_FMT_GRAY16LE]    = { 1, 1 },
    [AV_PIX_FMT_YUV440P]     = { 1, 1 },
    [AV_PIX_FMT_YUVJ440P]    = { 1, 1 },
146 147 148 149
    [AV_PIX_FMT_YUV440P10LE] = { 1, 1 },
    [AV_PIX_FMT_YUV440P10BE] = { 1, 1 },
    [AV_PIX_FMT_YUV440P12LE] = { 1, 1 },
    [AV_PIX_FMT_YUV440P12BE] = { 1, 1 },
150
    [AV_PIX_FMT_YUVA420P]    = { 1, 1 },
151 152
    [AV_PIX_FMT_YUVA422P]    = { 1, 1 },
    [AV_PIX_FMT_YUVA444P]    = { 1, 1 },
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
    [AV_PIX_FMT_YUVA420P9BE] = { 1, 1 },
    [AV_PIX_FMT_YUVA420P9LE] = { 1, 1 },
    [AV_PIX_FMT_YUVA422P9BE] = { 1, 1 },
    [AV_PIX_FMT_YUVA422P9LE] = { 1, 1 },
    [AV_PIX_FMT_YUVA444P9BE] = { 1, 1 },
    [AV_PIX_FMT_YUVA444P9LE] = { 1, 1 },
    [AV_PIX_FMT_YUVA420P10BE]= { 1, 1 },
    [AV_PIX_FMT_YUVA420P10LE]= { 1, 1 },
    [AV_PIX_FMT_YUVA422P10BE]= { 1, 1 },
    [AV_PIX_FMT_YUVA422P10LE]= { 1, 1 },
    [AV_PIX_FMT_YUVA444P10BE]= { 1, 1 },
    [AV_PIX_FMT_YUVA444P10LE]= { 1, 1 },
    [AV_PIX_FMT_YUVA420P16BE]= { 1, 1 },
    [AV_PIX_FMT_YUVA420P16LE]= { 1, 1 },
    [AV_PIX_FMT_YUVA422P16BE]= { 1, 1 },
    [AV_PIX_FMT_YUVA422P16LE]= { 1, 1 },
    [AV_PIX_FMT_YUVA444P16BE]= { 1, 1 },
    [AV_PIX_FMT_YUVA444P16LE]= { 1, 1 },
171 172
    [AV_PIX_FMT_RGB48BE]     = { 1, 1 },
    [AV_PIX_FMT_RGB48LE]     = { 1, 1 },
173 174
    [AV_PIX_FMT_RGBA64BE]    = { 1, 1, 1 },
    [AV_PIX_FMT_RGBA64LE]    = { 1, 1, 1 },
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
    [AV_PIX_FMT_RGB565BE]    = { 1, 1 },
    [AV_PIX_FMT_RGB565LE]    = { 1, 1 },
    [AV_PIX_FMT_RGB555BE]    = { 1, 1 },
    [AV_PIX_FMT_RGB555LE]    = { 1, 1 },
    [AV_PIX_FMT_BGR565BE]    = { 1, 1 },
    [AV_PIX_FMT_BGR565LE]    = { 1, 1 },
    [AV_PIX_FMT_BGR555BE]    = { 1, 1 },
    [AV_PIX_FMT_BGR555LE]    = { 1, 1 },
    [AV_PIX_FMT_YUV420P16LE] = { 1, 1 },
    [AV_PIX_FMT_YUV420P16BE] = { 1, 1 },
    [AV_PIX_FMT_YUV422P16LE] = { 1, 1 },
    [AV_PIX_FMT_YUV422P16BE] = { 1, 1 },
    [AV_PIX_FMT_YUV444P16LE] = { 1, 1 },
    [AV_PIX_FMT_YUV444P16BE] = { 1, 1 },
    [AV_PIX_FMT_RGB444LE]    = { 1, 1 },
    [AV_PIX_FMT_RGB444BE]    = { 1, 1 },
    [AV_PIX_FMT_BGR444LE]    = { 1, 1 },
    [AV_PIX_FMT_BGR444BE]    = { 1, 1 },
193
    [AV_PIX_FMT_YA8]         = { 1, 1 },
194 195
    [AV_PIX_FMT_YA16BE]      = { 1, 0 },
    [AV_PIX_FMT_YA16LE]      = { 1, 0 },
196 197
    [AV_PIX_FMT_BGR48BE]     = { 1, 1 },
    [AV_PIX_FMT_BGR48LE]     = { 1, 1 },
198 199
    [AV_PIX_FMT_BGRA64BE]    = { 1, 1, 1 },
    [AV_PIX_FMT_BGRA64LE]    = { 1, 1, 1 },
200 201 202 203
    [AV_PIX_FMT_YUV420P9BE]  = { 1, 1 },
    [AV_PIX_FMT_YUV420P9LE]  = { 1, 1 },
    [AV_PIX_FMT_YUV420P10BE] = { 1, 1 },
    [AV_PIX_FMT_YUV420P10LE] = { 1, 1 },
204 205 206 207
    [AV_PIX_FMT_YUV420P12BE] = { 1, 1 },
    [AV_PIX_FMT_YUV420P12LE] = { 1, 1 },
    [AV_PIX_FMT_YUV420P14BE] = { 1, 1 },
    [AV_PIX_FMT_YUV420P14LE] = { 1, 1 },
208 209 210 211
    [AV_PIX_FMT_YUV422P9BE]  = { 1, 1 },
    [AV_PIX_FMT_YUV422P9LE]  = { 1, 1 },
    [AV_PIX_FMT_YUV422P10BE] = { 1, 1 },
    [AV_PIX_FMT_YUV422P10LE] = { 1, 1 },
212 213 214 215
    [AV_PIX_FMT_YUV422P12BE] = { 1, 1 },
    [AV_PIX_FMT_YUV422P12LE] = { 1, 1 },
    [AV_PIX_FMT_YUV422P14BE] = { 1, 1 },
    [AV_PIX_FMT_YUV422P14LE] = { 1, 1 },
216 217 218 219
    [AV_PIX_FMT_YUV444P9BE]  = { 1, 1 },
    [AV_PIX_FMT_YUV444P9LE]  = { 1, 1 },
    [AV_PIX_FMT_YUV444P10BE] = { 1, 1 },
    [AV_PIX_FMT_YUV444P10LE] = { 1, 1 },
220 221 222 223
    [AV_PIX_FMT_YUV444P12BE] = { 1, 1 },
    [AV_PIX_FMT_YUV444P12LE] = { 1, 1 },
    [AV_PIX_FMT_YUV444P14BE] = { 1, 1 },
    [AV_PIX_FMT_YUV444P14LE] = { 1, 1 },
224
    [AV_PIX_FMT_GBRP]        = { 1, 1 },
225 226 227 228
    [AV_PIX_FMT_GBRP9LE]     = { 1, 1 },
    [AV_PIX_FMT_GBRP9BE]     = { 1, 1 },
    [AV_PIX_FMT_GBRP10LE]    = { 1, 1 },
    [AV_PIX_FMT_GBRP10BE]    = { 1, 1 },
229 230
    [AV_PIX_FMT_GBRAP10LE]   = { 1, 1 },
    [AV_PIX_FMT_GBRAP10BE]   = { 1, 1 },
231 232
    [AV_PIX_FMT_GBRP12LE]    = { 1, 1 },
    [AV_PIX_FMT_GBRP12BE]    = { 1, 1 },
233 234
    [AV_PIX_FMT_GBRAP12LE]   = { 1, 1 },
    [AV_PIX_FMT_GBRAP12BE]   = { 1, 1 },
235 236
    [AV_PIX_FMT_GBRP14LE]    = { 1, 1 },
    [AV_PIX_FMT_GBRP14BE]    = { 1, 1 },
237 238
    [AV_PIX_FMT_GBRP16LE]    = { 1, 1 },
    [AV_PIX_FMT_GBRP16BE]    = { 1, 1 },
239
    [AV_PIX_FMT_GBRAP]       = { 1, 1 },
240 241
    [AV_PIX_FMT_GBRAP16LE]   = { 1, 1 },
    [AV_PIX_FMT_GBRAP16BE]   = { 1, 1 },
242 243 244 245 246 247 248 249 250 251 252 253
    [AV_PIX_FMT_BAYER_BGGR8] = { 1, 0 },
    [AV_PIX_FMT_BAYER_RGGB8] = { 1, 0 },
    [AV_PIX_FMT_BAYER_GBRG8] = { 1, 0 },
    [AV_PIX_FMT_BAYER_GRBG8] = { 1, 0 },
    [AV_PIX_FMT_BAYER_BGGR16LE] = { 1, 0 },
    [AV_PIX_FMT_BAYER_BGGR16BE] = { 1, 0 },
    [AV_PIX_FMT_BAYER_RGGB16LE] = { 1, 0 },
    [AV_PIX_FMT_BAYER_RGGB16BE] = { 1, 0 },
    [AV_PIX_FMT_BAYER_GBRG16LE] = { 1, 0 },
    [AV_PIX_FMT_BAYER_GBRG16BE] = { 1, 0 },
    [AV_PIX_FMT_BAYER_GRBG16LE] = { 1, 0 },
    [AV_PIX_FMT_BAYER_GRBG16BE] = { 1, 0 },
254 255
    [AV_PIX_FMT_XYZ12BE]     = { 1, 1, 1 },
    [AV_PIX_FMT_XYZ12LE]     = { 1, 1, 1 },
256
    [AV_PIX_FMT_AYUV64LE]    = { 1, 1},
257 258
    [AV_PIX_FMT_P010LE]      = { 1, 1 },
    [AV_PIX_FMT_P010BE]      = { 1, 1 },
259 260
    [AV_PIX_FMT_P016LE]      = { 1, 1 },
    [AV_PIX_FMT_P016BE]      = { 1, 1 },
261 262
    [AV_PIX_FMT_GRAYF32LE]   = { 1, 1 },
    [AV_PIX_FMT_GRAYF32BE]   = { 1, 1 },
263 264
};

265
int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
266
{
267
    return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
268
           format_entries[pix_fmt].is_supported_in : 0;
269 270
}

271
int sws_isSupportedOutput(enum AVPixelFormat pix_fmt)
272
{
273
    return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
274
           format_entries[pix_fmt].is_supported_out : 0;
275 276
}

277 278 279 280 281 282
/**
 * Tell whether pix_fmt is flagged as supporting endianness conversion.
 *
 * @param pix_fmt pixel format to look up in format_entries[]
 * @return the is_supported_endianness bit of the table entry (0 or 1), or 0
 *         when pix_fmt lies outside the table
 */
int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt)
{
    /* Negative values become huge after the unsigned cast and are rejected
     * by the same comparison as too-large ones. */
    if ((unsigned)pix_fmt >= AV_PIX_FMT_NB)
        return 0;
    return format_entries[pix_fmt].is_supported_endianness;
}

283 284
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1 the result is a + b*dist + c*dist^2 + d*dist^3. For larger
 * distances the coefficients of the next unit-length segment are derived from
 * the current ones (its constant term is forced to 0) and dist is reduced by
 * 1, repeatedly, until dist <= 1.
 *
 * This is the iterative form of the original self-recursion: the arithmetic
 * per segment is unchanged, but there is no stack growth, and NaN, +inf or
 * values so large that dist - 1 == dist terminate instead of recursing
 * without bound (the result for such inputs is not meaningful).
 *
 * @param a,b,c,d polynomial coefficients of the first segment
 * @param dist    distance at which to evaluate
 * @return value of the piecewise polynomial at dist
 */
static double getSplineCoeff(double a, double b, double c, double d,
                             double dist)
{
    /* "dist > 1.0" is false for NaN, so NaN falls straight through. */
    while (dist > 1.0) {
        const double next_dist = dist - 1.0;
        const double next_b    =  b + 2.0 * c + 3.0 * d;
        const double next_c    =  c + 3.0 * d;
        const double next_d    = -b - 3.0 * c - 6.0 * d;

        /* No progress is possible (+inf, or magnitude beyond double's
         * integer precision): stop instead of spinning forever. */
        if (!(next_dist < dist))
            break;

        a    = 0.0;
        b    = next_b;
        c    = next_c;
        d    = next_d;
        dist = next_dist;
    }

    return ((d * dist + c) * dist + b) * dist + a;
}

296 297
/**
 * Convert a position value into subsampled units.
 *
 * A pos of -1, or any pos <= -513, is first replaced by
 * (128 << chr_subsample) - 128. 128 is then added and the sum is shifted
 * right by chr_subsample.
 * NOTE(review): -1 / <= -513 look like "unspecified position" markers and pos
 * appears to be in 1/256 sample units -- confirm against the callers.
 *
 * @param s             unused in this function
 * @param chr_subsample log2 subsampling shift applied to the result
 * @param pos           position to convert
 * @param dir           unused in this function
 * @return (pos + 128) >> chr_subsample, after the substitution above
 */
static av_cold int get_local_pos(SwsContext *s, int chr_subsample, int pos, int dir)
{
    if (pos == -1 || pos <= -513) {
        pos = (128 << chr_subsample) - 128;
    }
    pos += 128; // relative to ideal left edge
    return pos >> chr_subsample;
}

305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
/**
 * Describes one scaling algorithm.
 */
typedef struct {
    /** flag value that selects this algorithm */
    int flag;
    /** description meant for human readers */
    const char *description;
    /** size factor used when the filters are initialized */
    int size_factor;
} ScaleAlgorithm;

static const ScaleAlgorithm scale_algorithms[] = {
    { SWS_AREA,          "area averaging",                  1 /* downscale only, for upscale it is bilinear */ },
    { SWS_BICUBIC,       "bicubic",                         4 },
    { SWS_BICUBLIN,      "luma bicubic / chroma bilinear", -1 },
    { SWS_BILINEAR,      "bilinear",                        2 },
    { SWS_FAST_BILINEAR, "fast bilinear",                  -1 },
    { SWS_GAUSS,         "Gaussian",                        8 /* infinite ;) */ },
    { SWS_LANCZOS,       "Lanczos",                        -1 /* custom */ },
    { SWS_POINT,         "nearest neighbor / point",       -1 },
    { SWS_SINC,          "sinc",                           20 /* infinite ;) */ },
    { SWS_SPLINE,        "bicubic spline",                 20 /* infinite :)*/ },
    { SWS_X,             "experimental",                    8 },
};

325 326 327 328 329
/**
 * Build the per-output-sample filter coefficients and source positions for
 * scaling srcW samples to dstW samples.
 *
 * Stages, in order:
 *  1. compute 64-bit coefficients for the algorithm selected by flags;
 *  2. convolve each row with srcFilter, if given (dstFilter only widens the
 *     intermediate rows, it is not applied -- see the FIXME below);
 *  3. strip near-zero taps on both sides and round the tap count up to
 *     filterAlign;
 *  4. fold taps that would fall outside [0, srcW) back into the row;
 *  5. normalize every row against `one` and store it as int16_t.
 *
 * @param outFilter     receives a newly allocated array of
 *                      (dstW + 3) * *outFilterSize coefficients; the three
 *                      extra rows replicate row dstW - 1
 * @param filterPos     receives a newly allocated array of dstW + 3 source
 *                      start positions; the three extra entries replicate
 *                      entry dstW - 1
 * @param outFilterSize receives the number of taps per output sample
 * @param xInc          source step per destination sample in 16.16 fixed
 *                      point (values within 10 of 0x10000, with
 *                      srcPos == dstPos, take the unscaled path)
 * @param srcW          source width
 * @param dstW          destination width
 * @param filterAlign   the tap count is rounded up to a multiple of this
 *                      (the mask arithmetic assumes a power of two)
 * @param one           fixed-point value each normalized row sums to
 *                      (approximately; rounding error is carried tap to tap)
 * @param flags         SWS_* flags selecting the algorithm and options
 * @param cpu_flags     CPU capability flags; only used to adjust filterAlign
 * @param srcFilter     optional vector convolved with every row, may be NULL
 * @param dstFilter     optional vector, may be NULL; currently only its length
 *                      is used
 * @param param         two algorithm tuning parameters; SWS_PARAM_DEFAULT
 *                      selects the built-in default
 * @param srcPos        source position offset
 *                      (NOTE(review): units look like 1/256 sample -- confirm)
 * @param dstPos        destination position offset (same caveat)
 * @return 0 on success, RETCODE_USE_CASCADE if the resulting filter is too
 *         large, -1 on allocation failure. On failure *filterPos (and
 *         possibly *outFilter) may already be allocated and is not freed
 *         here.
 */
static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
                              int *outFilterSize, int xInc, int srcW,
                              int dstW, int filterAlign, int one,
                              int flags, int cpu_flags,
                              SwsVector *srcFilter, SwsVector *dstFilter,
                              double param[2], int srcPos, int dstPos)
{
    int i;
    int filterSize;
    int filter2Size;
    int minFilterSize;
    int64_t *filter    = NULL;
    int64_t *filter2   = NULL;
    // fixed-point "1.0" for the 64-bit intermediate coefficients
    const int64_t fone = 1LL << (54 - FFMIN(av_log2(srcW/dstW), 8));
    int ret            = -1;

    emms_c(); // FIXME should not be required but IS (even for non-MMX versions)

    // NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
    FF_ALLOC_ARRAY_OR_GOTO(NULL, *filterPos, (dstW + 3), sizeof(**filterPos), fail);

    if (FFABS(xInc - 0x10000) < 10 && srcPos == dstPos) { // unscaled
        filterSize = 1;
        FF_ALLOCZ_ARRAY_OR_GOTO(NULL, filter,
                                dstW, sizeof(*filter) * filterSize, fail);

        for (i = 0; i < dstW; i++) {
            filter[i * filterSize] = fone;
            (*filterPos)[i]        = i;
        }
    } else if (flags & SWS_POINT) { // lame looking point sampling mode
        int64_t xDstInSrc;
        filterSize = 1;
        FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
                               dstW, sizeof(*filter) * filterSize, fail);

        xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
        for (i = 0; i < dstW; i++) {
            int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;

            (*filterPos)[i] = xx;
            filter[i]       = fone;
            xDstInSrc      += xInc;
        }
    } else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
               (flags & SWS_FAST_BILINEAR)) { // bilinear upscale
        int64_t xDstInSrc;
        filterSize = 2;
        FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
                               dstW, sizeof(*filter) * filterSize, fail);

        xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
        for (i = 0; i < dstW; i++) {
            int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
            int j;

            (*filterPos)[i] = xx;
            // bilinear upscale / linear interpolate / area averaging
            for (j = 0; j < filterSize; j++) {
                int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
                if (coeff < 0)
                    coeff = 0;
                filter[i * filterSize + j] = coeff;
                xx++;
            }
            xDstInSrc += xInc;
        }
    } else {
        int64_t xDstInSrc;
        int sizeFactor = -1;

        // first algorithm whose flag is set and which has a fixed size factor
        for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) {
            if (flags & scale_algorithms[i].flag && scale_algorithms[i].size_factor > 0) {
                sizeFactor = scale_algorithms[i].size_factor;
                break;
            }
        }
        if (flags & SWS_LANCZOS)
            sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
        av_assert0(sizeFactor > 0);

        if (xInc <= 1 << 16)
            filterSize = 1 + sizeFactor;    // upscale
        else
            filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;

        filterSize = FFMIN(filterSize, srcW - 2);
        filterSize = FFMAX(filterSize, 1);

        FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
                               dstW, sizeof(*filter) * filterSize, fail);

        // this branch tracks positions with one more fractional bit (>>7, 1<<17)
        xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7);
        for (i = 0; i < dstW; i++) {
            int xx = (xDstInSrc - (filterSize - 2) * (1LL<<16)) / (1 << 17);
            int j;
            (*filterPos)[i] = xx;
            for (j = 0; j < filterSize; j++) {
                // distance of this tap from the sample centre, 1.0 == 1 << 30
                int64_t d = (FFABS(((int64_t)xx * (1 << 17)) - xDstInSrc)) << 13;
                double floatd;
                int64_t coeff;

                if (xInc > 1 << 16)
                    d = d * dstW / srcW;
                floatd = d * (1.0 / (1 << 30));

                if (flags & SWS_BICUBIC) {
                    int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] :   0) * (1 << 24);
                    int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24);

                    if (d >= 1LL << 31) {
                        coeff = 0.0;
                    } else {
                        int64_t dd  = (d  * d) >> 30;
                        int64_t ddd = (dd * d) >> 30;

                        if (d < 1LL << 30)
                            coeff =  (12 * (1 << 24) -  9 * B - 6 * C) * ddd +
                                    (-18 * (1 << 24) + 12 * B + 6 * C) *  dd +
                                      (6 * (1 << 24) -  2 * B)         * (1 << 30);
                        else
                            coeff =      (-B -  6 * C) * ddd +
                                      (6 * B + 30 * C) * dd  +
                                    (-12 * B - 48 * C) * d   +
                                      (8 * B + 24 * C) * (1 << 30);
                    }
                    coeff /= (1LL<<54)/fone;
                } else if (flags & SWS_X) {
                    double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
                    double c;

                    if (floatd < 1.0)
                        c = cos(floatd * M_PI);
                    else
                        c = -1.0;
                    if (c < 0.0)
                        c = -pow(-c, A);
                    else
                        c = pow(c, A);
                    coeff = (c * 0.5 + 0.5) * fone;
                } else if (flags & SWS_AREA) {
                    int64_t d2 = d - (1 << 29);
                    if (d2 * xInc < -(1LL << (29 + 16)))
                        coeff = 1.0 * (1LL << (30 + 16));
                    else if (d2 * xInc < (1LL << (29 + 16)))
                        coeff = -d2 * xInc + (1LL << (29 + 16));
                    else
                        coeff = 0.0;
                    coeff *= fone >> (30 + 16);
                } else if (flags & SWS_GAUSS) {
                    double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = exp2(-p * floatd * floatd) * fone;
                } else if (flags & SWS_SINC) {
                    coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
                } else if (flags & SWS_LANCZOS) {
                    double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) /
                             (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
                    if (floatd > p)
                        coeff = 0;
                } else if (flags & SWS_BILINEAR) {
                    coeff = (1 << 30) - d;
                    if (coeff < 0)
                        coeff = 0;
                    coeff *= fone >> 30;
                } else if (flags & SWS_SPLINE) {
                    double p = -2.196152422706632;
                    coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
                } else {
                    av_assert0(0);
                }

                filter[i * filterSize + j] = coeff;
                xx++;
            }
            xDstInSrc += 2 * xInc;
        }
    }

    /* apply src & dst Filter to filter -> filter2 */
    av_assert0(filterSize > 0);
    filter2Size = filterSize;
    if (srcFilter)
        filter2Size += srcFilter->length - 1;
    if (dstFilter)
        filter2Size += dstFilter->length - 1;
    av_assert0(filter2Size > 0);
    FF_ALLOCZ_ARRAY_OR_GOTO(NULL, filter2, dstW, filter2Size * sizeof(*filter2), fail);

    for (i = 0; i < dstW; i++) {
        int j, k;

        if (srcFilter) {
            for (k = 0; k < srcFilter->length; k++) {
                for (j = 0; j < filterSize; j++)
                    filter2[i * filter2Size + k + j] +=
                        srcFilter->coeff[k] * filter[i * filterSize + j];
            }
        } else {
            for (j = 0; j < filterSize; j++)
                filter2[i * filter2Size + j] = filter[i * filterSize + j];
        }
        // FIXME dstFilter

        // keep the widened row centred on the same source position
        (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2;
    }
    av_freep(&filter);

    /* try to reduce the filter-size (step1 find size and shift left) */
    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
    minFilterSize = 0;
    for (i = dstW - 1; i >= 0; i--) {
        int min = filter2Size;
        int j;
        int64_t cutOff = 0;

        /* get rid of near zero elements on the left by shifting left */
        for (j = 0; j < filter2Size; j++) {
            int k;
            /* always element 0: the row is shifted left on every pass */
            cutOff += FFABS(filter2[i * filter2Size]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
                break;

            /* preserve monotonicity because the core can't handle the
             * filter otherwise */
            if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
                break;

            // move filter coefficients left
            for (k = 1; k < filter2Size; k++)
                filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
            filter2[i * filter2Size + k - 1] = 0;
            (*filterPos)[i]++;
        }

        cutOff = 0;
        /* count near zeros on the right */
        for (j = filter2Size - 1; j > 0; j--) {
            cutOff += FFABS(filter2[i * filter2Size + j]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
                break;
            min--;
        }

        if (min > minFilterSize)
            minFilterSize = min;
    }

    if (PPC_ALTIVEC(cpu_flags)) {
        // we can handle the special case 4, so we don't want to go the full 8
        if (minFilterSize < 5)
            filterAlign = 4;

        /* We really don't want to waste our time doing useless computation, so
         * fall back on the scalar C code for very small filters.
         * Vectorizing is worth it only if you have a decent-sized vector. */
        if (minFilterSize < 3)
            filterAlign = 1;
    }

    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
        // special case for unscaled vertical filtering
        if (minFilterSize == 1 && filterAlign == 2)
            filterAlign = 1;
    }

    av_assert0(minFilterSize > 0);
    filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
    av_assert0(filterSize > 0);
    /* Reject oversized filters before allocating anything for them, so that
     * the cascade fallback is reported instead of a possible allocation
     * failure. */
    if (filterSize >= MAX_FILTER_SIZE * 16 /
                      ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16)) {
        ret = RETCODE_USE_CASCADE;
        goto fail;
    }
    filter = av_malloc_array(dstW, filterSize * sizeof(*filter));
    if (!filter)
        goto fail;
    *outFilterSize = filterSize;

    if (flags & SWS_PRINT_INFO)
        av_log(NULL, AV_LOG_VERBOSE,
               "SwScaler: reducing / aligning filtersize %d -> %d\n",
               filter2Size, filterSize);
    /* try to reduce the filter-size (step2 reduce it) */
    for (i = 0; i < dstW; i++) {
        int j;

        for (j = 0; j < filterSize; j++) {
            if (j >= filter2Size)
                filter[i * filterSize + j] = 0;
            else
                filter[i * filterSize + j] = filter2[i * filter2Size + j];
            if ((flags & SWS_BITEXACT) && j >= minFilterSize)
                filter[i * filterSize + j] = 0;
        }
    }

    // FIXME try to align filterPos if possible

    // fix borders
    for (i = 0; i < dstW; i++) {
        int j;
        if ((*filterPos)[i] < 0) {
            // move filter coefficients left to compensate for filterPos
            for (j = 1; j < filterSize; j++) {
                int left = FFMAX(j + (*filterPos)[i], 0);
                filter[i * filterSize + left] += filter[i * filterSize + j];
                filter[i * filterSize + j]     = 0;
            }
            (*filterPos)[i]= 0;
        }

        if ((*filterPos)[i] + filterSize > srcW) {
            int shift = (*filterPos)[i] + FFMIN(filterSize - srcW, 0);
            int64_t acc = 0;

            // collect the weight of all taps past the right edge ...
            for (j = filterSize - 1; j >= 0; j--) {
                if ((*filterPos)[i] + j >= srcW) {
                    acc += filter[i * filterSize + j];
                    filter[i * filterSize + j] = 0;
                }
            }
            // ... shift the row right so that it starts `shift` samples earlier ...
            for (j = filterSize - 1; j >= 0; j--) {
                if (j < shift) {
                    filter[i * filterSize + j] = 0;
                } else {
                    filter[i * filterSize + j] = filter[i * filterSize + j - shift];
                }
            }

            // ... and add the collected weight to the tap of the last source sample
            (*filterPos)[i]-= shift;
            filter[i * filterSize + srcW - 1 - (*filterPos)[i]] += acc;
        }
        av_assert0((*filterPos)[i] >= 0);
        av_assert0((*filterPos)[i] < srcW);
        if ((*filterPos)[i] + filterSize > srcW) {
            for (j = 0; j < filterSize; j++) {
                av_assert0((*filterPos)[i] + j < srcW || !filter[i * filterSize + j]);
            }
        }
    }

    // Note the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
    FF_ALLOCZ_ARRAY_OR_GOTO(NULL, *outFilter,
                            (dstW + 3), *outFilterSize * sizeof(int16_t), fail);

    /* normalize & store in outFilter */
    for (i = 0; i < dstW; i++) {
        int j;
        int64_t error = 0;
        int64_t sum   = 0;

        for (j = 0; j < filterSize; j++) {
            sum += filter[i * filterSize + j];
        }
        sum = (sum + one / 2) / one;
        if (!sum) {
            av_log(NULL, AV_LOG_WARNING, "SwScaler: zero vector in scaling\n");
            sum = 1;
        }
        // carry the rounding error from tap to tap
        for (j = 0; j < *outFilterSize; j++) {
            int64_t v = filter[i * filterSize + j] + error;
            int intV  = ROUNDED_DIV(v, sum);
            (*outFilter)[i * (*outFilterSize) + j] = intV;
            error                                  = v - intV * sum;
        }
    }

    (*filterPos)[dstW + 0] =
    (*filterPos)[dstW + 1] =
    (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1]; /* the MMX/SSE scaler will
                                                      * read over the end */
    for (i = 0; i < *outFilterSize; i++) {
        int k = (dstW - 1) * (*outFilterSize) + i;
        (*outFilter)[k + 1 * (*outFilterSize)] =
        (*outFilter)[k + 2 * (*outFilterSize)] =
        (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
    }

    ret = 0;

fail:
    if(ret < 0)
        av_log(NULL, ret == RETCODE_USE_CASCADE ? AV_LOG_DEBUG : AV_LOG_ERROR, "sws: initFilter failed\n");
    av_free(filter);
    av_free(filter2);
    return ret;
}

722 723
/**
 * Compute the RGB->YUV coefficients stored in c->input_rgb2yuv_table from a
 * table of four YUV->RGB coefficients, then write a 16-bit little-endian
 * expansion of them (laid out according to map[]) starting 16*4 bytes into
 * the same table.
 *
 * @param c        context whose input_rgb2yuv_table is filled
 * @param table    four coefficients, read as vr, ub, -ug, -vg
 * @param dstRange overwritten with 0 before use (see the FIXME below), so the
 *                 caller's value currently has no effect
 */
static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange)
{
    /* One entry per 16-bit slot of the expanded area: the index of the
     * coefficient to store there, or -1 to store 0. */
    static const int8_t map[] = {
    BY_IDX, GY_IDX, -1    , BY_IDX, BY_IDX, GY_IDX, -1    , BY_IDX, //0
    RY_IDX, -1    , GY_IDX, RY_IDX, RY_IDX, -1    , GY_IDX, RY_IDX, //1
    RY_IDX, GY_IDX, -1    , RY_IDX, RY_IDX, GY_IDX, -1    , RY_IDX, //2
    BY_IDX, -1    , GY_IDX, BY_IDX, BY_IDX, -1    , GY_IDX, BY_IDX, //3
    BU_IDX, GU_IDX, -1    , BU_IDX, BU_IDX, GU_IDX, -1    , BU_IDX, //4
    RU_IDX, -1    , GU_IDX, RU_IDX, RU_IDX, -1    , GU_IDX, RU_IDX, //5
    RU_IDX, GU_IDX, -1    , RU_IDX, RU_IDX, GU_IDX, -1    , RU_IDX, //6
    BU_IDX, -1    , GU_IDX, BU_IDX, BU_IDX, -1    , GU_IDX, BU_IDX, //7
    BV_IDX, GV_IDX, -1    , BV_IDX, BV_IDX, GV_IDX, -1    , BV_IDX, //8
    RV_IDX, -1    , GV_IDX, RV_IDX, RV_IDX, -1    , GV_IDX, RV_IDX, //9
    RV_IDX, GV_IDX, -1    , RV_IDX, RV_IDX, GV_IDX, -1    , RV_IDX, //10
    BV_IDX, -1    , GV_IDX, BV_IDX, BV_IDX, -1    , GV_IDX, BV_IDX, //11
    RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, //12
    BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, //13
    GY_IDX, -1    , GY_IDX, -1    , GY_IDX, -1    , GY_IDX, -1    , //14
    -1    , GY_IDX, -1    , GY_IDX, -1    , GY_IDX, -1    , GY_IDX, //15
    RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, //16
    BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, //17
    GU_IDX, -1    , GU_IDX, -1    , GU_IDX, -1    , GU_IDX, -1    , //18
    -1    , GU_IDX, -1    , GU_IDX, -1    , GU_IDX, -1    , GU_IDX, //19
    RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, //20
    BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, //21
    GV_IDX, -1    , GV_IDX, -1    , GV_IDX, -1    , GV_IDX, -1    , //22
    -1    , GV_IDX, -1    , GV_IDX, -1    , GV_IDX, -1    , GV_IDX, //23
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //24
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //25
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //26
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //27
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //28
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //29
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //30
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //31
    BY_IDX, GY_IDX, RY_IDX, -1    , -1    , -1    , -1    , -1    , //32
    BU_IDX, GU_IDX, RU_IDX, -1    , -1    , -1    , -1    , -1    , //33
    BV_IDX, GV_IDX, RV_IDX, -1    , -1    , -1    , -1    , -1    , //34
    };
    const int64_t ONE    = 65536;
    const int64_t ONE_SQ = ONE * ONE;
    const int64_t unit   = 1 << RGB2YUV_SHIFT; /* 1.0 in the output fixed point */
    uint8_t *bytes = (uint8_t*)c->input_rgb2yuv_table;
    int64_t vr =  table[0];
    int64_t ub =  table[1];
    int64_t ug = -table[2];
    int64_t vg = -table[3];
    int64_t cy = ONE;
    int64_t W, V, Z, Cy, Cu, Cv;
    int slot;

    dstRange = 0; //FIXME range = 1 is handled elsewhere

    if (dstRange) {
        /* Not reachable while dstRange is forced to 0 above. */
        vr = vr * 224 / 255;
        ub = ub * 224 / 255;
        ug = ug * 224 / 255;
        vg = vg * 224 / 255;
    } else {
        cy = cy * 255 / 219;
    }

    W = ROUNDED_DIV(ONE_SQ * ug, ub);
    V = ROUNDED_DIV(ONE_SQ * vg, vr);
    Z = ONE_SQ - W - V;

    Cy = ROUNDED_DIV(cy * Z, ONE);
    Cu = ROUNDED_DIV(ub * Z, ONE);
    Cv = ROUNDED_DIV(vr * Z, ONE);

    /* luma row */
    c->input_rgb2yuv_table[RY_IDX] = -ROUNDED_DIV(unit * V,       Cy);
    c->input_rgb2yuv_table[GY_IDX] =  ROUNDED_DIV(unit * ONE_SQ,  Cy);
    c->input_rgb2yuv_table[BY_IDX] = -ROUNDED_DIV(unit * W,       Cy);

    /* U row */
    c->input_rgb2yuv_table[RU_IDX] =  ROUNDED_DIV(unit * V,       Cu);
    c->input_rgb2yuv_table[GU_IDX] = -ROUNDED_DIV(unit * ONE_SQ,  Cu);
    c->input_rgb2yuv_table[BU_IDX] =  ROUNDED_DIV(unit * (Z + W), Cu);

    /* V row */
    c->input_rgb2yuv_table[RV_IDX] =  ROUNDED_DIV(unit * (V + Z), Cv);
    c->input_rgb2yuv_table[GV_IDX] = -ROUNDED_DIV(unit * ONE_SQ,  Cv);
    c->input_rgb2yuv_table[BV_IDX] =  ROUNDED_DIV(unit * W,       Cv);

    /* When the caller passed the default coefficient set, replace the derived
     * values with these fixed constants (a !dstRange condition is commented
     * out in the original check). */
    if (!memcmp(table, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], sizeof(ff_yuv2rgb_coeffs[SWS_CS_DEFAULT]))) {
        c->input_rgb2yuv_table[RY_IDX] =  ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[GY_IDX] =  ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[BY_IDX] =  ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[BU_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[RV_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
    }

    /* Emit one little-endian 16-bit value per map[] entry, 16*4 bytes in. */
    for (slot = 0; slot < FF_ARRAY_ELEMS(map); slot++) {
        const int src = map[slot];
        AV_WL16(bytes + 16*4 + 2*slot, src >= 0 ? c->input_rgb2yuv_table[src] : 0);
    }
}

816 817 818 819 820
/**
 * Install the XYZ<->RGB matrices and gamma lookup tables into a context.
 *
 * The four 4096-entry gamma tables are function-local statics shared by all
 * contexts; they are computed on the first call only. No locking is done
 * here around that one-time fill.
 *
 * @param c context that receives the matrices and table pointers
 */
static void fill_xyztables(struct SwsContext *c)
{
    static const int16_t xyz2rgb_matrix[3][4] = {
        {13270, -6295, -2041},
        {-3969,  7682,   170},
        {  228,  -835,  4329} };
    static const int16_t rgb2xyz_matrix[3][4] = {
        {1689, 1464,  739},
        { 871, 2929,  296},
        {  79,  488, 3891} };
    static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096], xyzgammainv_tab[4096], rgbgammainv_tab[4096];
    const double xyzgamma    = XYZ_GAMMA;
    const double rgbgamma    = 1.0 / RGB_GAMMA;
    const double xyzgammainv = 1.0 / XYZ_GAMMA;
    const double rgbgammainv = RGB_GAMMA;
    int n;

    memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix));
    memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix));

    c->xyzgamma    = xyzgamma_tab;
    c->rgbgamma    = rgbgamma_tab;
    c->xyzgammainv = xyzgammainv_tab;
    c->rgbgammainv = rgbgammainv_tab;

    /* The last entry is nonzero once the tables have been filled
     * (pow(1.0, g) * 4095), so it doubles as the "already done" marker. */
    if (rgbgamma_tab[4095])
        return;

    /* set gamma vectors */
    for (n = 0; n < 4096; n++) {
        const double x = n / 4095.0;

        xyzgamma_tab[n]    = lrint(pow(x, xyzgamma)    * 4095.0);
        rgbgamma_tab[n]    = lrint(pow(x, rgbgamma)    * 4095.0);
        xyzgammainv_tab[n] = lrint(pow(x, xyzgammainv) * 4095.0);
        rgbgammainv_tab[n] = lrint(pow(x, rgbgammainv) * 4095.0);
    }
}

852 853 854
int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
                             int srcRange, const int table[4], int dstRange,
                             int brightness, int contrast, int saturation)
855
{
856 857
    const AVPixFmtDescriptor *desc_dst;
    const AVPixFmtDescriptor *desc_src;
858
    int need_reinit = 0;
859

860 861 862 863
    handle_formats(c);
    desc_dst = av_pix_fmt_desc_get(c->dstFormat);
    desc_src = av_pix_fmt_desc_get(c->srcFormat);

864 865 866 867 868
    if(!isYUV(c->dstFormat) && !isGray(c->dstFormat))
        dstRange = 0;
    if(!isYUV(c->srcFormat) && !isGray(c->srcFormat))
        srcRange = 0;

869 870 871 872 873 874 875 876 877 878 879 880 881 882 883
    if (c->srcRange != srcRange ||
        c->dstRange != dstRange ||
        c->brightness != brightness ||
        c->contrast   != contrast ||
        c->saturation != saturation ||
        memcmp(c->srcColorspaceTable, inv_table, sizeof(int) * 4) ||
        memcmp(c->dstColorspaceTable,     table, sizeof(int) * 4)
    )
        need_reinit = 1;

    memmove(c->srcColorspaceTable, inv_table, sizeof(int) * 4);
    memmove(c->dstColorspaceTable, table, sizeof(int) * 4);



884 885 886 887 888
    c->brightness = brightness;
    c->contrast   = contrast;
    c->saturation = saturation;
    c->srcRange   = srcRange;
    c->dstRange   = dstRange;
889

890 891
    //The srcBpc check is possibly wrong but we seem to lack a definitive reference to test this
    //and what we have in ticket 2939 looks better with this check
892
    if (need_reinit && (c->srcBpc == 8 || !isYUV(c->srcFormat)))
893 894
        ff_sws_init_range_convert(c);

895 896 897
    c->dstFormatBpp = av_get_bits_per_pixel(desc_dst);
    c->srcFormatBpp = av_get_bits_per_pixel(desc_src);

898 899
    if (c->cascaded_context[c->cascaded_mainindex])
        return sws_setColorspaceDetails(c->cascaded_context[c->cascaded_mainindex],inv_table, srcRange,table, dstRange, brightness,  contrast, saturation);
900

901 902 903
    if (!need_reinit)
        return 0;

904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968
    if ((isYUV(c->dstFormat) || isGray(c->dstFormat)) && (isYUV(c->srcFormat) || isGray(c->srcFormat))) {
        if (!c->cascaded_context[0] &&
            memcmp(c->dstColorspaceTable, c->srcColorspaceTable, sizeof(int) * 4) &&
            c->srcW && c->srcH && c->dstW && c->dstH) {
            enum AVPixelFormat tmp_format;
            int tmp_width, tmp_height;
            int srcW = c->srcW;
            int srcH = c->srcH;
            int dstW = c->dstW;
            int dstH = c->dstH;
            int ret;
            av_log(c, AV_LOG_VERBOSE, "YUV color matrix differs for YUV->YUV, using intermediate RGB to convert\n");

            if (isNBPS(c->dstFormat) || is16BPS(c->dstFormat)) {
                if (isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) {
                    tmp_format = AV_PIX_FMT_BGRA64;
                } else {
                    tmp_format = AV_PIX_FMT_BGR48;
                }
            } else {
                if (isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) {
                    tmp_format = AV_PIX_FMT_BGRA;
                } else {
                    tmp_format = AV_PIX_FMT_BGR24;
                }
            }

            if (srcW*srcH > dstW*dstH) {
                tmp_width  = dstW;
                tmp_height = dstH;
            } else {
                tmp_width  = srcW;
                tmp_height = srcH;
            }

            ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride,
                                tmp_width, tmp_height, tmp_format, 64);
            if (ret < 0)
                return ret;

            c->cascaded_context[0] = sws_alloc_set_opts(srcW, srcH, c->srcFormat,
                                                        tmp_width, tmp_height, tmp_format,
                                                        c->flags, c->param);
            if (!c->cascaded_context[0])
                return -1;

            c->cascaded_context[0]->alphablend = c->alphablend;
            ret = sws_init_context(c->cascaded_context[0], NULL , NULL);
            if (ret < 0)
                return ret;
            //we set both src and dst depending on that the RGB side will be ignored
            sws_setColorspaceDetails(c->cascaded_context[0], inv_table,
                                     srcRange, table, dstRange,
                                     brightness, contrast, saturation);

            c->cascaded_context[1] = sws_getContext(tmp_width, tmp_height, tmp_format,
                                                    dstW, dstH, c->dstFormat,
                                                    c->flags, NULL, NULL, c->param);
            if (!c->cascaded_context[1])
                return -1;
            sws_setColorspaceDetails(c->cascaded_context[1], inv_table,
                                     srcRange, table, dstRange,
                                     0, 1 << 16, 1 << 16);
            return 0;
        }
969
        return -1;
970
    }
971

972
    if (!isYUV(c->dstFormat) && !isGray(c->dstFormat)) {
973 974 975
        ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness,
                                 contrast, saturation);
        // FIXME factorize
976

977 978 979
        if (ARCH_PPC)
            ff_yuv2rgb_init_tables_ppc(c, inv_table, brightness,
                                       contrast, saturation);
980
    }
981 982 983

    fill_rgb2yuv_table(c, table, dstRange);

984 985 986
    return 0;
}

987 988 989
int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
                             int *srcRange, int **table, int *dstRange,
                             int *brightness, int *contrast, int *saturation)
990
{
991
    if (!c )
992
        return -1;
993

994 995 996 997 998 999 1000
    *inv_table  = c->srcColorspaceTable;
    *table      = c->dstColorspaceTable;
    *srcRange   = c->srcRange;
    *dstRange   = c->dstRange;
    *brightness = c->brightness;
    *contrast   = c->contrast;
    *saturation = c->saturation;
1001 1002 1003 1004

    return 0;
}

1005
static int handle_jpeg(enum AVPixelFormat *format)
1006 1007
{
    switch (*format) {
1008 1009
    case AV_PIX_FMT_YUVJ420P:
        *format = AV_PIX_FMT_YUV420P;
1010
        return 1;
Michael Niedermayer's avatar
Michael Niedermayer committed
1011 1012 1013
    case AV_PIX_FMT_YUVJ411P:
        *format = AV_PIX_FMT_YUV411P;
        return 1;
1014 1015
    case AV_PIX_FMT_YUVJ422P:
        *format = AV_PIX_FMT_YUV422P;
1016
        return 1;
1017 1018
    case AV_PIX_FMT_YUVJ444P:
        *format = AV_PIX_FMT_YUV444P;
1019
        return 1;
1020 1021
    case AV_PIX_FMT_YUVJ440P:
        *format = AV_PIX_FMT_YUV440P;
1022
        return 1;
1023
    case AV_PIX_FMT_GRAY8:
1024
    case AV_PIX_FMT_YA8:
1025 1026
    case AV_PIX_FMT_GRAY9LE:
    case AV_PIX_FMT_GRAY9BE:
1027 1028 1029 1030
    case AV_PIX_FMT_GRAY10LE:
    case AV_PIX_FMT_GRAY10BE:
    case AV_PIX_FMT_GRAY12LE:
    case AV_PIX_FMT_GRAY12BE:
1031 1032
    case AV_PIX_FMT_GRAY14LE:
    case AV_PIX_FMT_GRAY14BE:
1033 1034
    case AV_PIX_FMT_GRAY16LE:
    case AV_PIX_FMT_GRAY16BE:
1035 1036
    case AV_PIX_FMT_YA16BE:
    case AV_PIX_FMT_YA16LE:
1037
        return 1;
1038 1039
    default:
        return 0;
1040 1041 1042
    }
}

1043
static int handle_0alpha(enum AVPixelFormat *format)
1044 1045
{
    switch (*format) {
1046 1047 1048 1049
    case AV_PIX_FMT_0BGR    : *format = AV_PIX_FMT_ABGR   ; return 1;
    case AV_PIX_FMT_BGR0    : *format = AV_PIX_FMT_BGRA   ; return 4;
    case AV_PIX_FMT_0RGB    : *format = AV_PIX_FMT_ARGB   ; return 1;
    case AV_PIX_FMT_RGB0    : *format = AV_PIX_FMT_RGBA   ; return 4;
1050
    default:                                          return 0;
1051 1052 1053
    }
}

1054 1055 1056 1057 1058 1059 1060 1061 1062
/**
 * Replace a 12-bit XYZ format by the RGB48 format of the same endianness.
 *
 * @param format in/out: rewritten for XYZ12BE / XYZ12LE only
 * @return 1 if the format was replaced, 0 otherwise
 */
static int handle_xyz(enum AVPixelFormat *format)
{
    if (*format == AV_PIX_FMT_XYZ12BE) {
        *format = AV_PIX_FMT_RGB48BE;
        return 1;
    }
    if (*format == AV_PIX_FMT_XYZ12LE) {
        *format = AV_PIX_FMT_RGB48LE;
        return 1;
    }
    return 0;
}

1063 1064
/**
 * Normalize the context's source and destination formats: 0-alpha formats
 * become their alpha counterparts and XYZ12 formats become RGB48, with the
 * result of each substitution OR-ed into the matching context flag. The XYZ
 * tables are set up when either side is flagged as XYZ.
 */
static void handle_formats(SwsContext *c)
{
    const int src_pad_alpha = handle_0alpha(&c->srcFormat);
    const int dst_pad_alpha = handle_0alpha(&c->dstFormat);
    const int src_was_xyz   = handle_xyz(&c->srcFormat);
    const int dst_was_xyz   = handle_xyz(&c->dstFormat);

    c->src0Alpha |= src_pad_alpha;
    c->dst0Alpha |= dst_pad_alpha;
    c->srcXYZ    |= src_was_xyz;
    c->dstXYZ    |= dst_was_xyz;

    if (!c->srcXYZ && !c->dstXYZ)
        return;

    fill_xyztables(c);
}

1073 1074
SwsContext *sws_alloc_context(void)
{
1075
    SwsContext *c = av_mallocz(sizeof(SwsContext));
1076

1077 1078
    av_assert0(offsetof(SwsContext, redDither) + DITHER32_INT == offsetof(SwsContext, dither32));

1079
    if (c) {
1080
        c->av_class = &ff_sws_context_class;
1081 1082
        av_opt_set_defaults(c);
    }
1083 1084 1085 1086

    return c;
}

1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100
/**
 * Allocate a 65536-entry table holding (i / 65535)^e scaled to 0..65535.
 *
 * The scaled value is converted to uint16_t by plain assignment, i.e. it is
 * truncated rather than rounded.
 *
 * @param e exponent of the power curve
 * @return the table allocated with av_malloc(), or NULL if the allocation
 *         failed
 */
static uint16_t * alloc_gamma_tbl(double e)
{
    uint16_t *table = av_malloc((1 << 16) * sizeof(*table));
    int level;

    if (!table)
        return NULL;

    for (level = 0; level <= 65535; level++)
        table[level] = pow(level / 65535.0, e) * 65535.0;

    return table;
}

1101 1102 1103
static enum AVPixelFormat alphaless_fmt(enum AVPixelFormat fmt)
{
    switch(fmt) {
1104 1105 1106 1107 1108
    case AV_PIX_FMT_ARGB:       return AV_PIX_FMT_RGB24;
    case AV_PIX_FMT_RGBA:       return AV_PIX_FMT_RGB24;
    case AV_PIX_FMT_ABGR:       return AV_PIX_FMT_BGR24;
    case AV_PIX_FMT_BGRA:       return AV_PIX_FMT_BGR24;
    case AV_PIX_FMT_YA8:        return AV_PIX_FMT_GRAY8;
1109 1110 1111

    case AV_PIX_FMT_YUVA420P:   return AV_PIX_FMT_YUV420P;
    case AV_PIX_FMT_YUVA422P:   return AV_PIX_FMT_YUV422P;
1112 1113 1114 1115
    case AV_PIX_FMT_YUVA444P:           return AV_PIX_FMT_YUV444P;

    case AV_PIX_FMT_GBRAP:              return AV_PIX_FMT_GBRP;

1116 1117 1118
    case AV_PIX_FMT_GBRAP10LE:          return AV_PIX_FMT_GBRP10;
    case AV_PIX_FMT_GBRAP10BE:          return AV_PIX_FMT_GBRP10;

1119 1120 1121
    case AV_PIX_FMT_GBRAP12LE:          return AV_PIX_FMT_GBRP12;
    case AV_PIX_FMT_GBRAP12BE:          return AV_PIX_FMT_GBRP12;

1122 1123 1124
    case AV_PIX_FMT_GBRAP16LE:          return AV_PIX_FMT_GBRP16;
    case AV_PIX_FMT_GBRAP16BE:          return AV_PIX_FMT_GBRP16;

1125 1126 1127 1128
    case AV_PIX_FMT_RGBA64LE:   return AV_PIX_FMT_RGB48;
    case AV_PIX_FMT_RGBA64BE:   return AV_PIX_FMT_RGB48;
    case AV_PIX_FMT_BGRA64LE:   return AV_PIX_FMT_BGR48;
    case AV_PIX_FMT_BGRA64BE:   return AV_PIX_FMT_BGR48;
1129

1130 1131
    case AV_PIX_FMT_YA16BE:             return AV_PIX_FMT_GRAY16;
    case AV_PIX_FMT_YA16LE:             return AV_PIX_FMT_GRAY16;
1132

1133 1134
    case AV_PIX_FMT_YUVA420P9BE:        return AV_PIX_FMT_YUV420P9;
    case AV_PIX_FMT_YUVA422P9BE:        return AV_PIX_FMT_YUV422P9;
1135
    case AV_PIX_FMT_YUVA444P9BE:        return AV_PIX_FMT_YUV444P9;
1136 1137
    case AV_PIX_FMT_YUVA420P9LE:        return AV_PIX_FMT_YUV420P9;
    case AV_PIX_FMT_YUVA422P9LE:        return AV_PIX_FMT_YUV422P9;
1138
    case AV_PIX_FMT_YUVA444P9LE:        return AV_PIX_FMT_YUV444P9;
1139 1140
    case AV_PIX_FMT_YUVA420P10BE:       return AV_PIX_FMT_YUV420P10;
    case AV_PIX_FMT_YUVA422P10BE:       return AV_PIX_FMT_YUV422P10;
1141
    case AV_PIX_FMT_YUVA444P10BE:       return AV_PIX_FMT_YUV444P10;
1142 1143
    case AV_PIX_FMT_YUVA420P10LE:       return AV_PIX_FMT_YUV420P10;
    case AV_PIX_FMT_YUVA422P10LE:       return AV_PIX_FMT_YUV422P10;
1144
    case AV_PIX_FMT_YUVA444P10LE:       return AV_PIX_FMT_YUV444P10;
1145 1146
    case AV_PIX_FMT_YUVA420P16BE:       return AV_PIX_FMT_YUV420P16;
    case AV_PIX_FMT_YUVA422P16BE:       return AV_PIX_FMT_YUV422P16;
1147
    case AV_PIX_FMT_YUVA444P16BE:       return AV_PIX_FMT_YUV444P16;
1148 1149
    case AV_PIX_FMT_YUVA420P16LE:       return AV_PIX_FMT_YUV420P16;
    case AV_PIX_FMT_YUVA422P16LE:       return AV_PIX_FMT_YUV422P16;
1150 1151 1152 1153 1154 1155 1156 1157 1158
    case AV_PIX_FMT_YUVA444P16LE:       return AV_PIX_FMT_YUV444P16;

//     case AV_PIX_FMT_AYUV64LE:
//     case AV_PIX_FMT_AYUV64BE:
//     case AV_PIX_FMT_PAL8:
    default: return AV_PIX_FMT_NONE;
    }
}

1159 1160
av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                             SwsFilter *dstFilter)
1161
{
1162
    int i;
1163 1164
    int usesVFilter, usesHFilter;
    int unscaled;
1165 1166 1167 1168 1169
    SwsFilter dummyFilter = { NULL, NULL, NULL, NULL };
    int srcW              = c->srcW;
    int srcH              = c->srcH;
    int dstW              = c->dstW;
    int dstH              = c->dstH;
1170
    int dst_stride        = FFALIGN(dstW * sizeof(int16_t) + 66, 16);
1171
    int flags, cpu_flags;
1172 1173
    enum AVPixelFormat srcFormat = c->srcFormat;
    enum AVPixelFormat dstFormat = c->dstFormat;
1174 1175
    const AVPixFmtDescriptor *desc_src;
    const AVPixFmtDescriptor *desc_dst;
1176
    int ret = 0;
1177
    enum AVPixelFormat tmpFmt;
1178
    static const float float_mult = 1.0f / 255.0f;
1179

1180 1181
    cpu_flags = av_get_cpu_flags();
    flags     = c->flags;
1182
    emms_c();
1183
    if (!rgb15to16)
1184
        ff_sws_rgb2rgb_init();
1185 1186 1187

    unscaled = (srcW == dstW && srcH == dstH);

1188 1189 1190
    c->srcRange |= handle_jpeg(&c->srcFormat);
    c->dstRange |= handle_jpeg(&c->dstFormat);

1191 1192 1193
    if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat)
        av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n");

1194 1195 1196 1197 1198
    if (!c->contrast && !c->saturation && !c->dstFormatBpp)
        sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange,
                                 ff_yuv2rgb_coeffs[SWS_CS_DEFAULT],
                                 c->dstRange, 0, 1 << 16, 1 << 16);

1199 1200 1201 1202 1203
    handle_formats(c);
    srcFormat = c->srcFormat;
    dstFormat = c->dstFormat;
    desc_src = av_pix_fmt_desc_get(srcFormat);
    desc_dst = av_pix_fmt_desc_get(dstFormat);
1204

1205 1206 1207 1208
    // If the source has no alpha then disable alpha blendaway
    if (c->src0Alpha)
        c->alphablend = SWS_ALPHA_BLEND_NONE;

1209 1210
    if (!(unscaled && sws_isSupportedEndiannessConversion(srcFormat) &&
          av_pix_fmt_swap_endianness(srcFormat) == dstFormat)) {
1211
    if (!sws_isSupportedInput(srcFormat)) {
1212
        av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n",
1213
               av_get_pix_fmt_name(srcFormat));
1214
        return AVERROR(EINVAL);
1215
    }
1216
    if (!sws_isSupportedOutput(dstFormat)) {
1217
        av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n",
1218
               av_get_pix_fmt_name(dstFormat));
1219
        return AVERROR(EINVAL);
1220
    }
1221
    }
1222
    av_assert2(desc_src && desc_dst);
1223

1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234
    i = flags & (SWS_POINT         |
                 SWS_AREA          |
                 SWS_BILINEAR      |
                 SWS_FAST_BILINEAR |
                 SWS_BICUBIC       |
                 SWS_X             |
                 SWS_GAUSS         |
                 SWS_LANCZOS       |
                 SWS_SINC          |
                 SWS_SPLINE        |
                 SWS_BICUBLIN);
1235 1236 1237 1238

    /* provide a default scaler if not set by caller */
    if (!i) {
        if (dstW < srcW && dstH < srcH)
1239
            flags |= SWS_BICUBIC;
1240
        else if (dstW > srcW && dstH > srcH)
1241
            flags |= SWS_BICUBIC;
1242
        else
1243
            flags |= SWS_BICUBIC;
1244 1245
        c->flags = flags;
    } else if (i & (i - 1)) {
1246
        av_log(c, AV_LOG_ERROR,
1247
               "Exactly one scaler algorithm must be chosen, got %X\n", i);
1248
        return AVERROR(EINVAL);
1249 1250
    }
    /* sanity check */
1251
    if (srcW < 1 || srcH < 1 || dstW < 1 || dstH < 1) {
1252 1253
        /* FIXME check if these are enough and try to lower them after
         * fixing the relevant parts of the code */
1254
        av_log(c, AV_LOG_ERROR, "%dx%d -> %dx%d is invalid scaling dimension\n",
1255
               srcW, srcH, dstW, dstH);
1256
        return AVERROR(EINVAL);
1257
    }
1258 1259 1260 1261 1262 1263
    if (flags & SWS_FAST_BILINEAR) {
        if (srcW < 8 || dstW < 8) {
            flags ^= SWS_FAST_BILINEAR | SWS_BILINEAR;
            c->flags = flags;
        }
    }
1264

1265 1266 1267 1268
    if (!dstFilter)
        dstFilter = &dummyFilter;
    if (!srcFilter)
        srcFilter = &dummyFilter;
1269

1270 1271
    c->lumXInc      = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
    c->lumYInc      = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
1272 1273
    c->dstFormatBpp = av_get_bits_per_pixel(desc_dst);
    c->srcFormatBpp = av_get_bits_per_pixel(desc_src);
1274
    c->vRounder     = 4 * 0x0001000100010001ULL;
1275

1276 1277 1278 1279 1280 1281 1282 1283
    usesVFilter = (srcFilter->lumV && srcFilter->lumV->length > 1) ||
                  (srcFilter->chrV && srcFilter->chrV->length > 1) ||
                  (dstFilter->lumV && dstFilter->lumV->length > 1) ||
                  (dstFilter->chrV && dstFilter->chrV->length > 1);
    usesHFilter = (srcFilter->lumH && srcFilter->lumH->length > 1) ||
                  (srcFilter->chrH && srcFilter->chrH->length > 1) ||
                  (dstFilter->lumH && dstFilter->lumH->length > 1) ||
                  (dstFilter->chrH && dstFilter->chrH->length > 1);
1284

1285 1286
    av_pix_fmt_get_chroma_sub_sample(srcFormat, &c->chrSrcHSubSample, &c->chrSrcVSubSample);
    av_pix_fmt_get_chroma_sub_sample(dstFormat, &c->chrDstHSubSample, &c->chrDstVSubSample);
1287

1288 1289 1290 1291 1292
    if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) {
        if (dstW&1) {
            av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to odd output size\n");
            flags |= SWS_FULL_CHR_H_INT;
            c->flags = flags;
1293
        }
1294 1295 1296 1297 1298 1299 1300 1301 1302 1303

        if (   c->chrSrcHSubSample == 0
            && c->chrSrcVSubSample == 0
            && c->dither != SWS_DITHER_BAYER //SWS_FULL_CHR_H_INT is currently not supported with SWS_DITHER_BAYER
            && !(c->flags & SWS_FAST_BILINEAR)
        ) {
            av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to input having non subsampled chroma\n");
            flags |= SWS_FULL_CHR_H_INT;
            c->flags = flags;
        }
1304
    }
1305

1306 1307 1308 1309 1310
    if (c->dither == SWS_DITHER_AUTO) {
        if (flags & SWS_ERROR_DIFFUSION)
            c->dither = SWS_DITHER_ED;
    }

1311 1312 1313 1314
    if(dstFormat == AV_PIX_FMT_BGR4_BYTE ||
       dstFormat == AV_PIX_FMT_RGB4_BYTE ||
       dstFormat == AV_PIX_FMT_BGR8 ||
       dstFormat == AV_PIX_FMT_RGB8) {
1315 1316
        if (c->dither == SWS_DITHER_AUTO)
            c->dither = (flags & SWS_FULL_CHR_H_INT) ? SWS_DITHER_ED : SWS_DITHER_BAYER;
1317
        if (!(flags & SWS_FULL_CHR_H_INT)) {
1318
            if (c->dither == SWS_DITHER_ED || c->dither == SWS_DITHER_A_DITHER || c->dither == SWS_DITHER_X_DITHER) {
1319 1320 1321 1322 1323 1324
                av_log(c, AV_LOG_DEBUG,
                    "Desired dithering only supported in full chroma interpolation for destination format '%s'\n",
                    av_get_pix_fmt_name(dstFormat));
                flags   |= SWS_FULL_CHR_H_INT;
                c->flags = flags;
            }
1325
        }
1326 1327 1328 1329 1330 1331 1332
        if (flags & SWS_FULL_CHR_H_INT) {
            if (c->dither == SWS_DITHER_BAYER) {
                av_log(c, AV_LOG_DEBUG,
                    "Ordered dither is not supported in full chroma interpolation for destination format '%s'\n",
                    av_get_pix_fmt_name(dstFormat));
                c->dither = SWS_DITHER_ED;
            }
1333
        }
1334
    }
1335
    if (isPlanarRGB(dstFormat)) {
1336 1337
        if (!(flags & SWS_FULL_CHR_H_INT)) {
            av_log(c, AV_LOG_DEBUG,
1338 1339
                   "%s output is not supported with half chroma resolution, switching to full\n",
                   av_get_pix_fmt_name(dstFormat));
1340 1341 1342 1343
            flags   |= SWS_FULL_CHR_H_INT;
            c->flags = flags;
        }
    }
1344

1345 1346
    /* reuse chroma for 2 pixels RGB/BGR unless user wants full
     * chroma interpolation */
1347
    if (flags & SWS_FULL_CHR_H_INT &&
1348
        isAnyRGB(dstFormat)        &&
1349
        !isPlanarRGB(dstFormat)    &&
1350 1351 1352 1353 1354 1355 1356 1357
        dstFormat != AV_PIX_FMT_RGBA64LE &&
        dstFormat != AV_PIX_FMT_RGBA64BE &&
        dstFormat != AV_PIX_FMT_BGRA64LE &&
        dstFormat != AV_PIX_FMT_BGRA64BE &&
        dstFormat != AV_PIX_FMT_RGB48LE &&
        dstFormat != AV_PIX_FMT_RGB48BE &&
        dstFormat != AV_PIX_FMT_BGR48LE &&
        dstFormat != AV_PIX_FMT_BGR48BE &&
1358 1359 1360 1361 1362
        dstFormat != AV_PIX_FMT_RGBA  &&
        dstFormat != AV_PIX_FMT_ARGB  &&
        dstFormat != AV_PIX_FMT_BGRA  &&
        dstFormat != AV_PIX_FMT_ABGR  &&
        dstFormat != AV_PIX_FMT_RGB24 &&
1363 1364 1365 1366 1367 1368
        dstFormat != AV_PIX_FMT_BGR24 &&
        dstFormat != AV_PIX_FMT_BGR4_BYTE &&
        dstFormat != AV_PIX_FMT_RGB4_BYTE &&
        dstFormat != AV_PIX_FMT_BGR8 &&
        dstFormat != AV_PIX_FMT_RGB8
    ) {
1369 1370
        av_log(c, AV_LOG_WARNING,
               "full chroma interpolation for destination format '%s' not yet implemented\n",
1371
               av_get_pix_fmt_name(dstFormat));
1372
        flags   &= ~SWS_FULL_CHR_H_INT;
1373
        c->flags = flags;
1374
    }
1375 1376
    if (isAnyRGB(dstFormat) && !(flags & SWS_FULL_CHR_H_INT))
        c->chrDstHSubSample = 1;
1377 1378

    // drop some chroma lines if the user wants it
1379 1380 1381 1382 1383 1384 1385
    c->vChrDrop          = (flags & SWS_SRC_V_CHR_DROP_MASK) >>
                           SWS_SRC_V_CHR_DROP_SHIFT;
    c->chrSrcVSubSample += c->vChrDrop;

    /* drop every other pixel for chroma calculation unless user
     * wants full chroma */
    if (isAnyRGB(srcFormat) && !(flags & SWS_FULL_CHR_H_INP)   &&
1386 1387 1388
        srcFormat != AV_PIX_FMT_RGB8 && srcFormat != AV_PIX_FMT_BGR8 &&
        srcFormat != AV_PIX_FMT_RGB4 && srcFormat != AV_PIX_FMT_BGR4 &&
        srcFormat != AV_PIX_FMT_RGB4_BYTE && srcFormat != AV_PIX_FMT_BGR4_BYTE &&
1389 1390
        srcFormat != AV_PIX_FMT_GBRP9BE   && srcFormat != AV_PIX_FMT_GBRP9LE  &&
        srcFormat != AV_PIX_FMT_GBRP10BE  && srcFormat != AV_PIX_FMT_GBRP10LE &&
1391
        srcFormat != AV_PIX_FMT_GBRAP10BE && srcFormat != AV_PIX_FMT_GBRAP10LE &&
1392
        srcFormat != AV_PIX_FMT_GBRP12BE  && srcFormat != AV_PIX_FMT_GBRP12LE &&
1393
        srcFormat != AV_PIX_FMT_GBRAP12BE && srcFormat != AV_PIX_FMT_GBRAP12LE &&
1394
        srcFormat != AV_PIX_FMT_GBRP14BE  && srcFormat != AV_PIX_FMT_GBRP14LE &&
1395
        srcFormat != AV_PIX_FMT_GBRP16BE  && srcFormat != AV_PIX_FMT_GBRP16LE &&
1396
        srcFormat != AV_PIX_FMT_GBRAP16BE  && srcFormat != AV_PIX_FMT_GBRAP16LE &&
1397 1398 1399
        ((dstW >> c->chrDstHSubSample) <= (srcW >> 1) ||
         (flags & SWS_FAST_BILINEAR)))
        c->chrSrcHSubSample = 1;
1400

1401 1402 1403 1404 1405
    // Note the AV_CEIL_RSHIFT is so that we always round toward +inf.
    c->chrSrcW = AV_CEIL_RSHIFT(srcW, c->chrSrcHSubSample);
    c->chrSrcH = AV_CEIL_RSHIFT(srcH, c->chrSrcVSubSample);
    c->chrDstW = AV_CEIL_RSHIFT(dstW, c->chrDstHSubSample);
    c->chrDstH = AV_CEIL_RSHIFT(dstH, c->chrDstVSubSample);
1406

1407
    FF_ALLOCZ_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
1408

1409
    c->srcBpc = desc_src->comp[0].depth;
1410 1411
    if (c->srcBpc < 8)
        c->srcBpc = 8;
1412
    c->dstBpc = desc_dst->comp[0].depth;
1413 1414
    if (c->dstBpc < 8)
        c->dstBpc = 8;
1415
    if (isAnyRGB(srcFormat) || srcFormat == AV_PIX_FMT_PAL8)
1416
        c->srcBpc = 16;
1417
    if (c->dstBpc == 16)
1418
        dst_stride <<= 1;
1419

1420
    if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 14) {
1421
        c->canMMXEXTBeUsed = dstW >= srcW && (dstW & 31) == 0 &&
1422
                             c->chrDstW >= c->chrSrcW &&
1423
                             (srcW & 15) == 0;
1424
        if (!c->canMMXEXTBeUsed && dstW >= srcW && c->chrDstW >= c->chrSrcW && (srcW & 15) == 0
1425

1426 1427 1428
            && (flags & SWS_FAST_BILINEAR)) {
            if (flags & SWS_PRINT_INFO)
                av_log(c, AV_LOG_INFO,
1429
                       "output width is not a multiple of 32 -> no MMXEXT scaler\n");
1430
        }
1431
        if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat))
1432
            c->canMMXEXTBeUsed = 0;
1433
    } else
1434
        c->canMMXEXTBeUsed = 0;
1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446

    c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;
    c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;

    /* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src
     * to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do
     * correct scaling.
     * n-2 is the last chrominance sample available.
     * This is not perfect, but no one should notice the difference, the more
     * correct variant would be like the vertical one, but that would require
     * some special code for the first and last pixel */
    if (flags & SWS_FAST_BILINEAR) {
1447
        if (c->canMMXEXTBeUsed) {
1448 1449
            c->lumXInc += 20;
            c->chrXInc += 20;
1450
        }
1451
        // we don't use the x86 asm scaler if MMX is available
1452
        else if (INLINE_MMX(cpu_flags) && c->dstBpc <= 14) {
1453 1454
            c->lumXInc = ((int64_t)(srcW       - 2) << 16) / (dstW       - 2) - 20;
            c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20;
1455 1456 1457
        }
    }

1458 1459 1460 1461 1462 1463
    // hardcoded for now
    c->gamma_value = 2.2;
    tmpFmt = AV_PIX_FMT_RGBA64LE;


    if (!unscaled && c->gamma_flag && (srcFormat != tmpFmt || dstFormat != tmpFmt)) {
1464
        SwsContext *c2;
1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480
        c->cascaded_context[0] = NULL;

        ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride,
                            srcW, srcH, tmpFmt, 64);
        if (ret < 0)
            return ret;

        c->cascaded_context[0] = sws_getContext(srcW, srcH, srcFormat,
                                                srcW, srcH, tmpFmt,
                                                flags, NULL, NULL, c->param);
        if (!c->cascaded_context[0]) {
            return -1;
        }

        c->cascaded_context[1] = sws_getContext(srcW, srcH, tmpFmt,
                                                dstW, dstH, tmpFmt,
1481
                                                flags, srcFilter, dstFilter, c->param);
1482 1483 1484 1485

        if (!c->cascaded_context[1])
            return -1;

1486 1487 1488 1489 1490 1491 1492
        c2 = c->cascaded_context[1];
        c2->is_internal_gamma = 1;
        c2->gamma     = alloc_gamma_tbl(    c->gamma_value);
        c2->inv_gamma = alloc_gamma_tbl(1.f/c->gamma_value);
        if (!c2->gamma || !c2->inv_gamma)
            return AVERROR(ENOMEM);

1493 1494 1495 1496 1497 1498 1499 1500 1501
        // is_internal_flag is set after creating the context
        // to properly create the gamma convert FilterDescriptor
        // we have to re-initialize it
        ff_free_filters(c2);
        if (ff_init_filters(c2) < 0) {
            sws_freeContext(c2);
            return -1;
        }

1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517
        c->cascaded_context[2] = NULL;
        if (dstFormat != tmpFmt) {
            ret = av_image_alloc(c->cascaded1_tmp, c->cascaded1_tmpStride,
                                dstW, dstH, tmpFmt, 64);
            if (ret < 0)
                return ret;

            c->cascaded_context[2] = sws_getContext(dstW, dstH, tmpFmt,
                                                dstW, dstH, dstFormat,
                                                flags, NULL, NULL, c->param);
            if (!c->cascaded_context[2])
                return -1;
        }
        return 0;
    }

1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542
    if (isBayer(srcFormat)) {
        if (!unscaled ||
            (dstFormat != AV_PIX_FMT_RGB24 && dstFormat != AV_PIX_FMT_YUV420P)) {
            enum AVPixelFormat tmpFormat = AV_PIX_FMT_RGB24;

            ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride,
                                srcW, srcH, tmpFormat, 64);
            if (ret < 0)
                return ret;

            c->cascaded_context[0] = sws_getContext(srcW, srcH, srcFormat,
                                                    srcW, srcH, tmpFormat,
                                                    flags, srcFilter, NULL, c->param);
            if (!c->cascaded_context[0])
                return -1;

            c->cascaded_context[1] = sws_getContext(srcW, srcH, tmpFormat,
                                                    dstW, dstH, dstFormat,
                                                    flags, NULL, dstFilter, c->param);
            if (!c->cascaded_context[1])
                return -1;
            return 0;
        }
    }

1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555
    if (unscaled && c->srcBpc == 8 && dstFormat == AV_PIX_FMT_GRAYF32){
        for (i = 0; i < 256; ++i){
            c->uint2float_lut[i] = (float)i * float_mult;
        }
    }

    // float will be converted to uint16_t
    if ((srcFormat == AV_PIX_FMT_GRAYF32BE || srcFormat == AV_PIX_FMT_GRAYF32LE) &&
        (!unscaled || unscaled && dstFormat != srcFormat && (srcFormat != AV_PIX_FMT_GRAYF32 ||
        dstFormat != AV_PIX_FMT_GRAY8))){
        c->srcBpc = 16;
    }

1556 1557 1558 1559 1560 1561 1562 1563 1564
    if (CONFIG_SWSCALE_ALPHA && isALPHA(srcFormat) && !isALPHA(dstFormat)) {
        enum AVPixelFormat tmpFormat = alphaless_fmt(srcFormat);

        if (tmpFormat != AV_PIX_FMT_NONE && c->alphablend != SWS_ALPHA_BLEND_NONE)
        if (!unscaled ||
            dstFormat != tmpFormat ||
            usesHFilter || usesVFilter ||
            c->srcRange != c->dstRange
        ) {
1565
            c->cascaded_mainindex = 1;
1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580
            ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride,
                                srcW, srcH, tmpFormat, 64);
            if (ret < 0)
                return ret;

            c->cascaded_context[0] = sws_alloc_set_opts(srcW, srcH, srcFormat,
                                                        srcW, srcH, tmpFormat,
                                                        flags, c->param);
            if (!c->cascaded_context[0])
                return -1;
            c->cascaded_context[0]->alphablend = c->alphablend;
            ret = sws_init_context(c->cascaded_context[0], NULL , NULL);
            if (ret < 0)
                return ret;

1581 1582 1583
            c->cascaded_context[1] = sws_alloc_set_opts(srcW, srcH, tmpFormat,
                                                        dstW, dstH, dstFormat,
                                                        flags, c->param);
1584 1585
            if (!c->cascaded_context[1])
                return -1;
1586 1587 1588 1589 1590 1591 1592

            c->cascaded_context[1]->srcRange = c->srcRange;
            c->cascaded_context[1]->dstRange = c->dstRange;
            ret = sws_init_context(c->cascaded_context[1], srcFilter , dstFilter);
            if (ret < 0)
                return ret;

1593 1594 1595 1596
            return 0;
        }
    }

1597 1598 1599 1600 1601
#if HAVE_MMAP && HAVE_MPROTECT && defined(MAP_ANONYMOUS)
#define USE_MMAP 1
#else
#define USE_MMAP 0
#endif
1602

1603 1604
    /* precalculate horizontal scaler filter coefficients */
    {
1605
#if HAVE_MMXEXT_INLINE
1606
// can't downscale !!!
1607
        if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) {
1608
            c->lumMmxextFilterCodeSize = ff_init_hscaler_mmxext(dstW, c->lumXInc, NULL,
1609
                                                             NULL, NULL, 8);
1610
            c->chrMmxextFilterCodeSize = ff_init_hscaler_mmxext(c->chrDstW, c->chrXInc,
1611
                                                             NULL, NULL, NULL, 4);
1612

1613
#if USE_MMAP
1614 1615 1616 1617 1618 1619 1620 1621
            c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize,
                                          PROT_READ | PROT_WRITE,
                                          MAP_PRIVATE | MAP_ANONYMOUS,
                                          -1, 0);
            c->chrMmxextFilterCode = mmap(NULL, c->chrMmxextFilterCodeSize,
                                          PROT_READ | PROT_WRITE,
                                          MAP_PRIVATE | MAP_ANONYMOUS,
                                          -1, 0);
1622
#elif HAVE_VIRTUALALLOC
1623 1624 1625 1626 1627 1628 1629 1630
            c->lumMmxextFilterCode = VirtualAlloc(NULL,
                                                  c->lumMmxextFilterCodeSize,
                                                  MEM_COMMIT,
                                                  PAGE_EXECUTE_READWRITE);
            c->chrMmxextFilterCode = VirtualAlloc(NULL,
                                                  c->chrMmxextFilterCodeSize,
                                                  MEM_COMMIT,
                                                  PAGE_EXECUTE_READWRITE);
1631
#else
1632 1633
            c->lumMmxextFilterCode = av_malloc(c->lumMmxextFilterCodeSize);
            c->chrMmxextFilterCode = av_malloc(c->chrMmxextFilterCodeSize);
1634 1635
#endif

1636
#ifdef MAP_ANONYMOUS
1637
            if (c->lumMmxextFilterCode == MAP_FAILED || c->chrMmxextFilterCode == MAP_FAILED)
1638
#else
1639
            if (!c->lumMmxextFilterCode || !c->chrMmxextFilterCode)
1640
#endif
1641 1642
            {
                av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n");
1643
                return AVERROR(ENOMEM);
1644
            }
1645

1646 1647 1648 1649
            FF_ALLOCZ_OR_GOTO(c, c->hLumFilter,    (dstW           / 8 + 8) * sizeof(int16_t), fail);
            FF_ALLOCZ_OR_GOTO(c, c->hChrFilter,    (c->chrDstW     / 4 + 8) * sizeof(int16_t), fail);
            FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW       / 2 / 8 + 8) * sizeof(int32_t), fail);
            FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail);
1650

1651
            ff_init_hscaler_mmxext(      dstW, c->lumXInc, c->lumMmxextFilterCode,
1652
                                c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8);
1653
            ff_init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode,
1654
                                c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4);
1655

1656
#if USE_MMAP
1657 1658 1659 1660 1661
            if (   mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1
                || mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1) {
                av_log(c, AV_LOG_ERROR, "mprotect failed, cannot use fast bilinear scaler\n");
                goto fail;
            }
1662 1663
#endif
        } else
1664
#endif /* HAVE_MMXEXT_INLINE */
1665
        {
1666
            const int filterAlign = X86_MMX(cpu_flags)     ? 4 :
1667
                                    PPC_ALTIVEC(cpu_flags) ? 8 :
1668
                                    have_neon(cpu_flags)   ? 8 : 1;
1669

1670
            if ((ret = initFilter(&c->hLumFilter, &c->hLumFilterPos,
1671 1672 1673 1674
                           &c->hLumFilterSize, c->lumXInc,
                           srcW, dstW, filterAlign, 1 << 14,
                           (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
                           cpu_flags, srcFilter->lumH, dstFilter->lumH,
1675 1676
                           c->param,
                           get_local_pos(c, 0, 0, 0),
1677
                           get_local_pos(c, 0, 0, 0))) < 0)
1678
                goto fail;
1679
            if ((ret = initFilter(&c->hChrFilter, &c->hChrFilterPos,
1680 1681 1682 1683
                           &c->hChrFilterSize, c->chrXInc,
                           c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
                           (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
                           cpu_flags, srcFilter->chrH, dstFilter->chrH,
1684 1685
                           c->param,
                           get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0),
1686
                           get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0))) < 0)
1687 1688 1689 1690 1691 1692
                goto fail;
        }
    } // initialize horizontal stuff

    /* precalculate vertical scaler filter coefficients */
    {
1693
        const int filterAlign = X86_MMX(cpu_flags)     ? 2 :
1694 1695
                                PPC_ALTIVEC(cpu_flags) ? 8 :
                                have_neon(cpu_flags)   ? 2 : 1;
1696

1697
        if ((ret = initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
1698 1699 1700
                       c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
                       (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
                       cpu_flags, srcFilter->lumV, dstFilter->lumV,
1701 1702
                       c->param,
                       get_local_pos(c, 0, 0, 1),
1703
                       get_local_pos(c, 0, 0, 1))) < 0)
1704
            goto fail;
1705
        if ((ret = initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
1706 1707 1708 1709
                       c->chrYInc, c->chrSrcH, c->chrDstH,
                       filterAlign, (1 << 12),
                       (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
                       cpu_flags, srcFilter->chrV, dstFilter->chrV,
1710 1711
                       c->param,
                       get_local_pos(c, c->chrSrcVSubSample, c->src_v_chr_pos, 1),
1712
                       get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1))) < 0)
1713

1714 1715
            goto fail;

1716
#if HAVE_ALTIVEC
1717 1718
        FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof(vector signed short) * c->vLumFilterSize * c->dstH,    fail);
        FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof(vector signed short) * c->vChrFilterSize * c->chrDstH, fail);
1719

1720
        for (i = 0; i < c->vLumFilterSize * c->dstH; i++) {
1721 1722
            int j;
            short *p = (short *)&c->vYCoeffsBank[i];
1723
            for (j = 0; j < 8; j++)
1724 1725 1726
                p[j] = c->vLumFilter[i];
        }

1727
        for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) {
1728 1729
            int j;
            short *p = (short *)&c->vCCoeffsBank[i];
1730
            for (j = 0; j < 8; j++)
1731 1732 1733 1734 1735
                p[j] = c->vChrFilter[i];
        }
#endif
    }

1736 1737 1738
    for (i = 0; i < 4; i++)
        FF_ALLOCZ_OR_GOTO(c, c->dither_error[i], (c->dstW+2) * sizeof(int), fail);

1739 1740
    c->needAlpha = (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) ? 1 : 0;

1741
    // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
1742
    c->uv_off   = (dst_stride>>1) + 64 / (c->dstBpc &~ 7);
1743
    c->uv_offx2 = dst_stride + 16;
1744

1745
    av_assert0(c->chrDstH <= dstH);
1746

1747
    if (flags & SWS_PRINT_INFO) {
1748
        const char *scaler = NULL, *cpucaps;
1749

1750 1751 1752 1753 1754 1755 1756 1757 1758
        for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) {
            if (flags & scale_algorithms[i].flag) {
                scaler = scale_algorithms[i].description;
                break;
            }
        }
        if (!scaler)
            scaler =  "ehh flags invalid?!";
        av_log(c, AV_LOG_INFO, "%s scaler, from %s to %s%s ",
1759
               scaler,
1760
               av_get_pix_fmt_name(srcFormat),
1761
#ifdef DITHER1XBPP
1762 1763 1764
               dstFormat == AV_PIX_FMT_BGR555   || dstFormat == AV_PIX_FMT_BGR565   ||
               dstFormat == AV_PIX_FMT_RGB444BE || dstFormat == AV_PIX_FMT_RGB444LE ||
               dstFormat == AV_PIX_FMT_BGR444BE || dstFormat == AV_PIX_FMT_BGR444LE ?
1765
                                                             "dithered " : "",
1766 1767 1768
#else
               "",
#endif
1769
               av_get_pix_fmt_name(dstFormat));
1770

1771
        if (INLINE_MMXEXT(cpu_flags))
1772
            cpucaps = "MMXEXT";
1773
        else if (INLINE_AMD3DNOW(cpu_flags))
1774
            cpucaps = "3DNOW";
1775
        else if (INLINE_MMX(cpu_flags))
1776
            cpucaps = "MMX";
1777
        else if (PPC_ALTIVEC(cpu_flags))
1778
            cpucaps = "AltiVec";
1779
        else
1780 1781 1782
            cpucaps = "C";

        av_log(c, AV_LOG_INFO, "using %s\n", cpucaps);
1783 1784

        av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
1785 1786
        av_log(c, AV_LOG_DEBUG,
               "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1787
               c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
1788 1789 1790 1791
        av_log(c, AV_LOG_DEBUG,
               "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
               c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH,
               c->chrXInc, c->chrYInc);
1792 1793
    }

1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809
    /* alpha blend special case, note this has been split via cascaded contexts if it's scaled */
    if (unscaled && !usesHFilter && !usesVFilter &&
        c->alphablend != SWS_ALPHA_BLEND_NONE &&
        isALPHA(srcFormat) &&
        (c->srcRange == c->dstRange || isAnyRGB(dstFormat)) &&
        alphaless_fmt(srcFormat) == dstFormat
    ) {
        c->swscale = ff_sws_alphablendaway;

        if (flags & SWS_PRINT_INFO)
            av_log(c, AV_LOG_INFO,
                    "using alpha blendaway %s -> %s special converter\n",
                    av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
        return 0;
    }

1810 1811
    /* unscaled special cases */
    if (unscaled && !usesHFilter && !usesVFilter &&
1812 1813 1814
        (c->srcRange == c->dstRange || isAnyRGB(dstFormat) ||
         srcFormat == AV_PIX_FMT_GRAYF32 && dstFormat == AV_PIX_FMT_GRAY8 ||
         srcFormat == AV_PIX_FMT_GRAY8 && dstFormat == AV_PIX_FMT_GRAYF32)) {
1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825
        ff_get_unscaled_swscale(c);

        if (c->swscale) {
            if (flags & SWS_PRINT_INFO)
                av_log(c, AV_LOG_INFO,
                       "using unscaled %s -> %s special converter\n",
                       av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
            return 0;
        }
    }

1826
    c->swscale = ff_getSwsFunc(c);
1827
    return ff_init_filters(c);
1828
fail: // FIXME replace things by appropriate error codes
1829 1830 1831 1832 1833
    if (ret == RETCODE_USE_CASCADE)  {
        int tmpW = sqrt(srcW * (int64_t)dstW);
        int tmpH = sqrt(srcH * (int64_t)dstH);
        enum AVPixelFormat tmpFormat = AV_PIX_FMT_YUV420P;

1834 1835 1836
        if (isALPHA(srcFormat))
            tmpFormat = AV_PIX_FMT_YUVA420P;

1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857
        if (srcW*(int64_t)srcH <= 4LL*dstW*dstH)
            return AVERROR(EINVAL);

        ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride,
                             tmpW, tmpH, tmpFormat, 64);
        if (ret < 0)
            return ret;

        c->cascaded_context[0] = sws_getContext(srcW, srcH, srcFormat,
                                                tmpW, tmpH, tmpFormat,
                                                flags, srcFilter, NULL, c->param);
        if (!c->cascaded_context[0])
            return -1;

        c->cascaded_context[1] = sws_getContext(tmpW, tmpH, tmpFormat,
                                                dstW, dstH, dstFormat,
                                                flags, NULL, dstFilter, c->param);
        if (!c->cascaded_context[1])
            return -1;
        return 0;
    }
1858 1859
    return -1;
}
1860

1861 1862 1863
SwsContext *sws_alloc_set_opts(int srcW, int srcH, enum AVPixelFormat srcFormat,
                               int dstW, int dstH, enum AVPixelFormat dstFormat,
                               int flags, const double *param)
1864 1865 1866
{
    SwsContext *c;

1867
    if (!(c = sws_alloc_context()))
1868 1869
        return NULL;

1870 1871 1872 1873 1874 1875 1876
    c->flags     = flags;
    c->srcW      = srcW;
    c->srcH      = srcH;
    c->dstW      = dstW;
    c->dstH      = dstH;
    c->srcFormat = srcFormat;
    c->dstFormat = dstFormat;
1877 1878 1879 1880 1881 1882

    if (param) {
        c->param[0] = param[0];
        c->param[1] = param[1];
    }

1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898
    return c;
}

/**
 * Allocate a context via sws_alloc_set_opts() and initialize it with
 * sws_init_context().
 *
 * @return the ready-to-use context, or NULL if allocation failed or
 *         sws_init_context() returned a negative value (the partially
 *         built context is freed in that case)
 */
SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
                           int dstW, int dstH, enum AVPixelFormat dstFormat,
                           int flags, SwsFilter *srcFilter,
                           SwsFilter *dstFilter, const double *param)
{
    SwsContext *ctx = sws_alloc_set_opts(srcW, srcH, srcFormat,
                                         dstW, dstH, dstFormat,
                                         flags, param);

    if (ctx && sws_init_context(ctx, srcFilter, dstFilter) < 0) {
        sws_freeContext(ctx);
        ctx = NULL;
    }

    return ctx;
}

1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922
/**
 * Report whether any coefficient of the vector is NaN.
 *
 * @return 1 if at least one coefficient is NaN, 0 otherwise
 */
static int isnan_vec(SwsVector *a)
{
    int idx = 0;

    while (idx < a->length) {
        if (isnan(a->coeff[idx]))
            return 1;
        idx++;
    }

    return 0;
}

/**
 * Overwrite every coefficient of the vector with NaN.
 *
 * The in-place vector operations in this file call this when they cannot
 * allocate their result, so the failure stays detectable through isnan_vec().
 */
static void makenan_vec(SwsVector *a)
{
    int idx = 0;

    while (idx < a->length) {
        a->coeff[idx] = NAN;
        idx++;
    }
}

1923 1924 1925 1926 1927
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
                                float lumaSharpen, float chromaSharpen,
                                float chromaHShift, float chromaVShift,
                                int verbose)
{
1928
    SwsFilter *filter = av_malloc(sizeof(SwsFilter));
1929 1930 1931
    if (!filter)
        return NULL;

1932 1933 1934
    if (lumaGBlur != 0.0) {
        filter->lumH = sws_getGaussianVec(lumaGBlur, 3.0);
        filter->lumV = sws_getGaussianVec(lumaGBlur, 3.0);
1935
    } else {
1936 1937
        filter->lumH = sws_getIdentityVec();
        filter->lumV = sws_getIdentityVec();
1938 1939
    }

1940 1941 1942
    if (chromaGBlur != 0.0) {
        filter->chrH = sws_getGaussianVec(chromaGBlur, 3.0);
        filter->chrV = sws_getGaussianVec(chromaGBlur, 3.0);
1943
    } else {
1944 1945
        filter->chrH = sws_getIdentityVec();
        filter->chrV = sws_getIdentityVec();
1946 1947
    }

1948 1949
    if (!filter->lumH || !filter->lumV || !filter->chrH || !filter->chrV)
        goto fail;
1950

1951 1952
    if (chromaSharpen != 0.0) {
        SwsVector *id = sws_getIdentityVec();
1953 1954
        if (!id)
            goto fail;
1955 1956 1957 1958 1959 1960 1961
        sws_scaleVec(filter->chrH, -chromaSharpen);
        sws_scaleVec(filter->chrV, -chromaSharpen);
        sws_addVec(filter->chrH, id);
        sws_addVec(filter->chrV, id);
        sws_freeVec(id);
    }

1962 1963
    if (lumaSharpen != 0.0) {
        SwsVector *id = sws_getIdentityVec();
1964 1965
        if (!id)
            goto fail;
1966 1967 1968 1969 1970 1971 1972 1973
        sws_scaleVec(filter->lumH, -lumaSharpen);
        sws_scaleVec(filter->lumV, -lumaSharpen);
        sws_addVec(filter->lumH, id);
        sws_addVec(filter->lumV, id);
        sws_freeVec(id);
    }

    if (chromaHShift != 0.0)
1974
        sws_shiftVec(filter->chrH, (int)(chromaHShift + 0.5));
1975 1976

    if (chromaVShift != 0.0)
1977
        sws_shiftVec(filter->chrV, (int)(chromaVShift + 0.5));
1978 1979 1980 1981 1982 1983

    sws_normalizeVec(filter->chrH, 1.0);
    sws_normalizeVec(filter->chrV, 1.0);
    sws_normalizeVec(filter->lumH, 1.0);
    sws_normalizeVec(filter->lumV, 1.0);

1984 1985 1986 1987 1988 1989
    if (isnan_vec(filter->chrH) ||
        isnan_vec(filter->chrV) ||
        isnan_vec(filter->lumH) ||
        isnan_vec(filter->lumV))
        goto fail;

1990 1991 1992 1993
    if (verbose)
        sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG);
    if (verbose)
        sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG);
1994 1995

    return filter;
1996 1997 1998 1999 2000 2001 2002 2003

fail:
    sws_freeVec(filter->lumH);
    sws_freeVec(filter->lumV);
    sws_freeVec(filter->chrH);
    sws_freeVec(filter->chrV);
    av_freep(&filter);
    return NULL;
2004 2005 2006 2007
}

SwsVector *sws_allocVec(int length)
{
2008 2009 2010 2011 2012 2013
    SwsVector *vec;

    if(length <= 0 || length > INT_MAX/ sizeof(double))
        return NULL;

    vec = av_malloc(sizeof(SwsVector));
2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024
    if (!vec)
        return NULL;
    vec->length = length;
    vec->coeff  = av_malloc(sizeof(double) * length);
    if (!vec->coeff)
        av_freep(&vec);
    return vec;
}

SwsVector *sws_getGaussianVec(double variance, double quality)
{
2025
    const int length = (int)(variance * quality + 0.5) | 1;
2026
    int i;
2027
    double middle  = (length - 1) * 0.5;
2028 2029 2030 2031 2032 2033
    SwsVector *vec;

    if(variance < 0 || quality < 0)
        return NULL;

    vec = sws_allocVec(length);
2034 2035 2036 2037

    if (!vec)
        return NULL;

2038 2039 2040 2041
    for (i = 0; i < length; i++) {
        double dist = i - middle;
        vec->coeff[i] = exp(-dist * dist / (2 * variance * variance)) /
                        sqrt(2 * variance * M_PI);
2042 2043 2044 2045 2046 2047 2048
    }

    sws_normalizeVec(vec, 1.0);

    return vec;
}

2049 2050 2051 2052 2053 2054 2055
/**
 * Allocate and return a vector with length coefficients, all
 * with the same value c.
 */
#if !FF_API_SWS_VECTOR
static
#endif
2056 2057 2058
SwsVector *sws_getConstVec(double c, int length)
{
    int i;
2059
    SwsVector *vec = sws_allocVec(length);
2060 2061 2062 2063

    if (!vec)
        return NULL;

2064 2065
    for (i = 0; i < length; i++)
        vec->coeff[i] = c;
2066 2067 2068 2069

    return vec;
}

2070 2071 2072 2073 2074 2075 2076
/**
 * Allocate and return a vector with just one coefficient, with
 * value 1.0.
 */
#if !FF_API_SWS_VECTOR
static
#endif
2077 2078 2079 2080 2081
SwsVector *sws_getIdentityVec(void)
{
    return sws_getConstVec(1.0, 1);
}

2082
/**
 * Return the sum of all coefficients of the vector.
 */
static double sws_dcVec(SwsVector *a)
{
    double total = 0;
    int idx = 0;

    while (idx < a->length) {
        total += a->coeff[idx];
        idx++;
    }

    return total;
}

/**
 * Multiply every coefficient of the vector by scalar, in place.
 */
void sws_scaleVec(SwsVector *a, double scalar)
{
    int idx = 0;

    while (idx < a->length) {
        a->coeff[idx] *= scalar;
        idx++;
    }
}

/**
 * Scale the vector in place so that its coefficients sum to height.
 *
 * The scale factor is height divided by the current coefficient sum;
 * no special handling is done for a sum of zero.
 */
void sws_normalizeVec(SwsVector *a, double height)
{
    const double factor = height / sws_dcVec(a);

    sws_scaleVec(a, factor);
}

2106
#if FF_API_SWS_VECTOR
/**
 * Return a newly allocated vector holding the convolution of a and b.
 * The result has a->length + b->length - 1 coefficients.
 *
 * @return the new vector, or NULL if allocation fails
 */
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b)
{
    const int out_len = a->length + b->length - 1;
    SwsVector *out = sws_getConstVec(0.0, out_len);
    int ia, ib;

    if (!out)
        return NULL;

    for (ia = 0; ia < a->length; ia++) {
        const double lhs = a->coeff[ia];

        for (ib = 0; ib < b->length; ib++)
            out->coeff[ia + ib] += lhs * b->coeff[ib];
    }

    return out;
}
#endif
2125 2126 2127

/**
 * Return a newly allocated vector holding the element-wise sum of a and b.
 *
 * The result is as long as the longer input; each input is placed so that
 * its middle lines up with the middle of the result.
 *
 * @return the new vector, or NULL if allocation fails
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b)
{
    const int out_len = FFMAX(a->length, b->length);
    const int out_mid = (out_len - 1) / 2;
    SwsVector *out = sws_getConstVec(0.0, out_len);
    int idx;

    if (!out)
        return NULL;

    for (idx = 0; idx < a->length; idx++)
        out->coeff[idx + out_mid - (a->length - 1) / 2] += a->coeff[idx];

    for (idx = 0; idx < b->length; idx++)
        out->coeff[idx + out_mid - (b->length - 1) / 2] += b->coeff[idx];

    return out;
}

2143
#if FF_API_SWS_VECTOR
/**
 * Return a newly allocated vector holding the element-wise difference a - b.
 *
 * The result is as long as the longer input; each input is placed so that
 * its middle lines up with the middle of the result.
 *
 * @return the new vector, or NULL if allocation fails
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b)
{
    const int out_len = FFMAX(a->length, b->length);
    const int out_mid = (out_len - 1) / 2;
    SwsVector *out = sws_getConstVec(0.0, out_len);
    int idx;

    if (!out)
        return NULL;

    for (idx = 0; idx < a->length; idx++)
        out->coeff[idx + out_mid - (a->length - 1) / 2] += a->coeff[idx];

    for (idx = 0; idx < b->length; idx++)
        out->coeff[idx + out_mid - (b->length - 1) / 2] -= b->coeff[idx];

    return out;
}
#endif
2161 2162 2163 2164

/**
 * Return a newly allocated copy of a, shifted left by shift positions
 * (or right if shift is negative).
 *
 * The result has a->length + 2 * |shift| coefficients and is zero outside
 * the copied range.
 *
 * @return the new vector, or NULL if the result length would not fit in an
 *         int or if allocation fails
 */
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift)
{
    int length, offset, i;
    SwsVector *vec;

    /* FFABS(INT_MIN) and a->length + 2 * |shift| can overflow a signed int,
     * which is undefined behavior; treat such requests as a failure.
     * NOTE(review): assumes a->length is positive, as sws_allocVec() enforces. */
    if (shift == INT_MIN || FFABS(shift) > (INT_MAX - a->length) / 2)
        return NULL;

    length = a->length + FFABS(shift) * 2;
    vec    = sws_getConstVec(0.0, length);
    if (!vec)
        return NULL;

    /* Align the middle of a with the middle of the result, then shift. */
    offset = (length - 1) / 2 - (a->length - 1) / 2 - shift;
    for (i = 0; i < a->length; i++)
        vec->coeff[i + offset] = a->coeff[i];

    return vec;
}

2180 2181 2182
#if !FF_API_SWS_VECTOR
static
#endif
2183 2184
void sws_shiftVec(SwsVector *a, int shift)
{
2185
    SwsVector *shifted = sws_getShiftedVec(a, shift);
2186 2187 2188 2189
    if (!shifted) {
        makenan_vec(a);
        return;
    }
2190
    av_free(a->coeff);
2191 2192
    a->coeff  = shifted->coeff;
    a->length = shifted->length;
2193 2194 2195
    av_free(shifted);
}

2196 2197 2198
#if !FF_API_SWS_VECTOR
static
#endif
2199 2200
void sws_addVec(SwsVector *a, SwsVector *b)
{
2201
    SwsVector *sum = sws_sumVec(a, b);
2202 2203 2204 2205
    if (!sum) {
        makenan_vec(a);
        return;
    }
2206
    av_free(a->coeff);
2207 2208
    a->coeff  = sum->coeff;
    a->length = sum->length;
2209 2210 2211
    av_free(sum);
}

2212
#if FF_API_SWS_VECTOR
2213 2214
/**
 * Replace a with the difference a - b, as computed by sws_diffVec().
 * If the difference cannot be created, a is filled with NaN instead.
 */
void sws_subVec(SwsVector *a, SwsVector *b)
{
    SwsVector *tmp = sws_diffVec(a, b);

    if (tmp) {
        /* Take over the new coefficient buffer; only the temporary
         * wrapper struct is released. */
        av_free(a->coeff);
        a->length = tmp->length;
        a->coeff  = tmp->coeff;
        av_free(tmp);
    } else {
        makenan_vec(a);
    }
}

/**
 * Replace a with the convolution of a and b, as computed by sws_getConvVec().
 * If the convolution cannot be created, a is filled with NaN instead.
 */
void sws_convVec(SwsVector *a, SwsVector *b)
{
    SwsVector *tmp = sws_getConvVec(a, b);

    if (tmp) {
        /* Take over the new coefficient buffer; only the temporary
         * wrapper struct is released. */
        av_free(a->coeff);
        a->length = tmp->length;
        a->coeff  = tmp->coeff;
        av_free(tmp);
    } else {
        makenan_vec(a);
    }
}

/**
 * Allocate and return a copy of the vector a.
 *
 * @return the new vector, or NULL if sws_allocVec() fails
 */
SwsVector *sws_cloneVec(SwsVector *a)
{
    SwsVector *copy = sws_allocVec(a->length);

    if (copy)
        memcpy(copy->coeff, a->coeff, a->length * sizeof(*a->coeff));

    return copy;
}
2250
#endif
2251

2252 2253 2254 2255 2256 2257 2258
/**
 * Print with av_log() a textual representation of the vector a
 * if log_level <= av_log_level.
 */
#if !FF_API_SWS_VECTOR
static
#endif
2259 2260 2261
void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level)
{
    int i;
2262 2263
    double max = 0;
    double min = 0;
2264 2265
    double range;

2266 2267 2268
    for (i = 0; i < a->length; i++)
        if (a->coeff[i] > max)
            max = a->coeff[i];
2269

2270 2271 2272
    for (i = 0; i < a->length; i++)
        if (a->coeff[i] < min)
            min = a->coeff[i];
2273

2274
    range = max - min;
2275

2276 2277
    for (i = 0; i < a->length; i++) {
        int x = (int)((a->coeff[i] - min) * 60.0 / range + 0.5);
2278
        av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]);
2279 2280
        for (; x > 0; x--)
            av_log(log_ctx, log_level, " ");
2281 2282 2283 2284 2285 2286
        av_log(log_ctx, log_level, "|\n");
    }
}

/**
 * Free a vector together with its coefficient array.
 * Passing NULL is allowed and does nothing.
 */
void sws_freeVec(SwsVector *a)
{
    if (a) {
        av_freep(&a->coeff);
        a->length = 0;
        av_free(a);
    }
}

/**
 * Free a filter and the four vectors it references
 * (lumH, lumV, chrH, chrV). Passing NULL is allowed and does nothing.
 */
void sws_freeFilter(SwsFilter *filter)
{
    if (filter) {
        sws_freeVec(filter->lumH);
        sws_freeVec(filter->lumV);
        sws_freeVec(filter->chrH);
        sws_freeVec(filter->chrV);
        av_free(filter);
    }
}

/**
 * Free a scaler context and the buffers it references.
 *
 * The three cascaded sub-contexts are released recursively through this
 * same function. Passing NULL is allowed and does nothing.
 */
void sws_freeContext(SwsContext *c)
{
    int idx;

    if (!c)
        return;

    for (idx = 0; idx < 4; idx++)
        av_freep(&c->dither_error[idx]);

    /* Filter coefficient arrays and their position arrays. */
    av_freep(&c->vLumFilter);
    av_freep(&c->vLumFilterPos);
    av_freep(&c->vChrFilter);
    av_freep(&c->vChrFilterPos);
    av_freep(&c->hLumFilter);
    av_freep(&c->hLumFilterPos);
    av_freep(&c->hChrFilter);
    av_freep(&c->hChrFilterPos);

#if HAVE_ALTIVEC
    av_freep(&c->vYCoeffsBank);
    av_freep(&c->vCCoeffsBank);
#endif

#if HAVE_MMX_INLINE
    /* The filter code buffers are released with the routine selected by
     * the same build-time switches used below. */
#if USE_MMAP
    if (c->lumMmxextFilterCode)
        munmap(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize);
    if (c->chrMmxextFilterCode)
        munmap(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize);
#elif HAVE_VIRTUALALLOC
    if (c->lumMmxextFilterCode)
        VirtualFree(c->lumMmxextFilterCode, 0, MEM_RELEASE);
    if (c->chrMmxextFilterCode)
        VirtualFree(c->chrMmxextFilterCode, 0, MEM_RELEASE);
#else
    av_free(c->lumMmxextFilterCode);
    av_free(c->chrMmxextFilterCode);
#endif
    c->lumMmxextFilterCode = NULL;
    c->chrMmxextFilterCode = NULL;
#endif /* HAVE_MMX_INLINE */

    av_freep(&c->yuvTable);
    av_freep(&c->formatConvBuffer);

    /* Release the cascaded sub-contexts, then clear the stale pointers. */
    for (idx = 0; idx < 3; idx++)
        sws_freeContext(c->cascaded_context[idx]);
    memset(c->cascaded_context, 0, sizeof(c->cascaded_context));

    av_freep(&c->cascaded_tmp[0]);
    av_freep(&c->cascaded1_tmp[0]);

    av_freep(&c->gamma);
    av_freep(&c->inv_gamma);

    ff_free_filters(c);

    av_free(c);
}

2366
/**
 * Return a context matching the requested parameters, reusing the given
 * one when possible.
 *
 * If context is non-NULL and its source/destination sizes, formats, flags
 * and both param values equal the requested ones, it is returned unchanged
 * (srcFilter and dstFilter are not compared). Otherwise the old context,
 * if any, is freed after reading its four chroma position options, and a
 * new context is allocated, given those option values (or -513 when there
 * was no old context) and initialized.
 *
 * @param param two values stored in the context; when NULL, both default
 *              to SWS_PARAM_DEFAULT
 * @return the reused or newly created context, or NULL if allocation or
 *         initialization failed (a mismatching input context has been
 *         freed by then)
 */
struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW,
                                        int srcH, enum AVPixelFormat srcFormat,
                                        int dstW, int dstH,
                                        enum AVPixelFormat dstFormat, int flags,
                                        SwsFilter *srcFilter,
                                        SwsFilter *dstFilter,
                                        const double *param)
{
    static const double default_param[2] = { SWS_PARAM_DEFAULT,
                                             SWS_PARAM_DEFAULT };
    int64_t src_h_chr_pos = -513;
    int64_t src_v_chr_pos = -513;
    int64_t dst_h_chr_pos = -513;
    int64_t dst_v_chr_pos = -513;

    if (!param)
        param = default_param;

    if (context) {
        int reusable = context->srcW      == srcW      &&
                       context->srcH      == srcH      &&
                       context->srcFormat == srcFormat &&
                       context->dstW      == dstW      &&
                       context->dstH      == dstH      &&
                       context->dstFormat == dstFormat &&
                       context->flags     == flags     &&
                       context->param[0]  == param[0]  &&
                       context->param[1]  == param[1];

        if (reusable)
            return context;

        /* Carry the chroma position options over to the replacement. */
        av_opt_get_int(context, "src_h_chr_pos", 0, &src_h_chr_pos);
        av_opt_get_int(context, "src_v_chr_pos", 0, &src_v_chr_pos);
        av_opt_get_int(context, "dst_h_chr_pos", 0, &dst_h_chr_pos);
        av_opt_get_int(context, "dst_v_chr_pos", 0, &dst_v_chr_pos);
        sws_freeContext(context);
    }

    context = sws_alloc_context();
    if (!context)
        return NULL;

    context->srcW      = srcW;
    context->srcH      = srcH;
    context->srcFormat = srcFormat;
    context->dstW      = dstW;
    context->dstH      = dstH;
    context->dstFormat = dstFormat;
    context->flags     = flags;
    context->param[0]  = param[0];
    context->param[1]  = param[1];

    av_opt_set_int(context, "src_h_chr_pos", src_h_chr_pos, 0);
    av_opt_set_int(context, "src_v_chr_pos", src_v_chr_pos, 0);
    av_opt_set_int(context, "dst_h_chr_pos", dst_h_chr_pos, 0);
    av_opt_set_int(context, "dst_v_chr_pos", dst_v_chr_pos, 0);

    if (sws_init_context(context, srcFilter, dstFilter) < 0) {
        sws_freeContext(context);
        return NULL;
    }

    return context;
}