/* swscale_unscaled.c */
/*
 * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/arm/cpu.h"

/*
 * RGBA-to-NV12 conversion routines. Only the prototypes are visible in this
 * file; the definitions live elsewhere (presumably NEON assembly -- confirm).
 *
 * Arguments, as supplied by the wrappers in this file:
 *   src         start of the packed source rows
 *   y           start of the luma output rows
 *   chroma      start of the chroma output rows
 *   width       context->srcW
 *   height      number of rows in the slice
 *   y_stride    line size of the luma plane
 *   c_stride    line size of the chroma plane
 *   src_stride  line size of the source plane
 *   coeff_tbl   context->input_rgb2yuv_table
 *
 * The _32 variant is the one selected when SWS_ACCURATE_RND is set, the _16
 * variant otherwise.
 */
extern void rgbx_to_nv12_neon_32(const uint8_t *src, uint8_t *y, uint8_t *chroma,
                int width, int height,
                int y_stride, int c_stride, int src_stride,
                int32_t coeff_tbl[9]);

extern void rgbx_to_nv12_neon_16(const uint8_t *src, uint8_t *y, uint8_t *chroma,
                int width, int height,
                int y_stride, int c_stride, int src_stride,
                int32_t coeff_tbl[9]);

/**
 * swscale callback converting a slice of packed RGBA to NV12 through
 * rgbx_to_nv12_neon_32().
 *
 * @param context    conversion context; srcW and input_rgb2yuv_table are read
 * @param src        source planes; src[0] is the first row of this slice
 * @param srcStride  source line sizes in bytes
 * @param srcSliceY  row index of the slice within the whole image
 * @param srcSliceH  number of rows in the slice
 * @param dst        destination planes; dst[0] is luma, dst[1] is chroma
 * @param dstStride  destination line sizes in bytes
 * @return the number of rows processed (srcSliceH)
 */
static int rgbx_to_nv12_neon_32_wrapper(SwsContext *context, const uint8_t *src[],
                        int srcStride[], int srcSliceY, int srcSliceH,
                        uint8_t *dst[], int dstStride[]) {
    /* src[] already addresses the first row of the slice (the YUV/NV wrappers
     * in this file pass it through untouched), so only the destination planes
     * are advanced to the slice position. */
    uint8_t *luma   = dst[0] + srcSliceY * dstStride[0];
    /* The chroma plane holds one row for every two luma rows. */
    uint8_t *chroma = dst[1] + (srcSliceY / 2) * dstStride[1];

    rgbx_to_nv12_neon_32(src[0], luma, chroma,
            context->srcW, srcSliceH,
            dstStride[0], dstStride[1], srcStride[0],
            context->input_rgb2yuv_table);

    /* Report the height of the slice that was written rather than 0. */
    return srcSliceH;
}

/**
 * swscale callback converting a slice of packed RGBA to NV12 through
 * rgbx_to_nv12_neon_16().
 *
 * @param context    conversion context; srcW and input_rgb2yuv_table are read
 * @param src        source planes; src[0] is the first row of this slice
 * @param srcStride  source line sizes in bytes
 * @param srcSliceY  row index of the slice within the whole image
 * @param srcSliceH  number of rows in the slice
 * @param dst        destination planes; dst[0] is luma, dst[1] is chroma
 * @param dstStride  destination line sizes in bytes
 * @return the number of rows processed (srcSliceH)
 */
static int rgbx_to_nv12_neon_16_wrapper(SwsContext *context, const uint8_t *src[],
                        int srcStride[], int srcSliceY, int srcSliceH,
                        uint8_t *dst[], int dstStride[]) {
    /* src[] already addresses the first row of the slice (the YUV/NV wrappers
     * in this file pass it through untouched), so only the destination planes
     * are advanced to the slice position. */
    uint8_t *luma   = dst[0] + srcSliceY * dstStride[0];
    /* The chroma plane holds one row for every two luma rows. */
    uint8_t *chroma = dst[1] + (srcSliceY / 2) * dstStride[1];

    rgbx_to_nv12_neon_16(src[0], luma, chroma,
            context->srcW, srcSliceH,
            dstStride[0], dstStride[1], srcStride[0],
            context->input_rgb2yuv_table);

    /* Report the height of the slice that was written rather than 0. */
    return srcSliceH;
}

/*
 * Initializer list holding the four chroma coefficients (v2r, u2g, v2g, u2b)
 * read from the SwsContext that is named `c` at the expansion site.
 * The trailing comma is kept so the list can be dropped straight into a
 * brace-enclosed array initializer.
 */
#define YUV_TO_RGB_TABLE                                                                    \
        c->yuv2rgb_v2r_coeff,                                                               \
        c->yuv2rgb_u2g_coeff,                                                               \
        c->yuv2rgb_v2g_coeff,                                                               \
        c->yuv2rgb_u2b_coeff,

/*
 * DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt) emits two things:
 *  - the prototype of ff_<ifmt>_to_<ofmt>_neon(), which is defined outside
 *    this file;
 *  - <ifmt>_to_<ofmt>_neon_wrapper(), which adapts that routine to the
 *    c->swscale callback signature for input with separate Y, U and V planes.
 *
 * The wrapper passes the three source planes through unchanged and advances
 * only the destination by srcSliceY rows. It hands the routine the
 * coefficients from YUV_TO_RGB_TABLE, the luma offset shifted right by 6 and
 * the luma coefficient. It returns srcSliceH, the height of the slice that
 * was processed, as sws_scale() is documented to report.
 */
#define DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt)                                           \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h,                                              \
                                 uint8_t *dst, int linesize,                                \
                                 const uint8_t *srcY, int linesizeY,                        \
                                 const uint8_t *srcU, int linesizeU,                        \
                                 const uint8_t *srcV, int linesizeV,                        \
                                 const int16_t *table,                                      \
                                 int y_offset,                                              \
                                 int y_coeff);                                              \
                                                                                            \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[],             \
                                           int srcStride[], int srcSliceY, int srcSliceH,   \
                                           uint8_t *dst[], int dstStride[]) {               \
    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE };                                   \
                                                                                            \
    ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH,                                        \
                                 dst[0] + srcSliceY * dstStride[0], dstStride[0],           \
                                 src[0], srcStride[0],                                      \
                                 src[1], srcStride[1],                                      \
                                 src[2], srcStride[2],                                      \
                                 yuv2rgb_table,                                             \
                                 c->yuv2rgb_y_offset >> 6,                                  \
                                 c->yuv2rgb_y_coeff);                                       \
                                                                                            \
    return srcSliceH;                                                                       \
}

/*
 * Expands DECLARE_FF_YUVX_TO_RGBX_FUNCS once per packed output order
 * (argb, rgba, abgr, bgra) for the given planar input format.
 */
#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx)                                             \
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, argb)                                                   \
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, rgba)                                                   \
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, abgr)                                                   \
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, bgra)

103 104
DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuv420p)
DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuv422p)
105

/*
 * DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt) emits two things:
 *  - the prototype of ff_<ifmt>_to_<ofmt>_neon(), which is defined outside
 *    this file;
 *  - <ifmt>_to_<ofmt>_neon_wrapper(), which adapts that routine to the
 *    c->swscale callback signature for input with a luma plane (src[0]) and
 *    a single chroma plane (src[1]).
 *
 * The wrapper passes both source planes through unchanged and advances only
 * the destination by srcSliceY rows. It hands the routine the coefficients
 * from YUV_TO_RGB_TABLE, the luma offset shifted right by 6 and the luma
 * coefficient. It returns srcSliceH, the height of the slice that was
 * processed, as sws_scale() is documented to report.
 */
#define DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt)                                            \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h,                                              \
                                 uint8_t *dst, int linesize,                                \
                                 const uint8_t *srcY, int linesizeY,                        \
                                 const uint8_t *srcC, int linesizeC,                        \
                                 const int16_t *table,                                      \
                                 int y_offset,                                              \
                                 int y_coeff);                                              \
                                                                                            \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[],             \
                                           int srcStride[], int srcSliceY, int srcSliceH,   \
                                           uint8_t *dst[], int dstStride[]) {               \
    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE };                                   \
                                                                                            \
    ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH,                                        \
                                 dst[0] + srcSliceY * dstStride[0], dstStride[0],           \
                                 src[0], srcStride[0], src[1], srcStride[1],                \
                                 yuv2rgb_table,                                             \
                                 c->yuv2rgb_y_offset >> 6,                                  \
                                 c->yuv2rgb_y_coeff);                                       \
                                                                                            \
    return srcSliceH;                                                                       \
}

/*
 * Expands DECLARE_FF_NVX_TO_RGBX_FUNCS once per packed output order
 * (argb, rgba, abgr, bgra) for the given two-plane input format.
 */
#define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx)                                               \
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, argb)                                                     \
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, rgba)                                                     \
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, abgr)                                                     \
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, bgra)

136 137
DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nv12)
DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nv21)
138 139 140 141 142

/* We need a 16-pixel width alignment. This constraint could easily be removed
 * for input reading, but for the output, which is 4 bytes per pixel (RGBA), the
 * assembly might write as much as 4*15=60 extra bytes at the end of the
 * line, which won't fit the 32-byte buffer alignment.
 *
 * SET_FF_NVX_TO_RGBX_FUNC installs <ifmt>_to_<ofmt>_neon_wrapper as c->swscale
 * when the context `c` in scope at the expansion site converts
 * AV_PIX_FMT_<IFMT> to AV_PIX_FMT_<OFMT>, srcH is even, srcW is a multiple of
 * 16 and accurate_rnd is zero. Otherwise c->swscale is left untouched. */
#define SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd) do {                  \
    if (c->srcFormat == AV_PIX_FMT_##IFMT                                                   \
        && c->dstFormat == AV_PIX_FMT_##OFMT                                                \
        && !(c->srcH & 1)                                                                   \
        && !(c->srcW & 15)                                                                  \
        && !(accurate_rnd)) {                                                               \
        c->swscale = ifmt##_to_##ofmt##_neon_wrapper;                                       \
    }                                                                                       \
} while (0)

/*
 * Runs SET_FF_NVX_TO_RGBX_FUNC for each packed output order (ARGB, RGBA,
 * ABGR, BGRA) of the given input format. At most one of the four checks can
 * match, because each one compares c->dstFormat against a different constant.
 */
#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd) do {                            \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, argb, ARGB, accurate_rnd);                            \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, rgba, RGBA, accurate_rnd);                            \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, abgr, ABGR, accurate_rnd);                            \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, bgra, BGRA, accurate_rnd);                            \
} while (0)

/**
 * Install a NEON unscaled converter on the context when one applies.
 *
 * RGBA -> NV12 is taken for any source width of at least 16 pixels; the _32
 * wrapper is used when SWS_ACCURATE_RND is set and the _16 wrapper otherwise.
 * The NV12/NV21/YUV420P/YUV422P -> packed RGB paths are taken only under the
 * conditions checked by SET_FF_NVX_TO_RGBX_FUNC. When nothing matches,
 * c->swscale is left as it was.
 *
 * @param c conversion context to inspect and update
 */
static void get_unscaled_swscale_neon(SwsContext *c) {
    int accurate_rnd = c->flags & SWS_ACCURATE_RND;
    if (c->srcFormat == AV_PIX_FMT_RGBA
            && c->dstFormat == AV_PIX_FMT_NV12
            && (c->srcW >= 16)) {
        c->swscale = accurate_rnd ? rgbx_to_nv12_neon_32_wrapper
                        : rgbx_to_nv12_neon_16_wrapper;
    }

    SET_FF_NVX_TO_ALL_RGBX_FUNC(nv12, NV12, accurate_rnd);
    SET_FF_NVX_TO_ALL_RGBX_FUNC(nv21, NV21, accurate_rnd);
    SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv420p, YUV420P, accurate_rnd);
    SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv422p, YUV422P, accurate_rnd);
}

/**
 * ARM entry point for choosing an unscaled conversion routine.
 *
 * Queries the CPU flags and, when NEON is reported, lets
 * get_unscaled_swscale_neon() pick a converter for the context.
 * Without NEON the context is not modified.
 *
 * @param c conversion context to update
 */
void ff_get_unscaled_swscale_arm(SwsContext *c)
{
    const int flags = av_get_cpu_flags();

    if (!have_neon(flags))
        return;

    get_unscaled_swscale_neon(c);
}