Commit 8a8d0ce2 authored by Ronald S. Bultje's avatar Ronald S. Bultje

swscale: for >8bit scaling, read in native bit-depth.

For 9/10bit, it means we don't have to upscale to 16bit before
actual scaling or pixel format conversion, and thus a performance
gain.
parent cdc2c1c5
......@@ -220,7 +220,7 @@ yuv2yuvX_altivec_real(SwsContext *c,
}
}
static void hScale_altivec_real(int16_t *dst, int dstW,
static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
const uint8_t *src, const int16_t *filter,
const int16_t *filterPos, int filterSize)
{
......
......@@ -1766,59 +1766,6 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
// FIXME Maybe dither instead.
static av_always_inline void
yuv9_OR_10ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
const uint16_t *srcU, const uint16_t *srcV,
int width, enum PixelFormat origin, int depth)
{
int i;
for (i = 0; i < width; i++) {
int upx = input_pixel(&srcU[i]);
int vpx = input_pixel(&srcV[i]);
dstU[i] = (upx << (16 - depth)) | (upx >> (2 * depth - 16));
dstV[i] = (vpx << (16 - depth)) | (vpx >> (2 * depth - 16));
}
}
static av_always_inline void
yuv9_or_10ToY_c_template(uint16_t *dstY, const uint16_t *srcY,
int width, enum PixelFormat origin, int depth)
{
int i;
for (i = 0; i < width; i++) {
int px = input_pixel(&srcY[i]);
dstY[i] = (px << (16 - depth)) | (px >> (2 * depth - 16));
}
}
#undef input_pixel
#define YUV_NBPS(depth, BE_LE, origin) \
static void BE_LE ## depth ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
const uint8_t *_srcU, const uint8_t *_srcV, \
int width, uint32_t *unused) \
{ \
uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
const uint16_t *srcU = (const uint16_t *) _srcU, \
*srcV = (const uint16_t *) _srcV; \
yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
} \
static void BE_LE ## depth ## ToY_c(uint8_t *_dstY, const uint8_t *_srcY, \
int width, uint32_t *unused) \
{ \
uint16_t *dstY = (uint16_t *) _dstY; \
const uint16_t *srcY = (const uint16_t *) _srcY; \
yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
}
YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
int width, uint32_t *unused)
{
......@@ -1905,13 +1852,15 @@ static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
}
}
static void hScale16_c(int16_t *_dst, int dstW, const uint8_t *_src,
static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
const int16_t *filter,
const int16_t *filterPos, int filterSize)
{
int i;
int32_t *dst = (int32_t *) _dst;
const uint16_t *src = (const uint16_t *) _src;
int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
int sh = (bits <= 7) ? 11 : (bits - 4);
for (i = 0; i < dstW; i++) {
int j;
......@@ -1922,12 +1871,12 @@ static void hScale16_c(int16_t *_dst, int dstW, const uint8_t *_src,
val += src[srcPos + j] * filter[filterSize * i + j];
}
// filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
dst[i] = FFMIN(val >> 11, (1 << 19) - 1);
dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
}
}
// bilinear / bicubic scaling
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
const int16_t *filter, const int16_t *filterPos,
int filterSize)
{
......@@ -2063,7 +2012,7 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
}
if (!c->hyscale_fast) {
c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
} else { // fast bilinear upscale / crap downscale
c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
}
......@@ -2113,8 +2062,8 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
}
if (!c->hcscale_fast) {
c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
} else { // fast bilinear upscale / crap downscale
c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
}
......@@ -2645,21 +2594,21 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_PAL8 :
case PIX_FMT_BGR4_BYTE:
case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
#if HAVE_BIGENDIAN
case PIX_FMT_YUV444P9LE:
case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV444P10LE:
case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE:
case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
#if HAVE_BIGENDIAN
case PIX_FMT_YUV420P10LE:
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
#else
case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE:
case PIX_FMT_YUV420P10BE:
case PIX_FMT_YUV420P16BE:
case PIX_FMT_YUV422P16BE:
case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
......@@ -2712,22 +2661,22 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
c->lumToYV12 = NULL;
c->alpToYV12 = NULL;
switch (srcFormat) {
#if HAVE_BIGENDIAN
case PIX_FMT_YUV444P9LE:
case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV444P10LE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE:
case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
#if HAVE_BIGENDIAN
case PIX_FMT_YUV420P10LE:
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
case PIX_FMT_YUV444P16LE:
case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
#else
case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE:
case PIX_FMT_YUV420P10BE:
case PIX_FMT_YUV420P16BE:
case PIX_FMT_YUV422P16BE:
case PIX_FMT_YUV444P16BE:
......
......@@ -440,7 +440,7 @@ typedef struct SwsContext {
* (and input coefficients thus padded with zeroes)
* to simplify creating SIMD code.
*/
void (*hScale)(int16_t *dst, int dstW, const uint8_t *src,
void (*hScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
const int16_t *filter, const int16_t *filterPos,
int filterSize);
......
......@@ -877,7 +877,6 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
}
}
// FIXME it's even nicer if bpp isn't 16, but max({src,dst}formatbpp)
c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1,
av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 16 : 8;
if (c->scalingBpp == 16)
......
......@@ -1859,7 +1859,7 @@ static void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV,
#if !COMPILE_TEMPLATE_MMX2
// bilinear / bicubic scaling
static void RENAME(hScale)(int16_t *dst, int dstW,
static void RENAME(hScale)(SwsContext *c, int16_t *dst, int dstW,
const uint8_t *src, const int16_t *filter,
const int16_t *filterPos, int filterSize)
{
......
......@@ -31,12 +31,12 @@ uyvy422 314bd486277111a95d9369b944fa0400
yuv410p 7df8f6d69b56a8dcb6c7ee908e5018b5
yuv411p 1143e7c5cc28fe0922b051b17733bc4c
yuv420p fdad2d8df8985e3d17e73c71f713cb14
yuv420p10be 8b5ad855229840a8fa87786cab83f856
yuv420p10le 5264e87921d47b17411578d9c92672b1
yuv420p10be 2343beaf83fccc2ab23a590b2049d38b
yuv420p10le 94d511d783d175f573e7be5cce75ba4d
yuv420p16be f6ef3ba90f238b467c7e72ade927083d
yuv420p16le faf6aab3b1c16e8afbe160686dd360e0
yuv420p9be c14bf746d161face61e1f39b491bf7ef
yuv420p9le 59457f9a51768bf1d4342238d50a9be3
yuv420p9be fdafb9ad473a559246c4cb0a1f416cd8
yuv420p9le fccfd3c3941da635b13739f579819b5a
yuv422p 918e37701ee7377d16a8a6c119c56a40
yuv422p16be 837945d3a771366a5a72a4ed095a4f53
yuv422p16le b8292ae9b52eb7afc3d8b93e8fd895b4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment