Commit 4e3e333a authored by Ronald S. Bultje

swscale: error dithering for 16/9/10-bit to 8-bit.

Based on a somewhat similar idea in FFmpeg's swscale copy.
parent 7d7bacf0
...@@ -182,6 +182,18 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ ...@@ -182,6 +182,18 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{ 77, 23, 60, 15, 72, 21, 56, 14, }, { 77, 23, 60, 15, 72, 21, 56, 14, },
}; };
#endif #endif
/*
 * 8x8 dither offsets used when reducing 9/10/16-bit input to 8-bit output.
 *
 * The 64 entries are a permutation of the even values 0..126.
 * swScale() picks a row with (dstY & 7) for luma and (chrDstY & 7) for
 * chroma; the output functions pick a column with (i & 7) and add the
 * entry to the sample in place of the fixed rounding constant before
 * shifting down.
 */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
    {  36,  68,  60,  92,  34,  66,  58,  90, },
    { 100,   4, 124,  28,  98,   2, 122,  26, },
    {  52,  84,  44,  76,  50,  82,  42,  74, },
    { 116,  20, 108,  12, 114,  18, 106,  10, },
    {  32,  64,  56,  88,  38,  70,  62,  94, },
    {  96,   0, 120,  24, 102,   6, 126,  30, },
    {  48,  80,  40,  72,  54,  86,  46,  78, },
    { 112,  16, 104,   8, 118,  22, 110,  14, },
};
/*
 * Flat "no dithering" row: eight copies of 64.
 *
 * swScale() points both lumDither8 and chrDither8 here when the source is
 * not 9/10/16-bit. 64 is the plain rounding term for the >> 7 output path,
 * and 64 << 12 equals the previous 1 << 18 start value of the filtered
 * paths, so results match the pre-dither code.
 */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = {
    64, 64, 64, 64, 64, 64, 64, 64,
};
static av_always_inline void static av_always_inline void
yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc, yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
...@@ -285,10 +297,11 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, ...@@ -285,10 +297,11 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
*aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
int i; int i;
const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
//FIXME Optimize (just quickly written not optimized..) //FIXME Optimize (just quickly written not optimized..)
for (i=0; i<dstW; i++) { for (i=0; i<dstW; i++) {
int val=1<<18; int val = lumDither[i & 7] << 12;
int j; int j;
for (j=0; j<lumFilterSize; j++) for (j=0; j<lumFilterSize; j++)
val += lumSrc[j][i] * lumFilter[j]; val += lumSrc[j][i] * lumFilter[j];
...@@ -298,8 +311,8 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, ...@@ -298,8 +311,8 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
if (uDest) if (uDest)
for (i=0; i<chrDstW; i++) { for (i=0; i<chrDstW; i++) {
int u=1<<18; int u = chrDither[i & 7] << 12;
int v=1<<18; int v = chrDither[(i + 3) & 7] << 12;
int j; int j;
for (j=0; j<chrFilterSize; j++) { for (j=0; j<chrFilterSize; j++) {
u += chrUSrc[j][i] * chrFilter[j]; u += chrUSrc[j][i] * chrFilter[j];
...@@ -312,7 +325,7 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, ...@@ -312,7 +325,7 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
if (CONFIG_SWSCALE_ALPHA && aDest) if (CONFIG_SWSCALE_ALPHA && aDest)
for (i=0; i<dstW; i++) { for (i=0; i<dstW; i++) {
int val=1<<18; int val = lumDither[i & 7] << 12;
int j; int j;
for (j=0; j<lumFilterSize; j++) for (j=0; j<lumFilterSize; j++)
val += alpSrc[j][i] * lumFilter[j]; val += alpSrc[j][i] * lumFilter[j];
...@@ -329,23 +342,24 @@ static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, ...@@ -329,23 +342,24 @@ static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
*aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
int i; int i;
const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
for (i=0; i<dstW; i++) { for (i=0; i<dstW; i++) {
int val= (lumSrc[i]+64)>>7; int val = (lumSrc[i]+ lumDither[i & 7]) >> 7;
yDest[i]= av_clip_uint8(val); yDest[i]= av_clip_uint8(val);
} }
if (uDest) if (uDest)
for (i=0; i<chrDstW; i++) { for (i=0; i<chrDstW; i++) {
int u=(chrUSrc[i]+64)>>7; int u = (chrUSrc[i] + chrDither[i & 7]) >> 7;
int v=(chrVSrc[i]+64)>>7; int v = (chrVSrc[i] + chrDither[(i + 3) & 7]) >> 7;
uDest[i]= av_clip_uint8(u); uDest[i]= av_clip_uint8(u);
vDest[i]= av_clip_uint8(v); vDest[i]= av_clip_uint8(v);
} }
if (CONFIG_SWSCALE_ALPHA && aDest) if (CONFIG_SWSCALE_ALPHA && aDest)
for (i=0; i<dstW; i++) { for (i=0; i<dstW; i++) {
int val= (alpSrc[i]+64)>>7; int val = (alpSrc[i] + lumDither[i & 7]) >> 7;
aDest[i]= av_clip_uint8(val); aDest[i]= av_clip_uint8(val);
} }
} }
...@@ -359,11 +373,12 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, ...@@ -359,11 +373,12 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
{ {
uint8_t *yDest = dest[0], *uDest = dest[1]; uint8_t *yDest = dest[0], *uDest = dest[1];
enum PixelFormat dstFormat = c->dstFormat; enum PixelFormat dstFormat = c->dstFormat;
const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
//FIXME Optimize (just quickly written not optimized..) //FIXME Optimize (just quickly written not optimized..)
int i; int i;
for (i=0; i<dstW; i++) { for (i=0; i<dstW; i++) {
int val=1<<18; int val = lumDither[i & 7] << 12;
int j; int j;
for (j=0; j<lumFilterSize; j++) for (j=0; j<lumFilterSize; j++)
val += lumSrc[j][i] * lumFilter[j]; val += lumSrc[j][i] * lumFilter[j];
...@@ -376,8 +391,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, ...@@ -376,8 +391,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
if (dstFormat == PIX_FMT_NV12) if (dstFormat == PIX_FMT_NV12)
for (i=0; i<chrDstW; i++) { for (i=0; i<chrDstW; i++) {
int u=1<<18; int u = chrDither[i & 7] << 12;
int v=1<<18; int v = chrDither[(i + 3) & 7] << 12;
int j; int j;
for (j=0; j<chrFilterSize; j++) { for (j=0; j<chrFilterSize; j++) {
u += chrUSrc[j][i] * chrFilter[j]; u += chrUSrc[j][i] * chrFilter[j];
...@@ -389,8 +404,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, ...@@ -389,8 +404,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
} }
else else
for (i=0; i<chrDstW; i++) { for (i=0; i<chrDstW; i++) {
int u=1<<18; int u = chrDither[i & 7] << 12;
int v=1<<18; int v = chrDither[(i + 3) & 7] << 12;
int j; int j;
for (j=0; j<chrFilterSize; j++) { for (j=0; j<chrFilterSize; j++) {
u += chrUSrc[j][i] * chrFilter[j]; u += chrUSrc[j][i] * chrFilter[j];
...@@ -2352,6 +2367,7 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -2352,6 +2367,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
yuv2packed1_fn yuv2packed1 = c->yuv2packed1; yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
yuv2packed2_fn yuv2packed2 = c->yuv2packed2; yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
yuv2packedX_fn yuv2packedX = c->yuv2packedX; yuv2packedX_fn yuv2packedX = c->yuv2packedX;
int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
/* vars which will change and which we need to store back in the context */ /* vars which will change and which we need to store back in the context */
int dstY= c->dstY; int dstY= c->dstY;
...@@ -2401,6 +2417,9 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -2401,6 +2417,9 @@ static int swScale(SwsContext *c, const uint8_t* src[],
lastInChrBuf= -1; lastInChrBuf= -1;
} }
if (!should_dither) {
c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
}
lastDstY= dstY; lastDstY= dstY;
for (;dstY < dstH; dstY++) { for (;dstY < dstH; dstY++) {
...@@ -2490,6 +2509,10 @@ static int swScale(SwsContext *c, const uint8_t* src[], ...@@ -2490,6 +2509,10 @@ static int swScale(SwsContext *c, const uint8_t* src[],
#if HAVE_MMX #if HAVE_MMX
updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf); updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
#endif #endif
if (should_dither) {
c->chrDither8 = dither_8x8_128[chrDstY & 7];
c->lumDither8 = dither_8x8_128[dstY & 7];
}
if (dstY >= dstH-2) { if (dstY >= dstH-2) {
// hmm looks like we can't use MMX here without overwriting this array's tail // hmm looks like we can't use MMX here without overwriting this array's tail
find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX, find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
......
...@@ -321,6 +321,8 @@ typedef struct SwsContext { ...@@ -321,6 +321,8 @@ typedef struct SwsContext {
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48" #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
#define UV_OFF "11*8+4*4*256*3+48" #define UV_OFF "11*8+4*4*256*3+48"
#define UV_OFFx2 "11*8+4*4*256*3+56" #define UV_OFFx2 "11*8+4*4*256*3+56"
#define DITHER16 "11*8+4*4*256*3+64"
#define DITHER32 "11*8+4*4*256*3+80"
DECLARE_ALIGNED(8, uint64_t, redDither); DECLARE_ALIGNED(8, uint64_t, redDither);
DECLARE_ALIGNED(8, uint64_t, greenDither); DECLARE_ALIGNED(8, uint64_t, greenDither);
...@@ -345,6 +347,10 @@ typedef struct SwsContext { ...@@ -345,6 +347,10 @@ typedef struct SwsContext {
int32_t alpMmxFilter[4*MAX_FILTER_SIZE]; int32_t alpMmxFilter[4*MAX_FILTER_SIZE];
DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
uint16_t dither16[8];
uint32_t dither32[8];
const uint8_t *chrDither8, *lumDither8;
#if HAVE_ALTIVEC #if HAVE_ALTIVEC
vector signed short CY; vector signed short CY;
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment