Commit a7f6bfdc authored by Thomas Mundt, committed by Michael Niedermayer

avfilter/interlace: prevent over-sharpening with the complex low-pass filter

The complex vertical low-pass filter slightly over-sharpens the picture. This becomes visible when several transcodings are cascaded and the error compounds, e.g. over some generations of HD->SD and SD->HD conversions.
To prevent this behaviour the destination pixel must not exceed the source pixel when the average of the pixels above and below is less than the source pixel. And the other way around.

Tested and approved in a visual transcoding cascade test by video professionals.
SSIM/PSNR test with the first generation of an HD->SD file as a reference against the 6th generation (3 x SD->HD and HD->SD):
Results without the patch:
SSIM Y:0.956508 (13.615881) U:0.991601 (20.757750) V:0.993004 (21.551382) All:0.974405 (15.918463)
PSNR y:31.838009 u:48.424280 v:48.962711 average:34.759466 min:31.699297 max:40.857847
Results with the patch:
SSIM Y:0.970051 (15.236232) U:0.991883 (20.905857) V:0.993174 (21.658049) All:0.981290 (17.279202)
PSNR y:34.412108 u:48.504454 v:48.969496 average:37.264644 min:34.310637 max:42.373392
Signed-off-by: Thomas Mundt <tmundt75@gmail.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
parent 1a85fb7e
...@@ -83,14 +83,23 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize, ...@@ -83,14 +83,23 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp_below = srcp + pref; const uint8_t *srcp_below = srcp + pref;
const uint8_t *srcp_above2 = srcp + mref * 2; const uint8_t *srcp_above2 = srcp + mref * 2;
const uint8_t *srcp_below2 = srcp + pref * 2; const uint8_t *srcp_below2 = srcp + pref * 2;
int i; int i, srcp_x, srcp_ab;
for (i = 0; i < linesize; i++) { for (i = 0; i < linesize; i++) {
// this calculation is an integer representation of // this calculation is an integer representation of
// '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2' // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
// '4 +' is for rounding. // '4 +' is for rounding.
dstp[i] = av_clip_uint8((4 + (srcp[i] << 2) srcp_x = srcp[i] << 1;
+ ((srcp[i] + srcp_above[i] + srcp_below[i]) << 1) srcp_ab = srcp_above[i] + srcp_below[i];
- srcp_above2[i] - srcp_below2[i]) >> 3); dstp[i] = av_clip_uint8((4 + ((srcp[i] + srcp_x + srcp_ab) << 1)
- srcp_above2[i] - srcp_below2[i]) >> 3);
// Prevent over-sharpening:
// dst must not exceed src when the average of above and below
// is less than src. And the other way around.
if (srcp_ab > srcp_x) {
if (dstp[i] < srcp[i])
dstp[i] = srcp[i];
} else if (dstp[i] > srcp[i])
dstp[i] = srcp[i];
} }
} }
......
...@@ -110,14 +110,23 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t ...@@ -110,14 +110,23 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t
const uint8_t *srcp_below = srcp + pref; const uint8_t *srcp_below = srcp + pref;
const uint8_t *srcp_above2 = srcp + mref * 2; const uint8_t *srcp_above2 = srcp + mref * 2;
const uint8_t *srcp_below2 = srcp + pref * 2; const uint8_t *srcp_below2 = srcp + pref * 2;
int i; int i, srcp_x, srcp_ab;
for (i = 0; i < width; i++) { for (i = 0; i < width; i++) {
// this calculation is an integer representation of // this calculation is an integer representation of
// '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2' // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
// '4 +' is for rounding. // '4 +' is for rounding.
dstp[i] = av_clip_uint8((4 + (srcp[i] << 2) srcp_x = srcp[i] << 1;
+ ((srcp[i] + srcp_above[i] + srcp_below[i]) << 1) srcp_ab = srcp_above[i] + srcp_below[i];
- srcp_above2[i] - srcp_below2[i]) >> 3); dstp[i] = av_clip_uint8((4 + ((srcp[i] + srcp_x + srcp_ab) << 1)
- srcp_above2[i] - srcp_below2[i]) >> 3);
// Prevent over-sharpening:
// dst must not exceed src when the average of above and below
// is less than src. And the other way around.
if (srcp_ab > srcp_x) {
if (dstp[i] < srcp[i])
dstp[i] = srcp[i];
} else if (dstp[i] > srcp[i])
dstp[i] = srcp[i];
} }
} }
......
...@@ -63,41 +63,46 @@ REP_RET ...@@ -63,41 +63,46 @@ REP_RET
%endmacro %endmacro
%macro LOWPASS_LINE_COMPLEX 0 %macro LOWPASS_LINE_COMPLEX 0
cglobal lowpass_line_complex, 5, 5, 7, dst, h, src, mref, pref cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref
pxor m6, m6 pxor m7, m7
.loop: .loop:
mova m0, [srcq+mrefq] mova m0, [srcq+mrefq]
mova m2, [srcq+prefq] mova m2, [srcq+prefq]
mova m1, m0 mova m1, m0
mova m3, m2 mova m3, m2
punpcklbw m0, m6 punpcklbw m0, m7
punpcklbw m2, m6 punpcklbw m2, m7
punpckhbw m1, m6 punpckhbw m1, m7
punpckhbw m3, m6 punpckhbw m3, m7
paddw m0, m2 paddw m0, m2
paddw m1, m3 paddw m1, m3
mova m6, m0
mova m5, m1
mova m2, [srcq]
mova m3, m2
punpcklbw m2, m7
punpckhbw m3, m7
paddw m0, m2
paddw m1, m3
psllw m2, 1
psllw m3, 1
paddw m0, m2
paddw m1, m3
psllw m0, 1
psllw m1, 1
pcmpgtw m6, m2
pcmpgtw m5, m3
packsswb m6, m5
mova m2, [srcq+mrefq*2] mova m2, [srcq+mrefq*2]
mova m4, [srcq+prefq*2] mova m4, [srcq+prefq*2]
mova m3, m2 mova m3, m2
mova m5, m4 mova m5, m4
punpcklbw m2, m6 punpcklbw m2, m7
punpcklbw m4, m6 punpcklbw m4, m7
punpckhbw m3, m6 punpckhbw m3, m7
punpckhbw m5, m6 punpckhbw m5, m7
paddw m2, m4 paddw m2, m4
paddw m3, m5 paddw m3, m5
mova m4, [srcq]
mova m5, m4
punpcklbw m4, m6
punpckhbw m5, m6
paddw m0, m4
paddw m1, m5
psllw m0, 1
psllw m1, 1
psllw m4, 2
psllw m5, 2
paddw m0, m4
paddw m1, m5
paddw m0, [pw_4] paddw m0, [pw_4]
paddw m1, [pw_4] paddw m1, [pw_4]
psubusw m0, m2 psubusw m0, m2
...@@ -105,6 +110,12 @@ cglobal lowpass_line_complex, 5, 5, 7, dst, h, src, mref, pref ...@@ -105,6 +110,12 @@ cglobal lowpass_line_complex, 5, 5, 7, dst, h, src, mref, pref
psrlw m0, 3 psrlw m0, 3
psrlw m1, 3 psrlw m1, 3
packuswb m0, m1 packuswb m0, m1
mova m1, m0
pmaxub m0, [srcq]
pminub m1, [srcq]
pand m0, m6
pandn m6, m1
por m0, m6
mova [dstq], m0 mova [dstq], m0
add dstq, mmsize add dstq, mmsize
......
...@@ -3,28 +3,28 @@ ...@@ -3,28 +3,28 @@
#codec_id 0: rawvideo #codec_id 0: rawvideo
#dimensions 0: 352x288 #dimensions 0: 352x288
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 152064, 0x91290ae6 0, 0, 0, 1, 152064, 0x778ab0c1
0, 1, 1, 1, 152064, 0x24f34baf 0, 1, 1, 1, 152064, 0xdc30f7c3
0, 2, 2, 1, 152064, 0x799fc436 0, 2, 2, 1, 152064, 0xcb637467
0, 3, 3, 1, 152064, 0xfe42c0a9 0, 3, 3, 1, 152064, 0xcbf778ce
0, 4, 4, 1, 152064, 0xb496f879 0, 4, 4, 1, 152064, 0x573d9f6d
0, 5, 5, 1, 152064, 0xc43b36c9 0, 5, 5, 1, 152064, 0xd794df2c
0, 6, 6, 1, 152064, 0xb65abbf4 0, 6, 6, 1, 152064, 0x3e885448
0, 7, 7, 1, 152064, 0xd1806312 0, 7, 7, 1, 152064, 0xccec1794
0, 8, 8, 1, 152064, 0x0faf56c1 0, 8, 8, 1, 152064, 0x6f32f51a
0, 9, 9, 1, 152064, 0x4de73b75 0, 9, 9, 1, 152064, 0x0657f5ac
0, 10, 10, 1, 152064, 0xf90f24ce 0, 10, 10, 1, 152064, 0xfa82d600
0, 11, 11, 1, 152064, 0xc1efd6e0 0, 11, 11, 1, 152064, 0x15ff7f32
0, 12, 12, 1, 152064, 0xeb8e5894 0, 12, 12, 1, 152064, 0x1cac0342
0, 13, 13, 1, 152064, 0xe8aacabc 0, 13, 13, 1, 152064, 0x6afb7c49
0, 14, 14, 1, 152064, 0x8bd2163c 0, 14, 14, 1, 152064, 0x6c47d554
0, 15, 15, 1, 152064, 0xbfc72ac2 0, 15, 15, 1, 152064, 0xe0fbd132
0, 16, 16, 1, 152064, 0x1e8f6f56 0, 16, 16, 1, 152064, 0x4f891e8d
0, 17, 17, 1, 152064, 0xe3d19450 0, 17, 17, 1, 152064, 0x88554045
0, 18, 18, 1, 152064, 0x3872af32 0, 18, 18, 1, 152064, 0x0c8e6192
0, 19, 19, 1, 152064, 0xf23be72a 0, 19, 19, 1, 152064, 0xf73c91c3
0, 20, 20, 1, 152064, 0x024f8f2b 0, 20, 20, 1, 152064, 0x49ac328d
0, 21, 21, 1, 152064, 0xb49301ea 0, 21, 21, 1, 152064, 0xf18ebd82
0, 22, 22, 1, 152064, 0x84f5d056 0, 22, 22, 1, 152064, 0x3359760d
0, 23, 23, 1, 152064, 0xd2c09ca5 0, 23, 23, 1, 152064, 0x5c85601a
0, 24, 24, 1, 152064, 0xe9b5ddfd 0, 24, 24, 1, 152064, 0x28c1657b
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment