Commit 2fd14dd8 authored by Christophe Gisquet, committed by Michael Niedermayer

avcodec/simple_idct10: improve precision

omse goes from 0.03060703 (which fails for dct-test) to 0.01663750.
This also actually improves the error of decoding the sample generated
by fate-vsynth3-dnxhd1080i-10bit using simple_idct10 to FAANI, which
goes (when resampled to yuv422p) from:
stddev:    0.06 PSNR: 72.28 MAXDIFF:    1
to identical.
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
parent e9a68b03
...@@ -36,6 +36,11 @@ ...@@ -36,6 +36,11 @@
#define BIT_DEPTH 10 #define BIT_DEPTH 10
#include "simple_idct_template.c" #include "simple_idct_template.c"
#define EXTRA_SHIFT 2
#include "simple_idct_template.c"
#undef EXTRA_SHIFT
#undef BIT_DEPTH #undef BIT_DEPTH
#define BIT_DEPTH 12 #define BIT_DEPTH 12
...@@ -230,10 +235,10 @@ void ff_prores_idct(int16_t *block, const int16_t *qmat) ...@@ -230,10 +235,10 @@ void ff_prores_idct(int16_t *block, const int16_t *qmat)
block[i] *= qmat[i]; block[i] *= qmat[i];
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
idctRowCondDC_10(block + i*8, 2); idctRowCondDC_extrashift_10(block + i*8, 2);
for (i = 0; i < 8; i++) { for (i = 0; i < 8; i++) {
block[i] += 8192; block[i] += 8192;
idctSparseCol_10(block + i); idctSparseCol_extrashift_10(block + i);
} }
} }
...@@ -66,19 +66,26 @@ ...@@ -66,19 +66,26 @@
#elif BIT_DEPTH == 10 || BIT_DEPTH == 12 #elif BIT_DEPTH == 10 || BIT_DEPTH == 12
#if BIT_DEPTH == 10 # if BIT_DEPTH == 10
#define W1 (22725*4) // 90901 #define W1 22725 // 90901
#define W2 (21407*4) // 85627 #define W2 21407 // 85627
#define W3 (19265*4) // 77062 #define W3 19265 // 77062
#define W4 (16384*4) // 65535 #define W4 16384 // 65535
#define W5 (12873*4) // 51491 #define W5 12873 // 51491
#define W6 ( 8867*4) // 35468 #define W6 8867 // 35468
#define W7 ( 4520*4) // 18081 #define W7 4520 // 18081
#define ROW_SHIFT 15 # ifdef EXTRA_SHIFT
#define COL_SHIFT 20 #define ROW_SHIFT 13
#define COL_SHIFT 18
#define DC_SHIFT 1 #define DC_SHIFT 1
#else # else
#define ROW_SHIFT 12
#define COL_SHIFT 19
#define DC_SHIFT 2
# endif
# else
#define W1 45451 #define W1 45451
#define W2 42813 #define W2 42813
#define W3 38531 #define W3 38531
...@@ -90,7 +97,7 @@ ...@@ -90,7 +97,7 @@
#define ROW_SHIFT 16 #define ROW_SHIFT 16
#define COL_SHIFT 17 #define COL_SHIFT 17
#define DC_SHIFT -1 #define DC_SHIFT -1
#endif # endif
#define MUL(a, b) ((a) * (b)) #define MUL(a, b) ((a) * (b))
#define MAC(a, b, c) ((a) += (b) * (c)) #define MAC(a, b, c) ((a) += (b) * (c))
...@@ -101,7 +108,11 @@ ...@@ -101,7 +108,11 @@
#endif #endif
#ifdef EXTRA_SHIFT
static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
#else
static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
#endif
{ {
int a0, a1, a2, a3, b0, b1, b2, b3; int a0, a1, a2, a3, b0, b1, b2, b3;
...@@ -236,6 +247,9 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) ...@@ -236,6 +247,9 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
} \ } \
} while (0) } while (0)
#ifdef EXTRA_SHIFT
static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
#else
static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
int16_t *col) int16_t *col)
{ {
...@@ -285,6 +299,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, ...@@ -285,6 +299,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
} }
static inline void FUNC(idctSparseCol)(int16_t *col) static inline void FUNC(idctSparseCol)(int16_t *col)
#endif
{ {
int a0, a1, a2, a3, b0, b1, b2, b3; int a0, a1, a2, a3, b0, b1, b2, b3;
...@@ -300,6 +315,7 @@ static inline void FUNC(idctSparseCol)(int16_t *col) ...@@ -300,6 +315,7 @@ static inline void FUNC(idctSparseCol)(int16_t *col)
col[56] = ((a0 - b0) >> COL_SHIFT); col[56] = ((a0 - b0) >> COL_SHIFT);
} }
#ifndef EXTRA_SHIFT
void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block) void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block)
{ {
pixel *dest = (pixel *)dest_; pixel *dest = (pixel *)dest_;
...@@ -338,3 +354,4 @@ void FUNC(ff_simple_idct)(int16_t *block) ...@@ -338,3 +354,4 @@ void FUNC(ff_simple_idct)(int16_t *block)
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
FUNC(idctSparseCol)(block + i); FUNC(idctSparseCol)(block + i);
} }
#endif
#tb 0: 1/24 #tb 0: 1/24
0, 0, 0, 1, 9665280, 0x238a023e 0, 0, 0, 1, 9665280, 0x19ef4057
f8c4b7aa165a80df2485d526161290a3 *tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd f8c4b7aa165a80df2485d526161290a3 *tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd
2293760 tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd 2293760 tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd
3cc84f9e8d2e704475b410de27dd9951 *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo 87f1f0e074466facd3a9922ecc8311db *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo
stddev: 6.23 PSNR: 32.23 MAXDIFF: 64 bytes: 7603200/ 760320 stddev: 6.23 PSNR: 32.23 MAXDIFF: 64 bytes: 7603200/ 760320
e49cb87f69acc809aee55d64990c84a9 *tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd e49cb87f69acc809aee55d64990c84a9 *tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd
2293760 tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd 2293760 tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd
a98c4b69d4d036089a455e147d6922a7 *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo 1e6e1ef90e5c9b16a80acc17fde596ff *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo
stddev: 1.54 PSNR: 44.36 MAXDIFF: 31 bytes: 7603200/ 760320 stddev: 1.54 PSNR: 44.36 MAXDIFF: 31 bytes: 7603200/ 760320
e96fc4a7d994b9369c50da32fd325822 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd e96fc4a7d994b9369c50da32fd325822 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd
2293760 tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd 2293760 tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd
2b497215c57558910a605ff8c78430d9 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo 0e9fcec94aeff70bac5dec02cf2391bc *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo
stddev: 1.33 PSNR: 45.61 MAXDIFF: 22 bytes: 7603200/ 760320 stddev: 1.33 PSNR: 45.61 MAXDIFF: 22 bytes: 7603200/ 760320
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment