Commit b1fdf81c authored by Michael Niedermayer

avcodec/jpeg2000dwt: use 32x32->64 multiplies in the 9/7i DWT

This significantly improves the quality when the integer 9/7 transform
is used
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 6c7b1597
...@@ -40,12 +40,13 @@ ...@@ -40,12 +40,13 @@
/* Lifting parameters in integer format. /* Lifting parameters in integer format.
* Computed as param = (float param) * (1 << 16) */ * Computed as param = (float param) * (1 << 16) */
#define I_LFTG_ALPHA 103949 #define I_LFTG_ALPHA 103949ll
#define I_LFTG_BETA 3472 #define I_LFTG_BETA 3472ll
#define I_LFTG_GAMMA 57862 #define I_LFTG_GAMMA 57862ll
#define I_LFTG_DELTA 29066 #define I_LFTG_DELTA 29066ll
#define I_LFTG_K 80621 #define I_LFTG_K 80621ll
#define I_LFTG_X 53274 #define I_LFTG_X 53274ll
#define I_PRESHIFT 8
static inline void extend53(int *p, int i0, int i1) static inline void extend53(int *p, int i0, int i1)
{ {
...@@ -246,11 +247,16 @@ static void sd_1d97_int(int *p, int i0, int i1) ...@@ -246,11 +247,16 @@ static void sd_1d97_int(int *p, int i0, int i1)
static void dwt_encode97_int(DWTContext *s, int *t) static void dwt_encode97_int(DWTContext *s, int *t)
{ {
int lev, int lev;
w = s->linelen[s->ndeclevels-1][0]; int w = s->linelen[s->ndeclevels-1][0];
int h = s->linelen[s->ndeclevels-1][1];
int i;
int *line = s->i_linebuf; int *line = s->i_linebuf;
line += 5; line += 5;
for (i = 0; i < w * h; i++)
t[i] <<= I_PRESHIFT;
for (lev = s->ndeclevels-1; lev >= 0; lev--){ for (lev = s->ndeclevels-1; lev >= 0; lev--){
int lh = s->linelen[lev][0], int lh = s->linelen[lev][0],
lv = s->linelen[lev][1], lv = s->linelen[lev][1],
...@@ -294,6 +300,9 @@ static void dwt_encode97_int(DWTContext *s, int *t) ...@@ -294,6 +300,9 @@ static void dwt_encode97_int(DWTContext *s, int *t)
} }
} }
for (i = 0; i < w * h; i++)
t[i] = (t[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
} }
static void sr_1d53(int *p, int i0, int i1) static void sr_1d53(int *p, int i0, int i1)
...@@ -471,11 +480,16 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t) ...@@ -471,11 +480,16 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
{ {
int lev; int lev;
int w = s->linelen[s->ndeclevels - 1][0]; int w = s->linelen[s->ndeclevels - 1][0];
int h = s->linelen[s->ndeclevels - 1][1];
int i;
int32_t *line = s->i_linebuf; int32_t *line = s->i_linebuf;
int32_t *data = t; int32_t *data = t;
/* position at index O of line range [0-5,w+5] cf. extend function */ /* position at index O of line range [0-5,w+5] cf. extend function */
line += 5; line += 5;
for (i = 0; i < w * h; i++)
data[i] <<= I_PRESHIFT;
for (lev = 0; lev < s->ndeclevels; lev++) { for (lev = 0; lev < s->ndeclevels; lev++) {
int lh = s->linelen[lev][0], int lh = s->linelen[lev][0],
lv = s->linelen[lev][1], lv = s->linelen[lev][1],
...@@ -515,6 +529,9 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t) ...@@ -515,6 +529,9 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
data[w * i + lp] = l[i]; data[w * i + lp] = l[i];
} }
} }
for (i = 0; i < w * h; i++)
data[i] = (data[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
} }
int ff_jpeg2000_dwt_init(DWTContext *s, uint16_t border[2][2], int ff_jpeg2000_dwt_init(DWTContext *s, uint16_t border[2][2],
......
5/3i, decomp:15 border 151 170 140 183 milli-err2: 0 5/3i, decomp:15 border 151 170 140 183 milli-err2: 0
9/7i, decomp:15 border 151 170 140 183 milli-err2: 5188 9/7i, decomp:15 border 151 170 140 183 milli-err2: 544
9/7f, decomp:15 border 151 170 140 183 err2: 0.0001 9/7f, decomp:15 border 151 170 140 183 err2: 0.0001
5/3i, decomp:21 border 173 201 81 189 milli-err2: 0 5/3i, decomp:21 border 173 201 81 189 milli-err2: 0
9/7i, decomp:21 border 173 201 81 189 milli-err2: 6478 9/7i, decomp:21 border 173 201 81 189 milli-err2: 592
9/7f, decomp:21 border 173 201 81 189 err2: 0.0001 9/7f, decomp:21 border 173 201 81 189 err2: 0.0001
5/3i, decomp:22 border 213 227 76 245 milli-err2: 0 5/3i, decomp:22 border 213 227 76 245 milli-err2: 0
9/7i, decomp:22 border 213 227 76 245 milli-err2: 6539 9/7i, decomp:22 border 213 227 76 245 milli-err2: 533
9/7f, decomp:22 border 213 227 76 245 err2: 0.0001 9/7f, decomp:22 border 213 227 76 245 err2: 0.0001
5/3i, decomp:13 border 134 157 184 203 milli-err2: 0 5/3i, decomp:13 border 134 157 184 203 milli-err2: 0
9/7i, decomp:13 border 134 157 184 203 milli-err2: 19203 9/7i, decomp:13 border 134 157 184 203 milli-err2: 535
9/7f, decomp:13 border 134 157 184 203 err2: 0.0001 9/7f, decomp:13 border 134 157 184 203 err2: 0.0001
5/3i, decomp: 1 border 204 237 6 106 milli-err2: 0 5/3i, decomp: 1 border 204 237 6 106 milli-err2: 0
9/7i, decomp: 1 border 204 237 6 106 milli-err2: 924 9/7i, decomp: 1 border 204 237 6 106 milli-err2: 219
9/7f, decomp: 1 border 204 237 6 106 err2: 0.0000 9/7f, decomp: 1 border 204 237 6 106 err2: 0.0000
5/3i, decomp:28 border 76 211 13 210 milli-err2: 0 5/3i, decomp:28 border 76 211 13 210 milli-err2: 0
9/7i, decomp:28 border 76 211 13 210 milli-err2: 17297 9/7i, decomp:28 border 76 211 13 210 milli-err2: 791
9/7f, decomp:28 border 76 211 13 210 err2: 0.0002 9/7f, decomp:28 border 76 211 13 210 err2: 0.0002
5/3i, decomp:21 border 76 99 43 123 milli-err2: 0 5/3i, decomp:21 border 76 99 43 123 milli-err2: 0
9/7i, decomp:21 border 76 99 43 123 milli-err2: 9039 9/7i, decomp:21 border 76 99 43 123 milli-err2: 686
9/7f, decomp:21 border 76 99 43 123 err2: 0.0001 9/7f, decomp:21 border 76 99 43 123 err2: 0.0001
5/3i, decomp:15 border 192 243 174 204 milli-err2: 0 5/3i, decomp:15 border 192 243 174 204 milli-err2: 0
9/7i, decomp:15 border 192 243 174 204 milli-err2: 7693 9/7i, decomp:15 border 192 243 174 204 milli-err2: 476
9/7f, decomp:15 border 192 243 174 204 err2: 0.0001 9/7f, decomp:15 border 192 243 174 204 err2: 0.0001
5/3i, decomp:21 border 17 68 93 204 milli-err2: 0 5/3i, decomp:21 border 17 68 93 204 milli-err2: 0
9/7i, decomp:21 border 17 68 93 204 milli-err2: 7810 9/7i, decomp:21 border 17 68 93 204 milli-err2: 633
9/7f, decomp:21 border 17 68 93 204 err2: 0.0001 9/7f, decomp:21 border 17 68 93 204 err2: 0.0001
5/3i, decomp:11 border 142 168 82 174 milli-err2: 0 5/3i, decomp:11 border 142 168 82 174 milli-err2: 0
9/7i, decomp:11 border 142 168 82 174 milli-err2: 18168 9/7i, decomp:11 border 142 168 82 174 milli-err2: 696
9/7f, decomp:11 border 142 168 82 174 err2: 0.0001 9/7f, decomp:11 border 142 168 82 174 err2: 0.0001
5/3i, decomp:23 border 142 209 171 235 milli-err2: 0 5/3i, decomp:23 border 142 209 171 235 milli-err2: 0
9/7i, decomp:23 border 142 209 171 235 milli-err2: 7313 9/7i, decomp:23 border 142 209 171 235 milli-err2: 626
9/7f, decomp:23 border 142 209 171 235 err2: 0.0001 9/7f, decomp:23 border 142 209 171 235 err2: 0.0001
5/3i, decomp:30 border 37 185 79 245 milli-err2: 0 5/3i, decomp:30 border 37 185 79 245 milli-err2: 0
9/7i, decomp:30 border 37 185 79 245 milli-err2: 13498 9/7i, decomp:30 border 37 185 79 245 milli-err2: 953
9/7f, decomp:30 border 37 185 79 245 err2: 0.0002 9/7f, decomp:30 border 37 185 79 245 err2: 0.0002
5/3i, decomp: 5 border 129 236 30 243 milli-err2: 0 5/3i, decomp: 5 border 129 236 30 243 milli-err2: 0
9/7i, decomp: 5 border 129 236 30 243 milli-err2: 8775 9/7i, decomp: 5 border 129 236 30 243 milli-err2: 620
9/7f, decomp: 5 border 129 236 30 243 err2: 0.0001 9/7f, decomp: 5 border 129 236 30 243 err2: 0.0001
5/3i, decomp:10 border 5 160 146 247 milli-err2: 0 5/3i, decomp:10 border 5 160 146 247 milli-err2: 0
9/7i, decomp:10 border 5 160 146 247 milli-err2: 13478 9/7i, decomp:10 border 5 160 146 247 milli-err2: 797
9/7f, decomp:10 border 5 160 146 247 err2: 0.0002 9/7f, decomp:10 border 5 160 146 247 err2: 0.0002
5/3i, decomp: 5 border 104 162 6 47 milli-err2: 0 5/3i, decomp: 5 border 104 162 6 47 milli-err2: 0
9/7i, decomp: 5 border 104 162 6 47 milli-err2: 7808 9/7i, decomp: 5 border 104 162 6 47 milli-err2: 603
9/7f, decomp: 5 border 104 162 6 47 err2: 0.0001 9/7f, decomp: 5 border 104 162 6 47 err2: 0.0001
5/3i, decomp:24 border 78 250 102 218 milli-err2: 0 5/3i, decomp:24 border 78 250 102 218 milli-err2: 0
9/7i, decomp:24 border 78 250 102 218 milli-err2: 12570 9/7i, decomp:24 border 78 250 102 218 milli-err2: 836
9/7f, decomp:24 border 78 250 102 218 err2: 0.0002 9/7f, decomp:24 border 78 250 102 218 err2: 0.0002
5/3i, decomp:28 border 86 98 56 79 milli-err2: 0 5/3i, decomp:28 border 86 98 56 79 milli-err2: 0
9/7i, decomp:28 border 86 98 56 79 milli-err2: 4148 9/7i, decomp:28 border 86 98 56 79 milli-err2: 597
9/7f, decomp:28 border 86 98 56 79 err2: 0.0001 9/7f, decomp:28 border 86 98 56 79 err2: 0.0001
5/3i, decomp: 6 border 95 238 197 214 milli-err2: 0 5/3i, decomp: 6 border 95 238 197 214 milli-err2: 0
9/7i, decomp: 6 border 95 238 197 214 milli-err2: 7686 9/7i, decomp: 6 border 95 238 197 214 milli-err2: 478
9/7f, decomp: 6 border 95 238 197 214 err2: 0.0001 9/7f, decomp: 6 border 95 238 197 214 err2: 0.0001
5/3i, decomp:17 border 77 169 93 165 milli-err2: 0 5/3i, decomp:17 border 77 169 93 165 milli-err2: 0
9/7i, decomp:17 border 77 169 93 165 milli-err2: 12026 9/7i, decomp:17 border 77 169 93 165 milli-err2: 616
9/7f, decomp:17 border 77 169 93 165 err2: 0.0001 9/7f, decomp:17 border 77 169 93 165 err2: 0.0001
5/3i, decomp:22 border 178 187 7 119 milli-err2: 0 5/3i, decomp:22 border 178 187 7 119 milli-err2: 0
9/7i, decomp:22 border 178 187 7 119 milli-err2: 4971 9/7i, decomp:22 border 178 187 7 119 milli-err2: 392
9/7f, decomp:22 border 178 187 7 119 err2: 0.0000 9/7f, decomp:22 border 178 187 7 119 err2: 0.0000
#tb 0: 1/24 #tb 0: 1/24
0, 0, 0, 1, 12441600, 0xbf142791 0, 0, 0, 1, 12441600, 0xda6b6cde
0, 1, 1, 1, 12441600, 0x6b7a2ab5 0, 1, 1, 1, 12441600, 0xb0994664
4c7dbe2451f56a49c29b0b5d7808d74d *tests/data/fate/vsynth1-jpeg2000-97.avi a19cc0e1a1c1bf76ff5a0b63a0bdfbd1 *tests/data/fate/vsynth1-jpeg2000-97.avi
3661616 tests/data/fate/vsynth1-jpeg2000-97.avi 3654420 tests/data/fate/vsynth1-jpeg2000-97.avi
d079e946a2fb75ad5ce6cb2760d1cc62 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo 3b71c0f8aebf45122da77d892a6ebf00 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo
stddev: 4.63 PSNR: 34.81 MAXDIFF: 54 bytes: 7603200/ 7603200 stddev: 4.23 PSNR: 35.59 MAXDIFF: 53 bytes: 7603200/ 7603200
c3582d23a1fca31a6218346b82167f88 *tests/data/fate/vsynth2-jpeg2000-97.avi b86217f0bcbd84a9368ad3f98af32157 *tests/data/fate/vsynth2-jpeg2000-97.avi
2451092 tests/data/fate/vsynth2-jpeg2000-97.avi 2448506 tests/data/fate/vsynth2-jpeg2000-97.avi
d1329b49bcfcf74279eb07f7e20ddcec *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo 4d9d9db91075a1eca2a6b9f152e4defc *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo
stddev: 3.73 PSNR: 36.69 MAXDIFF: 30 bytes: 7603200/ 7603200 stddev: 3.23 PSNR: 37.94 MAXDIFF: 29 bytes: 7603200/ 7603200
2f8a9b514fbf1cb034076459463a7b76 *tests/data/fate/vsynth3-jpeg2000-97.avi 5e17fdaae1a22f3eef8c82b512e4b1b9 *tests/data/fate/vsynth3-jpeg2000-97.avi
83866 tests/data/fate/vsynth3-jpeg2000-97.avi 83670 tests/data/fate/vsynth3-jpeg2000-97.avi
febc7ef2ae9ec3f34b74d456922ae858 *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo 8ec04513b2e6645c9ea340e3fe9fe8f2 *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo
stddev: 4.87 PSNR: 34.37 MAXDIFF: 51 bytes: 86700/ 86700 stddev: 4.52 PSNR: 35.02 MAXDIFF: 47 bytes: 86700/ 86700
60808e880f1fd410b010feeca9105f4e *tests/data/fate/vsynth_lena-jpeg2000-97.avi ca78db12e1af7cbf44fdce165aaa5130 *tests/data/fate/vsynth_lena-jpeg2000-97.avi
1931500 tests/data/fate/vsynth_lena-jpeg2000-97.avi 1918756 tests/data/fate/vsynth_lena-jpeg2000-97.avi
6d775a823d4b96cc6c121665bc7eb359 *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo 5fd8a2e35503b48af302b3ef5e317683 *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo
stddev: 3.38 PSNR: 37.53 MAXDIFF: 28 bytes: 7603200/ 7603200 stddev: 2.84 PSNR: 39.04 MAXDIFF: 28 bytes: 7603200/ 7603200
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment