Commit e11099db authored by Michael Niedermayer, committed by Luca Barbato

jpeg2000: Optimize dequantization

Float:   4700 -> 2700 cycles
Integer: 4400 -> 2800 cycles

(sandybridge  i7)
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
parent c1dcbc59
...@@ -1006,12 +1006,13 @@ static void dequantization_float(int x, int y, Jpeg2000Cblk *cblk, ...@@ -1006,12 +1006,13 @@ static void dequantization_float(int x, int y, Jpeg2000Cblk *cblk,
Jpeg2000Component *comp, Jpeg2000Component *comp,
Jpeg2000T1Context *t1, Jpeg2000Band *band) Jpeg2000T1Context *t1, Jpeg2000Band *band)
{ {
int i, j, idx; int i, j;
float *datap = &comp->f_data[(comp->coord[0][1] - comp->coord[0][0]) * y + x]; int w = cblk->coord[0][1] - cblk->coord[0][0];
for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) {
for (i = 0; i < (cblk->coord[0][1] - cblk->coord[0][0]); ++i) { float *datap = &comp->f_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];
idx = (comp->coord[0][1] - comp->coord[0][0]) * j + i; int *src = t1->data[j];
datap[idx] = (float)(t1->data[j][i]) * band->f_stepsize; for (i = 0; i < w; ++i)
datap[i] = src[i] * band->f_stepsize;
} }
} }
...@@ -1020,13 +1021,13 @@ static void dequantization_int(int x, int y, Jpeg2000Cblk *cblk, ...@@ -1020,13 +1021,13 @@ static void dequantization_int(int x, int y, Jpeg2000Cblk *cblk,
Jpeg2000Component *comp, Jpeg2000Component *comp,
Jpeg2000T1Context *t1, Jpeg2000Band *band) Jpeg2000T1Context *t1, Jpeg2000Band *band)
{ {
int i, j, idx; int i, j;
int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * y + x]; int w = cblk->coord[0][1] - cblk->coord[0][0];
for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) {
for (i = 0; i < (cblk->coord[0][1] - cblk->coord[0][0]); ++i) { int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];
idx = (comp->coord[0][1] - comp->coord[0][0]) * j + i; int *src = t1->data[j];
datap[idx] = for (i = 0; i < w; ++i)
((int32_t)(t1->data[j][i]) * band->i_stepsize + (1 << 15)) >> 16; datap[i] = (src[i] * band->i_stepsize + (1 << 15)) >> 16;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment