Commit b439ece5 authored by Clément Bœsch

lavfi/dctdnoiz: move DC normalization out of loops.

Make code slightly faster, simpler, clearer.

The filter is still slow as hell, and this change won't cause any
visible performance improvement (it still takes more than one minute to
process a single 1080p frame on a Core 2 here).
parent bd89b2b2
...@@ -82,9 +82,10 @@ static float *dct_block(DCTdnoizContext *ctx, const float *src, int src_linesize ...@@ -82,9 +82,10 @@ static float *dct_block(DCTdnoizContext *ctx, const float *src, int src_linesize
av_dct_calc(ctx->dct, line); av_dct_calc(ctx->dct, line);
column = ctx->tmp_block + y; column = ctx->tmp_block + y;
for (x = 0; x < BSIZE; x++) { column[0] = line[0] * (1. / sqrt(BSIZE));
*line *= x == 0 ? 1. / sqrt(BSIZE) : sqrt(2. / BSIZE); column += BSIZE;
*column = *line++; for (x = 1; x < BSIZE; x++) {
*column = line[x] * sqrt(2. / BSIZE);
column += BSIZE; column += BSIZE;
} }
} }
...@@ -92,8 +93,9 @@ static float *dct_block(DCTdnoizContext *ctx, const float *src, int src_linesize ...@@ -92,8 +93,9 @@ static float *dct_block(DCTdnoizContext *ctx, const float *src, int src_linesize
column = ctx->tmp_block; column = ctx->tmp_block;
for (x = 0; x < BSIZE; x++) { for (x = 0; x < BSIZE; x++) {
av_dct_calc(ctx->dct, column); av_dct_calc(ctx->dct, column);
for (y = 0; y < BSIZE; y++) column[0] *= 1. / sqrt(BSIZE);
column[y] *= y == 0 ? 1. / sqrt(BSIZE) : sqrt(2. / BSIZE); for (y = 1; y < BSIZE; y++)
column[y] *= sqrt(2. / BSIZE);
column += BSIZE; column += BSIZE;
} }
...@@ -111,18 +113,18 @@ static void idct_block(DCTdnoizContext *ctx, float *dst, int dst_linesize) ...@@ -111,18 +113,18 @@ static void idct_block(DCTdnoizContext *ctx, float *dst, int dst_linesize)
float *tmp = ctx->tmp_block; float *tmp = ctx->tmp_block;
for (y = 0; y < BSIZE; y++) { for (y = 0; y < BSIZE; y++) {
for (x = 0; x < BSIZE; x++) block[0] *= sqrt(BSIZE);
block[x] *= x == 0 ? sqrt(BSIZE) : 1./sqrt(2. / BSIZE); for (x = 1; x < BSIZE; x++)
block[x] *= 1./sqrt(2. / BSIZE);
av_dct_calc(ctx->idct, block); av_dct_calc(ctx->idct, block);
block += BSIZE; block += BSIZE;
} }
block = ctx->block; block = ctx->block;
for (y = 0; y < BSIZE; y++) { for (y = 0; y < BSIZE; y++) {
for (x = 0; x < BSIZE; x++) { tmp[0] = block[y] * sqrt(BSIZE);
tmp[x] = block[x*BSIZE + y]; for (x = 1; x < BSIZE; x++)
tmp[x] *= x == 0 ? sqrt(BSIZE) : 1./sqrt(2. / BSIZE); tmp[x] = block[x*BSIZE + y] * (1./sqrt(2. / BSIZE));
}
av_dct_calc(ctx->idct, tmp); av_dct_calc(ctx->idct, tmp);
for (x = 0; x < BSIZE; x++) for (x = 0; x < BSIZE; x++)
dst[x*dst_linesize + y] += tmp[x]; dst[x*dst_linesize + y] += tmp[x];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment