Commit c4c5f538 authored by Clément Bœsch's avatar Clément Bœsch Committed by Martin Storsjö

vp9dsp: add DC only versions for idct/idct.

before:

time ./avconv -v 0 -nostats -threads 1 -i sintel_vp9_500kbps.webm -f null -
real    0m11.125s
user    0m11.059s
sys     0m0.050s

time ./avconv -v 0 -nostats -threads 1 -i sintel_vp9_500kbps.webm -f null -
real    0m10.944s
user    0m10.819s
sys     0m0.064s

after:

time ./avconv -v 0 -nostats -threads 1 -i sintel_vp9_500kbps.webm -f null -
real    0m8.153s
user    0m8.034s
sys     0m0.050s

time ./avconv -v 0 -nostats -threads 1 -i sintel_vp9_500kbps.webm -f null -
real    0m8.038s
user    0m7.980s
sys     0m0.039s
Signed-off-by: 's avatarMartin Storsjö <martin@martin.st>
parent e4382a4a
...@@ -944,7 +944,7 @@ static av_cold void vp9dsp_intrapred_init(VP9DSPContext *dsp) ...@@ -944,7 +944,7 @@ static av_cold void vp9dsp_intrapred_init(VP9DSPContext *dsp)
#undef init_intra_pred #undef init_intra_pred
} }
#define itxfm_wrapper(type_a, type_b, sz, bits) \ #define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \
static void \ static void \
type_a ## _ ## type_b ## _ ## sz ## x ## sz ## _add_c(uint8_t *dst, \ type_a ## _ ## type_b ## _ ## sz ## x ## sz ## _add_c(uint8_t *dst, \
ptrdiff_t stride, \ ptrdiff_t stride, \
...@@ -953,6 +953,22 @@ type_a ## _ ## type_b ## _ ## sz ## x ## sz ## _add_c(uint8_t *dst, \ ...@@ -953,6 +953,22 @@ type_a ## _ ## type_b ## _ ## sz ## x ## sz ## _add_c(uint8_t *dst, \
{ \ { \
int i, j; \ int i, j; \
int16_t tmp[sz * sz], out[sz]; \ int16_t tmp[sz * sz], out[sz]; \
\
if (has_dconly && eob == 1) { \
const int t = (((block[0] * 11585 + (1 << 13)) >> 14) \
* 11585 + (1 << 13)) >> 14; \
block[0] = 0; \
for (i = 0; i < sz; i++) { \
for (j = 0; j < sz; j++) \
dst[j * stride] = \
av_clip_uint8(dst[j * stride] + \
(bits ? (t + (1 << (bits - 1))) >> bits \
: t)); \
dst++; \
} \
return; \
} \
\
for (i = 0; i < sz; i++) \ for (i = 0; i < sz; i++) \
type_a ## sz ## _1d(tmp + i * sz, block + i, sz, 0); \ type_a ## sz ## _1d(tmp + i * sz, block + i, sz, 0); \
memset(block, 0, sz * sz * sizeof(*block)); \ memset(block, 0, sz * sz * sizeof(*block)); \
...@@ -968,10 +984,10 @@ type_a ## _ ## type_b ## _ ## sz ## x ## sz ## _add_c(uint8_t *dst, \ ...@@ -968,10 +984,10 @@ type_a ## _ ## type_b ## _ ## sz ## x ## sz ## _add_c(uint8_t *dst, \
} }
#define itxfm_wrap(sz, bits) \ #define itxfm_wrap(sz, bits) \
itxfm_wrapper(idct, idct, sz, bits) \ itxfm_wrapper(idct, idct, sz, bits, 1) \
itxfm_wrapper(iadst, idct, sz, bits) \ itxfm_wrapper(iadst, idct, sz, bits, 0) \
itxfm_wrapper(idct, iadst, sz, bits) \ itxfm_wrapper(idct, iadst, sz, bits, 0) \
itxfm_wrapper(iadst, iadst, sz, bits) itxfm_wrapper(iadst, iadst, sz, bits, 0)
#define IN(x) in[x * stride] #define IN(x) in[x * stride]
...@@ -1490,7 +1506,7 @@ static av_always_inline void idct32_1d(int16_t *out, const int16_t *in, ...@@ -1490,7 +1506,7 @@ static av_always_inline void idct32_1d(int16_t *out, const int16_t *in,
out[31] = t0 - t31; out[31] = t0 - t31;
} }
itxfm_wrapper(idct, idct, 32, 6) itxfm_wrapper(idct, idct, 32, 6, 1)
static av_always_inline void iwht4_1d(int16_t *out, const int16_t *in, static av_always_inline void iwht4_1d(int16_t *out, const int16_t *in,
ptrdiff_t stride, int pass) ptrdiff_t stride, int pass)
...@@ -1523,7 +1539,7 @@ static av_always_inline void iwht4_1d(int16_t *out, const int16_t *in, ...@@ -1523,7 +1539,7 @@ static av_always_inline void iwht4_1d(int16_t *out, const int16_t *in,
out[3] = t3; out[3] = t3;
} }
itxfm_wrapper(iwht, iwht, 4, 0) itxfm_wrapper(iwht, iwht, 4, 0, 0)
#undef IN #undef IN
#undef itxfm_wrapper #undef itxfm_wrapper
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment