Commit a1b0a3c8 authored by Jason Garrett-Glaser, committed by Michael Niedermayer

VP8: idct_mb optimizations

Currently uses AV_RL32 instead of AV_RL32A, as the latter doesn't exist yet.
(cherry picked from commit 62457f90)
parent a239d534
...@@ -1421,17 +1421,17 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo ...@@ -1421,17 +1421,17 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo
if (mb->mode != MODE_I4x4) { if (mb->mode != MODE_I4x4) {
uint8_t *y_dst = dst[0]; uint8_t *y_dst = dst[0];
for (y = 0; y < 4; y++) { for (y = 0; y < 4; y++) {
uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[y]); uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
if (nnz4) { if (nnz4) {
if (nnz4&~0x01010101) { if (nnz4&~0x01010101) {
for (x = 0; x < 4; x++) { for (x = 0; x < 4; x++) {
int nnz = s->non_zero_count_cache[y][x]; if ((uint8_t)nnz4 == 1)
if (nnz) { s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
if (nnz == 1) else if((uint8_t)nnz4 > 1)
s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize); s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
else nnz4 >>= 8;
s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); if (!nnz4)
} break;
} }
} else { } else {
s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize); s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
...@@ -1442,19 +1442,19 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo ...@@ -1442,19 +1442,19 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo
} }
for (ch = 0; ch < 2; ch++) { for (ch = 0; ch < 2; ch++) {
uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]); uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
if (nnz4) { if (nnz4) {
uint8_t *ch_dst = dst[1+ch]; uint8_t *ch_dst = dst[1+ch];
if (nnz4&~0x01010101) { if (nnz4&~0x01010101) {
for (y = 0; y < 2; y++) { for (y = 0; y < 2; y++) {
for (x = 0; x < 2; x++) { for (x = 0; x < 2; x++) {
int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x]; if ((uint8_t)nnz4 == 1)
if (nnz) { s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
if (nnz == 1) else if((uint8_t)nnz4 > 1)
s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
else nnz4 >>= 8;
s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); if (!nnz4)
} break;
} }
ch_dst += 4*s->uvlinesize; ch_dst += 4*s->uvlinesize;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment