Commit f7a8c179 authored by Michael Niedermayer's avatar Michael Niedermayer

optimizations

Originally committed as revision 1869 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent d04fdff1
...@@ -319,6 +319,8 @@ static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t v ...@@ -319,6 +319,8 @@ static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t v
*(uint16_t*)(p + 1*stride)= *(uint16_t*)(p + 1*stride)=
*(uint16_t*)(p + 2*stride)= *(uint16_t*)(p + 2*stride)=
*(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101; *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
}else if(w==4 && h==1){
*(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
}else if(w==4 && h==2){ }else if(w==4 && h==2){
*(uint32_t*)(p + 0*stride)= *(uint32_t*)(p + 0*stride)=
*(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101; *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
...@@ -3197,7 +3199,7 @@ static int decode_mb(H264Context *h){ ...@@ -3197,7 +3199,7 @@ static int decode_mb(H264Context *h){
const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
int mb_type, partition_count, cbp; int mb_type, partition_count, cbp;
memset(h->mb, 0, sizeof(int16_t)*24*16); //FIXME avoid if allready clear (move after skip handlong? s->dsp.clear_blocks(h->mb); //FIXME avoid if allready clear (move after skip handlong?
tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y); tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
......
...@@ -43,6 +43,15 @@ ...@@ -43,6 +43,15 @@
* svq3 decoder. * svq3 decoder.
*/ */
/* dual scan (from some older h264 draft)
o-->o-->o o
| /|
o o o / o
| / | |/ |
o o o o
/
o-->o-->o-->o
*/
static const uint8_t svq3_scan[16]={ static const uint8_t svq3_scan[16]={
0+0*4, 1+0*4, 2+0*4, 2+1*4, 0+0*4, 1+0*4, 2+0*4, 2+1*4,
2+2*4, 3+0*4, 3+1*4, 3+2*4, 2+2*4, 3+0*4, 3+1*4, 3+2*4,
...@@ -446,17 +455,22 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { ...@@ -446,17 +455,22 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
} }
/* fill caches */ /* fill caches */
memset (h->ref_cache[0], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); /* note ref_cache[0] should contain here:
????????
???11111
N??11111
N??11111
N??11111
N
*/
if (s->mb_x > 0) { if (s->mb_x > 0) {
for (i=0; i < 4; i++) { for (i=0; i < 4; i++) {
*(uint32_t *) h->mv_cache[0][scan8[0] - 1 + i*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - 1 + i*h->b_stride]; *(uint32_t *) h->mv_cache[0][scan8[0] - 1 + i*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - 1 + i*h->b_stride];
h->ref_cache[0][scan8[0] - 1 + i*8] = 1;
} }
} else { } else {
for (i=0; i < 4; i++) { for (i=0; i < 4; i++) {
*(uint32_t *) h->mv_cache[0][scan8[0] - 1 + i*8] = 0; *(uint32_t *) h->mv_cache[0][scan8[0] - 1 + i*8] = 0;
h->ref_cache[0][scan8[0] - 1 + i*8] = 1;
} }
} }
if (s->mb_y > 0) { if (s->mb_y > 0) {
...@@ -466,12 +480,15 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { ...@@ -466,12 +480,15 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
if (s->mb_x < (s->mb_width - 1)) { if (s->mb_x < (s->mb_width - 1)) {
*(uint32_t *) h->mv_cache[0][scan8[0] + 4 - 1*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - h->b_stride + 4]; *(uint32_t *) h->mv_cache[0][scan8[0] + 4 - 1*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - h->b_stride + 4];
h->ref_cache[0][scan8[0] + 4 - 1*8] = 1; h->ref_cache[0][scan8[0] + 4 - 1*8] = 1;
} }else
h->ref_cache[0][scan8[0] + 4 - 1*8] = PART_NOT_AVAILABLE;
if (s->mb_x > 0) { if (s->mb_x > 0) {
*(uint32_t *) h->mv_cache[0][scan8[0] - 1 - 1*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - h->b_stride - 1]; *(uint32_t *) h->mv_cache[0][scan8[0] - 1 - 1*8] = *(uint32_t *) s->current_picture.motion_val[0][b_xy - h->b_stride - 1];
h->ref_cache[0][scan8[0] - 1 - 1*8] = 1; h->ref_cache[0][scan8[0] - 1 - 1*8] = 1;
} }else
} h->ref_cache[0][scan8[0] - 1 - 1*8] = PART_NOT_AVAILABLE;
}else
memset (&h->ref_cache[0][scan8[0] - 1*8 - 1], PART_NOT_AVAILABLE, 8);
/* decode motion vector(s) and form prediction(s) */ /* decode motion vector(s) and form prediction(s) */
part_width = ((mb_type & 5) == 5) ? 4 : 8 << (mb_type & 1); part_width = ((mb_type & 5) == 5) ? 4 : 8 << (mb_type & 1);
...@@ -510,14 +527,7 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { ...@@ -510,14 +527,7 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
} }
/* update mv_cache */ /* update mv_cache */
for (l=0; l < part_height; l+=4) { fill_rectangle(h->mv_cache[0][scan8[k]], part_width>>2, part_height>>2, 8, (mx&0xFFFF)+(my<<16), 4);
for (m=0; m < part_width; m+=4) {
k = scan8[0] + ((m + j) >> 2) + ((l + i) << 1);
h->mv_cache [0][k][0] = mx;
h->mv_cache [0][k][1] = my;
h->ref_cache[0][k] = 1;
}
}
svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my); svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my);
} }
...@@ -592,8 +602,8 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { ...@@ -592,8 +602,8 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
memset (h->intra4x4_pred_mode[mb_xy], DC_PRED, 8); memset (h->intra4x4_pred_mode[mb_xy], DC_PRED, 8);
} }
if (!IS_SKIP(mb_type)) { if (!IS_SKIP(mb_type)) {
memset (h->mb, 0, 24*16*sizeof(DCTELEM)); memset (h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t));
memset (h->non_zero_count_cache, 0, 8*6*sizeof(uint8_t)); s->dsp.clear_blocks(h->mb);
} }
if (IS_INTRA16x16(mb_type) || (s->pict_type != I_TYPE && s->adaptive_quant && cbp)) { if (IS_INTRA16x16(mb_type) || (s->pict_type != I_TYPE && s->adaptive_quant && cbp)) {
...@@ -743,6 +753,13 @@ static int svq3_decode_frame (AVCodecContext *avctx, ...@@ -743,6 +753,13 @@ static int svq3_decode_frame (AVCodecContext *avctx,
frame_start (h); frame_start (h);
for(i=0; i<4; i++){
int j;
for(j=-1; j<4; j++)
h->ref_cache[0][scan8[0] + 8*i + j]= 1;
h->ref_cache[0][scan8[0] + 8*i + j]= PART_NOT_AVAILABLE;
}
for (s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) { for (s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) {
for (s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) { for (s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
int mb_type = svq3_get_ue_golomb (&s->gb); int mb_type = svq3_get_ue_golomb (&s->gb);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment