Commit b86ab381, authored by Kostya Shishkov and committed by Ronald S. Bultje

Add weighted motion compensation for RV40 B-frames

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
parent e5ec6869
...@@ -717,7 +717,7 @@ static const int chroma_coeffs[3] = { 0, 3, 5 }; ...@@ -717,7 +717,7 @@ static const int chroma_coeffs[3] = { 0, 3, 5 };
static inline void rv34_mc(RV34DecContext *r, const int block_type, static inline void rv34_mc(RV34DecContext *r, const int block_type,
const int xoff, const int yoff, int mv_off, const int xoff, const int yoff, int mv_off,
const int width, const int height, int dir, const int width, const int height, int dir,
const int thirdpel, const int thirdpel, int weighted,
qpel_mc_func (*qpel_mc)[16], qpel_mc_func (*qpel_mc)[16],
h264_chroma_mc_func (*chroma_mc)) h264_chroma_mc_func (*chroma_mc))
{ {
...@@ -781,9 +781,15 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type, ...@@ -781,9 +781,15 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type,
srcU = uvbuf; srcU = uvbuf;
srcV = uvbuf + 16; srcV = uvbuf + 16;
} }
Y = s->dest[0] + xoff + yoff *s->linesize; if(!weighted){
U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize; Y = s->dest[0] + xoff + yoff *s->linesize;
V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize; U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
}else{
Y = r->tmp_b_block_y [dir] + xoff + yoff *s->linesize;
U = r->tmp_b_block_uv[dir*2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
V = r->tmp_b_block_uv[dir*2+1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
}
if(block_type == RV34_MB_P_16x8){ if(block_type == RV34_MB_P_16x8){
qpel_mc[1][dxy](Y, srcY, s->linesize); qpel_mc[1][dxy](Y, srcY, s->linesize);
...@@ -804,33 +810,70 @@ static void rv34_mc_1mv(RV34DecContext *r, const int block_type, ...@@ -804,33 +810,70 @@ static void rv34_mc_1mv(RV34DecContext *r, const int block_type,
const int xoff, const int yoff, int mv_off, const int xoff, const int yoff, int mv_off,
const int width, const int height, int dir) const int width, const int height, int dir)
{ {
rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30, rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30, 0,
r->rdsp.put_pixels_tab, r->rdsp.put_pixels_tab,
r->rdsp.put_chroma_pixels_tab); r->rdsp.put_chroma_pixels_tab);
} }
/**
 * Blend the temporary prediction blocks held in the context into the
 * current destination planes, using the context's weight1/weight2.
 *
 * The luma plane goes through rv40_weight_pixels_tab[0]; both chroma
 * planes go through rv40_weight_pixels_tab[1].
 */
static void rv4_weight(RV34DecContext *r)
{
    int plane;

    /* luma: tmp_b_block_y[0] and tmp_b_block_y[1] -> dest[0] */
    r->rdsp.rv40_weight_pixels_tab[0](r->s.dest[0],
                                      r->tmp_b_block_y[0],
                                      r->tmp_b_block_y[1],
                                      r->weight1, r->weight2,
                                      r->s.linesize);

    /* chroma: tmp_b_block_uv[plane] and tmp_b_block_uv[plane + 2] -> dest[1 + plane] */
    for (plane = 0; plane < 2; plane++)
        r->rdsp.rv40_weight_pixels_tab[1](r->s.dest[1 + plane],
                                          r->tmp_b_block_uv[plane],
                                          r->tmp_b_block_uv[plane + 2],
                                          r->weight1, r->weight2,
                                          r->s.uvlinesize);
}
static void rv34_mc_2mv(RV34DecContext *r, const int block_type) static void rv34_mc_2mv(RV34DecContext *r, const int block_type)
{ {
rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30, int weighted = !r->rv30 && block_type != RV34_MB_B_BIDIR && r->weight1 != 8192;
rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30, weighted,
r->rdsp.put_pixels_tab, r->rdsp.put_pixels_tab,
r->rdsp.put_chroma_pixels_tab); r->rdsp.put_chroma_pixels_tab);
rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30, if(!weighted){
r->rdsp.avg_pixels_tab, rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30, 0,
r->rdsp.avg_chroma_pixels_tab); r->rdsp.avg_pixels_tab,
r->rdsp.avg_chroma_pixels_tab);
}else{
rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30, 1,
r->rdsp.put_pixels_tab,
r->rdsp.put_chroma_pixels_tab);
rv4_weight(r);
}
} }
static void rv34_mc_2mv_skip(RV34DecContext *r) static void rv34_mc_2mv_skip(RV34DecContext *r)
{ {
int i, j; int i, j;
int weighted = !r->rv30 && r->weight1 != 8192;
for(j = 0; j < 2; j++) for(j = 0; j < 2; j++)
for(i = 0; i < 2; i++){ for(i = 0; i < 2; i++){
rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 0, r->rv30, rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 0, r->rv30,
weighted,
r->rdsp.put_pixels_tab, r->rdsp.put_pixels_tab,
r->rdsp.put_chroma_pixels_tab); r->rdsp.put_chroma_pixels_tab);
rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 1, r->rv30, rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 1, r->rv30,
r->rdsp.avg_pixels_tab, weighted,
r->rdsp.avg_chroma_pixels_tab); weighted ? r->rdsp.put_pixels_tab : r->rdsp.avg_pixels_tab,
weighted ? r->rdsp.put_chroma_pixels_tab : r->rdsp.avg_chroma_pixels_tab);
} }
if(weighted)
rv4_weight(r);
} }
/** number of motion vectors in each macroblock type */ /** number of motion vectors in each macroblock type */
...@@ -1265,6 +1308,16 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int ...@@ -1265,6 +1308,16 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int
if(MPV_frame_start(s, s->avctx) < 0) if(MPV_frame_start(s, s->avctx) < 0)
return -1; return -1;
ff_er_frame_start(s); ff_er_frame_start(s);
/* (Re)build the scratch area for weighted MC when it does not exist yet or
 * the picture dimensions differ from the slice header. The area spans 48
 * rows of linesize bytes: two 16-row luma blocks first, then the chroma
 * blocks starting at row 32. */
if (!r->tmp_b_block_base || s->width != r->si.width || s->height != r->si.height) {
    int i;
    /* Reallocate through a temporary: on failure the previous buffer stays
     * owned by the context (and is released at decoder close) instead of
     * being lost, and no block pointer is derived from NULL. */
    uint8_t *tmp = av_realloc(r->tmp_b_block_base, s->linesize * 48);
    if (!tmp)
        return -1;
    r->tmp_b_block_base = tmp;
    /* luma blocks: one per direction, 16 rows each */
    for (i = 0; i < 2; i++)
        r->tmp_b_block_y[i] = r->tmp_b_block_base + i * 16 * s->linesize;
    /* chroma blocks: pairs are 8 chroma rows apart, and the two blocks of a
     * pair sit 16 bytes apart within the same rows */
    for (i = 0; i < 4; i++)
        r->tmp_b_block_uv[i] = r->tmp_b_block_base + 32 * s->linesize
                               + (i >> 1) * 8 * s->uvlinesize + (i & 1) * 16;
}
r->cur_pts = r->si.pts; r->cur_pts = r->si.pts;
if(s->pict_type != AV_PICTURE_TYPE_B){ if(s->pict_type != AV_PICTURE_TYPE_B){
r->last_pts = r->next_pts; r->last_pts = r->next_pts;
...@@ -1500,6 +1553,7 @@ av_cold int ff_rv34_decode_end(AVCodecContext *avctx) ...@@ -1500,6 +1553,7 @@ av_cold int ff_rv34_decode_end(AVCodecContext *avctx)
av_freep(&r->intra_types_hist); av_freep(&r->intra_types_hist);
r->intra_types = NULL; r->intra_types = NULL;
av_freep(&r->tmp_b_block_base);
av_freep(&r->mb_type); av_freep(&r->mb_type);
av_freep(&r->cbp_luma); av_freep(&r->cbp_luma);
av_freep(&r->cbp_chroma); av_freep(&r->cbp_chroma);
......
...@@ -116,6 +116,11 @@ typedef struct RV34DecContext{ ...@@ -116,6 +116,11 @@ typedef struct RV34DecContext{
/** 8x8 block available flags (for MV prediction) */ /** 8x8 block available flags (for MV prediction) */
DECLARE_ALIGNED(8, uint32_t, avail_cache)[3*4]; DECLARE_ALIGNED(8, uint32_t, avail_cache)[3*4];
/** temporary blocks for RV4 weighted MC */
uint8_t *tmp_b_block_y[2];
uint8_t *tmp_b_block_uv[4];
uint8_t *tmp_b_block_base;
int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si); int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si);
int (*decode_mb_info)(struct RV34DecContext *r); int (*decode_mb_info)(struct RV34DecContext *r);
int (*decode_intra_types)(struct RV34DecContext *r, GetBitContext *gb, int8_t *dst); int (*decode_intra_types)(struct RV34DecContext *r, GetBitContext *gb, int8_t *dst);
......
...@@ -29,11 +29,17 @@ ...@@ -29,11 +29,17 @@
#include "dsputil.h" #include "dsputil.h"
/**
 * Weighted-blend function type: combines the square blocks at src1 and src2
 * into dst using the weights w1 and w2. All three buffers are walked with
 * the same stride.
 * NOTE(review): the C implementation scales src1 by w2 and src2 by w1 -
 * confirm that any optimized implementations keep the same pairing.
 */
typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/,
uint8_t *src1/*align width (8 or 16)*/,
uint8_t *src2/*align width (8 or 16)*/,
int w1, int w2, int stride);
typedef struct RV34DSPContext { typedef struct RV34DSPContext {
qpel_mc_func put_pixels_tab[4][16]; qpel_mc_func put_pixels_tab[4][16];
qpel_mc_func avg_pixels_tab[4][16]; qpel_mc_func avg_pixels_tab[4][16];
h264_chroma_mc_func put_chroma_pixels_tab[3]; h264_chroma_mc_func put_chroma_pixels_tab[3];
h264_chroma_mc_func avg_chroma_pixels_tab[3]; h264_chroma_mc_func avg_chroma_pixels_tab[3];
rv40_weight_func rv40_weight_pixels_tab[2];
} RV34DSPContext; } RV34DSPContext;
void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp); void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp);
......
...@@ -285,6 +285,23 @@ static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a ...@@ -285,6 +285,23 @@ static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
RV40_CHROMA_MC(put_, op_put) RV40_CHROMA_MC(put_, op_put)
RV40_CHROMA_MC(avg_, op_avg) RV40_CHROMA_MC(avg_, op_avg)
/**
 * Generate rv40_weight_func_<size>(): blends two size x size blocks into dst.
 *
 * Each output sample is
 *     (((w2 * src1) >> 9) + ((w1 * src2) >> 9) + 0x10) >> 5
 * i.e. src1 is scaled by w2 and src2 by w1. All three buffers advance by
 * stride bytes per row.
 */
#define RV40_WEIGHT_FUNC(size) \
static void rv40_weight_func_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)\
{\
    int row, col;\
\
    for (row = 0; row < size; row++, src1 += stride, src2 += stride, dst += stride) {\
        for (col = 0; col < size; col++) {\
            const int part1 = (w2 * src1[col]) >> 9;\
            const int part2 = (w1 * src2[col]) >> 9;\
\
            dst[col] = (part1 + part2 + 0x10) >> 5;\
        }\
    }\
}

RV40_WEIGHT_FUNC(16)
RV40_WEIGHT_FUNC(8)
av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0]; c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0];
c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c; c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c;
...@@ -356,6 +373,9 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { ...@@ -356,6 +373,9 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c; c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c;
c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c; c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c;
c->rv40_weight_pixels_tab[0] = rv40_weight_func_16;
c->rv40_weight_pixels_tab[1] = rv40_weight_func_8;
if (HAVE_MMX) if (HAVE_MMX)
ff_rv40dsp_init_x86(c, dsp); ff_rv40dsp_init_x86(c, dsp);
} }
...@@ -16,106 +16,106 @@ ...@@ -16,106 +16,106 @@
0, 112500, 276480, 0x5f7a0d4f 0, 112500, 276480, 0x5f7a0d4f
0, 120000, 276480, 0x5f7a0d4f 0, 120000, 276480, 0x5f7a0d4f
0, 127500, 276480, 0x5f7a0d4f 0, 127500, 276480, 0x5f7a0d4f
0, 135000, 276480, 0x2d722f8a 0, 135000, 276480, 0x75641594
0, 142500, 276480, 0xebbb3c8f 0, 142500, 276480, 0x32ee3526
0, 150000, 276480, 0x8574c868 0, 150000, 276480, 0x5ce39368
0, 157500, 276480, 0x4ec1e418 0, 157500, 276480, 0x4ec1e418
0, 165000, 276480, 0x95f22651 0, 165000, 276480, 0x85cbc3b5
0, 172500, 276480, 0x071d897e 0, 172500, 276480, 0x377c7b46
0, 180000, 276480, 0x9f7623f9 0, 180000, 276480, 0x756a4a2e
0, 187500, 276480, 0x86d4dedf 0, 187500, 276480, 0xcb379547
0, 195000, 276480, 0xc0a0be22 0, 195000, 276480, 0x99c085be
0, 202500, 276480, 0xc5902aec 0, 202500, 276480, 0xe479ffed
0, 210000, 276480, 0xe000f066 0, 210000, 276480, 0x1e4fae19
0, 217500, 276480, 0x0b2a48d5 0, 217500, 276480, 0x776412ef
0, 225000, 276480, 0xa1565256 0, 225000, 276480, 0x58ce0f38
0, 232500, 276480, 0x8de3ceb3 0, 232500, 276480, 0x5ab69b27
0, 240000, 276480, 0x654b564a 0, 240000, 276480, 0xc3db9706
0, 247500, 276480, 0xc9c57884 0, 247500, 276480, 0xc9c57884
0, 255000, 276480, 0x89cdcdd4 0, 255000, 276480, 0x000b5269
0, 262500, 276480, 0x3594fe61 0, 262500, 276480, 0x27ff7a5d
0, 270000, 276480, 0x9d082a81 0, 270000, 276480, 0x70647530
0, 277500, 276480, 0x4e6cd0c3 0, 277500, 276480, 0x97612c4b
0, 285000, 276480, 0xc129765f 0, 285000, 276480, 0xdf4e04d7
0, 292500, 276480, 0x92a04c99 0, 292500, 276480, 0xbd98f57c
0, 300000, 276480, 0x5ca62953 0, 300000, 276480, 0x5163b29b
0, 307500, 276480, 0xb7e478aa 0, 307500, 276480, 0x99170e64
0, 315000, 276480, 0x932735d5 0, 315000, 276480, 0x8a4e991f
0, 322500, 276480, 0xaaa2d7aa 0, 322500, 276480, 0x6a45425f
0, 330000, 276480, 0xd1329996 0, 330000, 276480, 0x7bf6b1ef
0, 337500, 276480, 0x6de1e34b 0, 337500, 276480, 0x6de1e34b
0, 345000, 276480, 0x8c963c9b 0, 345000, 276480, 0xdcaaa99a
0, 352500, 276480, 0xce6eff29 0, 352500, 276480, 0xd1e98808
0, 360000, 276480, 0x25412f7e 0, 360000, 276480, 0x6e2d524e
0, 367500, 276480, 0x11a5ad85 0, 367500, 276480, 0x22c50a3d
0, 375000, 276480, 0x26ea3248 0, 375000, 276480, 0x62b76407
0, 382500, 276480, 0x86c35fa4 0, 382500, 276480, 0x51e9b3eb
0, 390000, 276480, 0xa98a2d38 0, 390000, 276480, 0x441f7afd
0, 397500, 276480, 0xed827333 0, 397500, 276480, 0xfb01efc6
0, 405000, 276480, 0x5d44a824 0, 405000, 276480, 0x294bb441
0, 412500, 276480, 0x46d54d04 0, 412500, 276480, 0xe04ac45e
0, 420000, 276480, 0x413fd26a 0, 420000, 276480, 0x58f275ea
0, 427500, 276480, 0xf0b3b71b 0, 427500, 276480, 0xf0b3b71b
0, 435000, 276480, 0x459bc06d 0, 435000, 276480, 0x674e34e4
0, 442500, 276480, 0x4199cd45 0, 442500, 276480, 0x41dda2d9
0, 450000, 276480, 0xa8d35683 0, 450000, 276480, 0xf46ba7fb
0, 457500, 276480, 0x9a3e7de0 0, 457500, 276480, 0x28b54815
0, 465000, 276480, 0x5a30f666 0, 465000, 276480, 0xaf2b5d89
0, 472500, 276480, 0x40152668 0, 472500, 276480, 0x8facba58
0, 480000, 276480, 0x90c4d22c 0, 480000, 276480, 0x28a63236
0, 487500, 276480, 0x5cbaacc9 0, 487500, 276480, 0x1ad43fd7
0, 495000, 276480, 0x72b658f1 0, 495000, 276480, 0x71507bd2
0, 502500, 276480, 0x0ba3dcc9 0, 502500, 276480, 0x35626022
0, 510000, 276480, 0x259ed5c1 0, 510000, 276480, 0x7c1139b3
0, 517500, 276480, 0x7fd73a99 0, 517500, 276480, 0x7fd73a99
0, 525000, 276480, 0x488980c5 0, 525000, 276480, 0xb52e1aa2
0, 532500, 276480, 0x1d4c96a5 0, 532500, 276480, 0xd6f82cae
0, 540000, 276480, 0x41ced7f2 0, 540000, 276480, 0xf88f75d4
0, 547500, 276480, 0xd62d1837 0, 547500, 276480, 0x04a8e3ee
0, 555000, 276480, 0xf5fd9d20 0, 555000, 276480, 0xa29f5b01
0, 562500, 276480, 0x2af91fda 0, 562500, 276480, 0x754ceaf5
0, 570000, 276480, 0x38ce229d 0, 570000, 276480, 0x5a38b4af
0, 577500, 276480, 0xf3a712c0 0, 577500, 276480, 0xfcebc261
0, 585000, 276480, 0x57b111d2 0, 585000, 276480, 0x3d3ca985
0, 592500, 276480, 0x8556b792 0, 592500, 276480, 0x94a03c75
0, 600000, 276480, 0xb32d0896 0, 600000, 276480, 0x2f98911c
0, 607500, 276480, 0x923b9937 0, 607500, 276480, 0x923b9937
0, 615000, 276480, 0x0da1e7e3 0, 615000, 276480, 0xefab7ffd
0, 622500, 276480, 0x7f172382 0, 622500, 276480, 0x6b9fbc80
0, 630000, 276480, 0x93622b88 0, 630000, 276480, 0xe4bdbd1e
0, 637500, 276480, 0x2599d540 0, 637500, 276480, 0x225a56c0
0, 645000, 276480, 0xed20c105 0, 645000, 276480, 0xf58b1b7c
0, 652500, 276480, 0x62ce256e 0, 652500, 276480, 0xbaffcdcc
0, 660000, 276480, 0x286a04bb 0, 660000, 276480, 0xeb6eb88f
0, 667500, 276480, 0x423f7e7c 0, 667500, 276480, 0xdb753d35
0, 675000, 276480, 0x21fc252a 0, 675000, 276480, 0xea80a82e
0, 682500, 276480, 0xf8a8e8ee 0, 682500, 276480, 0x2aae902a
0, 690000, 276480, 0x770d4a8d 0, 690000, 276480, 0x9b9ee961
0, 697500, 276480, 0xaa12b6fd 0, 697500, 276480, 0xaa12b6fd
0, 705000, 276480, 0xdc7221a8 0, 705000, 276480, 0x50c31e73
0, 712500, 276480, 0x487eeb30 0, 712500, 276480, 0xdd9fb89f
0, 720000, 276480, 0x1e74f2db 0, 720000, 276480, 0xaf82399a
0, 727500, 276480, 0x40ae2bc3 0, 727500, 276480, 0x7ce5f23c
0, 735000, 276480, 0x9ca9b930 0, 735000, 276480, 0x5aaa7519
0, 742500, 276480, 0x9fb19b0f 0, 742500, 276480, 0xe45a5599
0, 750000, 276480, 0x7bdf836c 0, 750000, 276480, 0x704411fb
0, 757500, 276480, 0x1e607ba7 0, 757500, 276480, 0x9d7430a1
0, 765000, 276480, 0xbd96578b 0, 765000, 276480, 0x2c230702
0, 772500, 276480, 0x2124bf07 0, 772500, 276480, 0x4a4f76cd
0, 780000, 276480, 0x4895e27a 0, 780000, 276480, 0x27f54854
0, 787500, 276480, 0x694d76e3 0, 787500, 276480, 0x694d76e3
0, 795000, 276480, 0xe70df513 0, 795000, 276480, 0x525463e2
0, 802500, 276480, 0xcacafe6b 0, 802500, 276480, 0x819898f9
0, 810000, 276480, 0x64087748 0, 810000, 276480, 0xeeed00fc
0, 817500, 276480, 0x571fda23 0, 817500, 276480, 0xb6f99ee3
0, 825000, 276480, 0x8c86cbe9 0, 825000, 276480, 0xefc83107
0, 832500, 276480, 0xc8ea4671 0, 832500, 276480, 0xbb22e024
0, 840000, 276480, 0xbfb74300 0, 840000, 276480, 0x300f922a
0, 847500, 276480, 0xbe1e3770 0, 847500, 276480, 0x826fc3bd
0, 855000, 276480, 0x757a0232 0, 855000, 276480, 0x679a53f8
0, 862500, 276480, 0xa5f50c84 0, 862500, 276480, 0x976c9e93
0, 870000, 276480, 0x6d95f808 0, 870000, 276480, 0xb194656e
0, 877500, 276480, 0xf002c5ca 0, 877500, 276480, 0xf002c5ca
0, 885000, 276480, 0x1a2abb26 0, 885000, 276480, 0xb243dda5
0, 892500, 276480, 0x6cf69bf2 0, 892500, 276480, 0x1700efbb
0, 900000, 276480, 0x8f316c66 0, 900000, 276480, 0x8f316c66
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment