Commit ccfb03ec authored by Ronald S. Bultje

vp9: fix scaled motion vector clipping for sub8x8 blocks.

To match the obscure clipping bug behaviour in libvpx.
parent 68c1e913
...@@ -2750,6 +2750,7 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm ...@@ -2750,6 +2750,7 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
const uint8_t *ref, ptrdiff_t ref_stride, const uint8_t *ref, ptrdiff_t ref_stride,
ThreadFrame *ref_frame, ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
int px, int py, int pw, int ph,
int bw, int bh, int w, int h, int bytesperpixel, int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step) const uint16_t *scale, const uint8_t *step)
{ {
...@@ -2759,8 +2760,8 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm ...@@ -2759,8 +2760,8 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
int th; int th;
VP56mv mv; VP56mv mv;
mv.x = av_clip(in_mv->x, -(x + bw + 4) << 3, (s->cols * 8 - x + 3) << 3); mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
mv.y = av_clip(in_mv->y, -(y + bh + 4) << 3, (s->rows * 8 - y + 3) << 3); mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
// BUG libvpx seems to scale the two components separately. This introduces // BUG libvpx seems to scale the two components separately. This introduces
// rounding errors but we have to reproduce them to be exactly compatible // rounding errors but we have to reproduce them to be exactly compatible
// with the output from libvpx... // with the output from libvpx...
...@@ -2798,6 +2799,7 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func ...@@ -2798,6 +2799,7 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
const uint8_t *ref_v, ptrdiff_t src_stride_v, const uint8_t *ref_v, ptrdiff_t src_stride_v,
ThreadFrame *ref_frame, ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
int px, int py, int pw, int ph,
int bw, int bh, int w, int h, int bytesperpixel, int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step) const uint16_t *scale, const uint8_t *step)
{ {
...@@ -2808,18 +2810,18 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func ...@@ -2808,18 +2810,18 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
if (s->ss_h) { if (s->ss_h) {
// BUG https://code.google.com/p/webm/issues/detail?id=820 // BUG https://code.google.com/p/webm/issues/detail?id=820
mv.x = av_clip(in_mv->x, -(x + bw + 4) << 4, (s->cols * 4 - x + 3) << 4); mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15); mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
} else { } else {
mv.x = av_clip(in_mv->x, -(x + bw + 4) << 3, (s->cols * 8 - x + 3) << 3); mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0); mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
} }
if (s->ss_v) { if (s->ss_v) {
// BUG https://code.google.com/p/webm/issues/detail?id=820 // BUG https://code.google.com/p/webm/issues/detail?id=820
mv.y = av_clip(in_mv->y, -(y + bh + 4) << 4, (s->rows * 4 - y + 3) << 4); mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15); my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
} else { } else {
mv.y = av_clip(in_mv->y, -(y + bh + 4) << 3, (s->rows * 8 - y + 3) << 3); mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1); my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
} }
#undef scale_mv #undef scale_mv
...@@ -2858,14 +2860,15 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func ...@@ -2858,14 +2860,15 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
} }
} }
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \ #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
px, py, pw, ph, bw, bh, w, h, i) \
mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \ mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, bw, bh, w, h, bytesperpixel, \ mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, i) \ row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, bytesperpixel, \ row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define SCALED 1 #define SCALED 1
#define FN(x) x##_scaled_8bpp #define FN(x) x##_scaled_8bpp
...@@ -2959,11 +2962,12 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc) ...@@ -2959,11 +2962,12 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)
} }
} }
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \ #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
px, py, pw, ph, bw, bh, w, h, i) \
mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \ mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, bw, bh, w, h, bytesperpixel) mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, i) \ row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, bytesperpixel) row, col, mv, bw, bh, w, h, bytesperpixel)
#define SCALED 0 #define SCALED 0
......
...@@ -57,11 +57,11 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -57,11 +57,11 @@ static void FN(inter_pred)(AVCodecContext *ctx)
if (b->bs == BS_8x4) { if (b->bs == BS_8x4) {
mc_luma_dir(s, mc[3][b->filter][0], s->dst[0], ls_y, mc_luma_dir(s, mc[3][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1, 0); row << 3, col << 3, &b->mv[0][0],,,,, 8, 4, w1, h1, 0);
mc_luma_dir(s, mc[3][b->filter][0], mc_luma_dir(s, mc[3][b->filter][0],
s->dst[0] + 4 * ls_y, ls_y, s->dst[0] + 4 * ls_y, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1, 0); (row << 3) + 4, col << 3, &b->mv[2][0],,,,, 8, 4, w1, h1, 0);
w1 = (w1 + s->ss_h) >> s->ss_h; w1 = (w1 + s->ss_h) >> s->ss_h;
if (s->ss_v) { if (s->ss_v) {
h1 = (h1 + 1) >> 1; h1 = (h1 + 1) >> 1;
...@@ -71,14 +71,14 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -71,14 +71,14 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 2, col << (3 - s->ss_h), row << 2, col << (3 - s->ss_h),
&uvmv, 8 >> s->ss_h, 4, w1, h1, 0); &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
} else { } else {
mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0], mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
s->dst[1], s->dst[2], ls_uv, s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 3, col << (3 - s->ss_h), row << 3, col << (3 - s->ss_h),
&b->mv[0][0], 8 >> s->ss_h, 4, w1, h1, 0); &b->mv[0][0],,,,, 8 >> s->ss_h, 4, w1, h1, 0);
// BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
// to get the motion vector for the bottom 4x4 block // to get the motion vector for the bottom 4x4 block
// https://code.google.com/p/webm/issues/detail?id=993 // https://code.google.com/p/webm/issues/detail?id=993
...@@ -92,17 +92,17 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -92,17 +92,17 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, col << (3 - s->ss_h), (row << 3) + 4, col << (3 - s->ss_h),
&uvmv, 8 >> s->ss_h, 4, w1, h1, 0); &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
} }
if (b->comp) { if (b->comp) {
mc_luma_dir(s, mc[3][b->filter][1], s->dst[0], ls_y, mc_luma_dir(s, mc[3][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2, 1); row << 3, col << 3, &b->mv[0][1],,,,, 8, 4, w2, h2, 1);
mc_luma_dir(s, mc[3][b->filter][1], mc_luma_dir(s, mc[3][b->filter][1],
s->dst[0] + 4 * ls_y, ls_y, s->dst[0] + 4 * ls_y, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2, 1); (row << 3) + 4, col << 3, &b->mv[2][1],,,,, 8, 4, w2, h2, 1);
w2 = (w2 + s->ss_h) >> s->ss_h; w2 = (w2 + s->ss_h) >> s->ss_h;
if (s->ss_v) { if (s->ss_v) {
h2 = (h2 + 1) >> 1; h2 = (h2 + 1) >> 1;
...@@ -112,14 +112,14 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -112,14 +112,14 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 2, col << (3 - s->ss_h), row << 2, col << (3 - s->ss_h),
&uvmv, 8 >> s->ss_h, 4, w2, h2, 1); &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
} else { } else {
mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1], mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
s->dst[1], s->dst[2], ls_uv, s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 3, col << (3 - s->ss_h), row << 3, col << (3 - s->ss_h),
&b->mv[0][1], 8 >> s->ss_h, 4, w2, h2, 1); &b->mv[0][1],,,,, 8 >> s->ss_h, 4, w2, h2, 1);
// BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
// to get the motion vector for the bottom 4x4 block // to get the motion vector for the bottom 4x4 block
// https://code.google.com/p/webm/issues/detail?id=993 // https://code.google.com/p/webm/issues/detail?id=993
...@@ -133,16 +133,16 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -133,16 +133,16 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, col << (3 - s->ss_h), (row << 3) + 4, col << (3 - s->ss_h),
&uvmv, 8 >> s->ss_h, 4, w2, h2, 1); &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
} }
} }
} else if (b->bs == BS_4x8) { } else if (b->bs == BS_4x8) {
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1, 0); row << 3, col << 3, &b->mv[0][0],,,,, 4, 8, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1, 0); row << 3, (col << 3) + 4, &b->mv[1][0],,,,, 4, 8, w1, h1, 0);
h1 = (h1 + s->ss_v) >> s->ss_v; h1 = (h1 + s->ss_v) >> s->ss_v;
if (s->ss_h) { if (s->ss_h) {
w1 = (w1 + 1) >> 1; w1 = (w1 + 1) >> 1;
...@@ -152,30 +152,30 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -152,30 +152,30 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << (3 - s->ss_v), col << 2, row << (3 - s->ss_v), col << 2,
&uvmv, 4, 8 >> s->ss_v, w1, h1, 0); &uvmv,,,,, 4, 8 >> s->ss_v, w1, h1, 0);
} else { } else {
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1], s->dst[2], ls_uv, s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << (3 - s->ss_v), col << 3, row << (3 - s->ss_v), col << 3,
&b->mv[0][0], 4, 8 >> s->ss_v, w1, h1, 0); &b->mv[0][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * bytesperpixel, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv, s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << (3 - s->ss_v), (col << 3) + 4, row << (3 - s->ss_v), (col << 3) + 4,
&b->mv[1][0], 4, 8 >> s->ss_v, w1, h1, 0); &b->mv[1][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
} }
if (b->comp) { if (b->comp) {
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2, 1); row << 3, col << 3, &b->mv[0][1],,,,, 4, 8, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2, 1); row << 3, (col << 3) + 4, &b->mv[1][1],,,,, 4, 8, w2, h2, 1);
h2 = (h2 + s->ss_v) >> s->ss_v; h2 = (h2 + s->ss_v) >> s->ss_v;
if (s->ss_h) { if (s->ss_h) {
w2 = (w2 + 1) >> 1; w2 = (w2 + 1) >> 1;
...@@ -185,21 +185,21 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -185,21 +185,21 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << (3 - s->ss_v), col << 2, row << (3 - s->ss_v), col << 2,
&uvmv, 4, 8 >> s->ss_v, w2, h2, 1); &uvmv,,,,, 4, 8 >> s->ss_v, w2, h2, 1);
} else { } else {
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1], s->dst[2], ls_uv, s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << (3 - s->ss_v), col << 3, row << (3 - s->ss_v), col << 3,
&b->mv[0][1], 4, 8 >> s->ss_v, w2, h2, 1); &b->mv[0][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * bytesperpixel, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv, s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << (3 - s->ss_v), (col << 3) + 4, row << (3 - s->ss_v), (col << 3) + 4,
&b->mv[1][1], 4, 8 >> s->ss_v, w2, h2, 1); &b->mv[1][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
} }
} }
} else } else
...@@ -211,18 +211,22 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -211,18 +211,22 @@ static void FN(inter_pred)(AVCodecContext *ctx)
// do a w8 instead of a w4 call // do a w8 instead of a w4 call
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1, 0); row << 3, col << 3, &b->mv[0][0],
0, 0, 8, 8, 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1, 0); row << 3, (col << 3) + 4, &b->mv[1][0],
4, 0, 8, 8, 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], mc_luma_dir(s, mc[4][b->filter][0],
s->dst[0] + 4 * ls_y, ls_y, s->dst[0] + 4 * ls_y, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1, 0); (row << 3) + 4, col << 3, &b->mv[2][0],
0, 4, 8, 8, 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], mc_luma_dir(s, mc[4][b->filter][0],
s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y, s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1, 0); (row << 3) + 4, (col << 3) + 4, &b->mv[3][0],
4, 4, 8, 8, 4, 4, w1, h1, 0);
if (s->ss_v) { if (s->ss_v) {
h1 = (h1 + 1) >> 1; h1 = (h1 + 1) >> 1;
if (s->ss_h) { if (s->ss_h) {
...@@ -234,7 +238,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -234,7 +238,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 2, col << 2, row << 2, col << 2,
&uvmv, 4, 4, w1, h1, 0); &uvmv, 0, 0, 4, 4, 4, 4, w1, h1, 0);
} else { } else {
uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]); uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
...@@ -242,7 +246,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -242,7 +246,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 2, col << 3, row << 2, col << 3,
&uvmv, 4, 4, w1, h1, 0); &uvmv, 0, 0, 8, 4, 4, 4, w1, h1, 0);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]); uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * bytesperpixel, s->dst[1] + 4 * bytesperpixel,
...@@ -250,7 +254,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -250,7 +254,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 2, (col << 3) + 4, row << 2, (col << 3) + 4,
&uvmv, 4, 4, w1, h1, 0); &uvmv, 4, 0, 8, 4, 4, 4, w1, h1, 0);
} }
} else { } else {
if (s->ss_h) { if (s->ss_h) {
...@@ -261,7 +265,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -261,7 +265,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 3, col << 2, row << 3, col << 2,
&uvmv, 4, 4, w1, h1, 0); &uvmv, 0, 0, 4, 8, 4, 4, w1, h1, 0);
// BUG libvpx uses wrong block index for 4:2:2 bs=4x4 // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
// bottom block // bottom block
// https://code.google.com/p/webm/issues/detail?id=993 // https://code.google.com/p/webm/issues/detail?id=993
...@@ -271,52 +275,52 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -271,52 +275,52 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, col << 2, (row << 3) + 4, col << 2,
&uvmv, 4, 4, w1, h1, 0); &uvmv, 0, 4, 4, 8, 4, 4, w1, h1, 0);
} else { } else {
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1], s->dst[2], ls_uv, s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 3, col << 3, row << 3, col << 3,
&b->mv[0][0], 4, 4, w1, h1, 0); &b->mv[0][0], 0, 0, 8, 8, 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * bytesperpixel, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv, s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 3, (col << 3) + 4, row << 3, (col << 3) + 4,
&b->mv[1][0], 4, 4, w1, h1, 0); &b->mv[1][0], 4, 0, 8, 8, 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv, s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, col << 3, (row << 3) + 4, col << 3,
&b->mv[2][0], 4, 4, w1, h1, 0); &b->mv[2][0], 0, 4, 8, 8, 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * ls_uv + 4 * bytesperpixel, s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv, s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, (col << 3) + 4, (row << 3) + 4, (col << 3) + 4,
&b->mv[3][0], 4, 4, w1, h1, 0); &b->mv[3][0], 4, 4, 8, 8, 4, 4, w1, h1, 0);
} }
} }
if (b->comp) { if (b->comp) {
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2, 1); row << 3, col << 3, &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2, 1); row << 3, (col << 3) + 4, &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], mc_luma_dir(s, mc[4][b->filter][1],
s->dst[0] + 4 * ls_y, ls_y, s->dst[0] + 4 * ls_y, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2, 1); (row << 3) + 4, col << 3, &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], mc_luma_dir(s, mc[4][b->filter][1],
s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y, s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2, 1); (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
if (s->ss_v) { if (s->ss_v) {
h2 = (h2 + 1) >> 1; h2 = (h2 + 1) >> 1;
if (s->ss_h) { if (s->ss_h) {
...@@ -328,7 +332,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -328,7 +332,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 2, col << 2, row << 2, col << 2,
&uvmv, 4, 4, w2, h2, 1); &uvmv, 0, 0, 4, 4, 4, 4, w2, h2, 1);
} else { } else {
uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]); uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
...@@ -336,7 +340,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -336,7 +340,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 2, col << 3, row << 2, col << 3,
&uvmv, 4, 4, w2, h2, 1); &uvmv, 0, 0, 8, 4, 4, 4, w2, h2, 1);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]); uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * bytesperpixel, s->dst[1] + 4 * bytesperpixel,
...@@ -344,7 +348,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -344,7 +348,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 2, (col << 3) + 4, row << 2, (col << 3) + 4,
&uvmv, 4, 4, w2, h2, 1); &uvmv, 4, 0, 8, 4, 4, 4, w2, h2, 1);
} }
} else { } else {
if (s->ss_h) { if (s->ss_h) {
...@@ -355,7 +359,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -355,7 +359,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 3, col << 2, row << 3, col << 2,
&uvmv, 4, 4, w2, h2, 1); &uvmv, 0, 0, 4, 8, 4, 4, w2, h2, 1);
// BUG libvpx uses wrong block index for 4:2:2 bs=4x4 // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
// bottom block // bottom block
// https://code.google.com/p/webm/issues/detail?id=993 // https://code.google.com/p/webm/issues/detail?id=993
...@@ -365,34 +369,34 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -365,34 +369,34 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, col << 2, (row << 3) + 4, col << 2,
&uvmv, 4, 4, w2, h2, 1); &uvmv, 0, 4, 4, 8, 4, 4, w2, h2, 1);
} else { } else {
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1], s->dst[2], ls_uv, s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 3, col << 3, row << 3, col << 3,
&b->mv[0][1], 4, 4, w2, h2, 1); &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * bytesperpixel, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv, s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 3, (col << 3) + 4, row << 3, (col << 3) + 4,
&b->mv[1][1], 4, 4, w2, h2, 1); &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv, s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, col << 3, (row << 3) + 4, col << 3,
&b->mv[2][1], 4, 4, w2, h2, 1); &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * ls_uv + 4 * bytesperpixel, s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv, s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, (col << 3) + 4, (row << 3) + 4, (col << 3) + 4,
&b->mv[3][1], 4, 4, w2, h2, 1); &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
} }
} }
} }
...@@ -404,7 +408,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -404,7 +408,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[bwl][b->filter][0], s->dst[0], ls_y, mc_luma_dir(s, mc[bwl][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], bw, bh, w1, h1, 0); row << 3, col << 3, &b->mv[0][0], 0, 0, bw, bh, bw, bh, w1, h1, 0);
w1 = (w1 + s->ss_h) >> s->ss_h; w1 = (w1 + s->ss_h) >> s->ss_h;
h1 = (h1 + s->ss_v) >> s->ss_v; h1 = (h1 + s->ss_v) >> s->ss_v;
mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][0], mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][0],
...@@ -412,12 +416,12 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -412,12 +416,12 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << (3 - s->ss_v), col << (3 - s->ss_h), row << (3 - s->ss_v), col << (3 - s->ss_h),
&b->mv[0][0], uvbw, uvbh, w1, h1, 0); &b->mv[0][0], 0, 0, uvbw, uvbh, uvbw, uvbh, w1, h1, 0);
if (b->comp) { if (b->comp) {
mc_luma_dir(s, mc[bwl][b->filter][1], s->dst[0], ls_y, mc_luma_dir(s, mc[bwl][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2, 1); row << 3, col << 3, &b->mv[0][1], 0, 0, bw, bh, bw, bh, w2, h2, 1);
w2 = (w2 + s->ss_h) >> s->ss_h; w2 = (w2 + s->ss_h) >> s->ss_h;
h2 = (h2 + s->ss_v) >> s->ss_v; h2 = (h2 + s->ss_v) >> s->ss_v;
mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][1], mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][1],
...@@ -425,7 +429,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -425,7 +429,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << (3 - s->ss_v), col << (3 - s->ss_h), row << (3 - s->ss_v), col << (3 - s->ss_h),
&b->mv[0][1], uvbw, uvbh, w2, h2, 1); &b->mv[0][1], 0, 0, uvbw, uvbh, uvbw, uvbh, w2, h2, 1);
} }
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment