Commit d56245f7 authored by Michael Niedermayer

Merge remote-tracking branch 'rbultje/vp9-profile1-wip'

* rbultje/vp9-profile1-wip:
  vp9: add fate test for 422.
  vp9: copy bug in libvpx for 4:2:2 chroma bs=8x4/4x4 prediction.
  vp9: add yuv440 fate test.
  vp9: fix mask_edges and filter_plane_rows/cols() for 440.
  vp9: more specifically specify mask destination to mask_edges().
  vp9: add fate test for profile 1 444.
  vp9: don't create special u/v filter masks for 444.
  vp9: merge uv loopfilter code into generic filter_plane_rows/cols().
  vp9: split out loopfilter luma rows/cols functions from loopfilter_sb().
  vp9: invert order of two conditions.
  vp9: use correct chroma subsampling for profile 1 inter block recon.
  vp9: use correct chroma subsampling for profile 1 intra block recon.
  vp9: take chroma subsampling into account when walking the block tree.
  vp9: support non-420 chroma subsampling for profile 1 token decoding.
  vp9: increase buffer sizes for non-420 chroma subsamplings.
  vp9: profile 1 header decoding.
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents c8de8f7e b005d097
......@@ -112,8 +112,7 @@ typedef struct VP9Context {
uint8_t invisible;
uint8_t use_last_frame_mvs;
uint8_t errorres;
uint8_t colorspace;
uint8_t fullrange;
uint8_t ss_h, ss_v;
uint8_t intraonly;
uint8_t resetctx;
uint8_t refreshrefmask;
......@@ -216,7 +215,7 @@ typedef struct VP9Context {
DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
......@@ -249,8 +248,8 @@ typedef struct VP9Context {
int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
struct { int x, y; } min_mv, max_mv;
DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64];
uint16_t mvscale[3][2];
uint8_t mvstep[3][2];
} VP9Context;
......@@ -308,39 +307,42 @@ static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
return 0;
}
static int update_size(AVCodecContext *ctx, int w, int h)
static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
{
VP9Context *s = ctx->priv_data;
uint8_t *p;
av_assert0(w > 0 && h > 0);
if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
return 0;
ctx->width = w;
ctx->height = h;
s->sb_cols = (w + 63) >> 6;
s->sb_rows = (h + 63) >> 6;
s->cols = (w + 7) >> 3;
s->rows = (h + 7) >> 3;
ctx->width = w;
ctx->height = h;
ctx->pix_fmt = fmt;
s->sb_cols = (w + 63) >> 6;
s->sb_rows = (h + 63) >> 6;
s->cols = (w + 7) >> 3;
s->rows = (h + 7) >> 3;
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
av_freep(&s->intra_pred_data[0]);
p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
// FIXME we slightly over-allocate here for subsampled chroma, but a little
// bit of padding shouldn't affect performance...
p = av_malloc(s->sb_cols * (320 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
if (!p)
return AVERROR(ENOMEM);
assign(s->intra_pred_data[0], uint8_t *, 64);
assign(s->intra_pred_data[1], uint8_t *, 32);
assign(s->intra_pred_data[2], uint8_t *, 32);
assign(s->intra_pred_data[1], uint8_t *, 64);
assign(s->intra_pred_data[2], uint8_t *, 64);
assign(s->above_y_nnz_ctx, uint8_t *, 16);
assign(s->above_mode_ctx, uint8_t *, 16);
assign(s->above_mv_ctx, VP56mv(*)[2], 16);
assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
assign(s->above_partition_ctx, uint8_t *, 8);
assign(s->above_skip_ctx, uint8_t *, 8);
assign(s->above_txfm_ctx, uint8_t *, 8);
assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
assign(s->above_segpred_ctx, uint8_t *, 8);
assign(s->above_intra_ctx, uint8_t *, 8);
assign(s->above_comp_ctx, uint8_t *, 8);
......@@ -359,34 +361,39 @@ static int update_size(AVCodecContext *ctx, int w, int h)
static int update_block_buffers(AVCodecContext *ctx)
{
VP9Context *s = ctx->priv_data;
int chroma_blocks, chroma_eobs;
if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
return 0;
av_free(s->b_base);
av_free(s->block_base);
chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
if (s->frames[CUR_FRAME].uses_2pass) {
int sbs = s->sb_cols * s->sb_rows;
s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
16 * 16 + 2 * chroma_eobs) * sbs);
if (!s->b_base || !s->block_base)
return AVERROR(ENOMEM);
s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
s->uveob_base[0] = s->eob_base + 256 * sbs;
s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks;
s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks);
s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
} else {
s->b_base = av_malloc(sizeof(VP9Block));
s->block_base = av_mallocz((64 * 64 + 128) * 3);
s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
16 * 16 + 2 * chroma_eobs);
if (!s->b_base || !s->block_base)
return AVERROR(ENOMEM);
s->uvblock_base[0] = s->block_base + 64 * 64;
s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
s->uveob_base[0] = s->eob_base + 256;
s->uveob_base[1] = s->uveob_base[0] + 64;
s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks;
s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks);
s->uveob_base[0] = s->eob_base + 16 * 16;
s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
}
s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
......@@ -463,11 +470,56 @@ static int update_prob(VP56RangeCoder *c, int p)
255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
/**
 * Parse the colour-configuration fields of a frame header from s->gb.
 *
 * Reads the 3-bit colorspace index and, depending on colorspace and
 * profile, the range, chroma-subsampling and reserved bits that follow.
 * As a side effect it stores the results in ctx->colorspace,
 * ctx->color_range, s->ss_h and s->ss_v.
 *
 * @param ctx codec context; ctx->priv_data must be the VP9Context whose
 *            bit reader is positioned at the colorspace field and whose
 *            profile field has already been parsed
 * @return the pixel format matching the parsed configuration, or
 *         AVERROR_INVALIDDATA (negative) if the combination is not allowed
 *         for the current profile or a reserved bit is set
 */
static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = ctx->priv_data;
    enum AVPixelFormat res;

    ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        if (s->profile == 1) {
            // GBRP stores all three planes at full resolution, so RGB is
            // never chroma-subsampled; no subsampling bits are coded.
            s->ss_h = s->ss_v = 0;
            res = AV_PIX_FMT_GBRP;
            ctx->color_range = AVCOL_RANGE_JPEG;
            // A single reserved bit follows and must be consumed here,
            // otherwise every later header field is read one bit off.
            if (get_bits1(&s->gb)) {
                av_log(ctx, AV_LOG_ERROR, "Profile 1 color details reserved bit set\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
    } else {
        // Indexed as [ss_v][ss_h]; 1 means that dimension is halved.
        static const enum AVPixelFormat pix_fmt_for_ss[2 /* v */][2 /* h */] = {
            { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
            { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P },
        };

        ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (s->profile == 1) {
            // Profile 1 codes the subsampling explicitly: horizontal bit
            // first, then vertical, then a reserved bit.
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            if ((res = pix_fmt_for_ss[s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
                av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile 1\n");
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(ctx, AV_LOG_ERROR, "Profile 1 color details reserved bit set\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            // Profile 0 carries no subsampling bits and is always 4:2:0.
            s->ss_h = s->ss_v = 1;
            res = AV_PIX_FMT_YUV420P;
        }
    }

    return res;
}
static int decode_frame_header(AVCodecContext *ctx,
const uint8_t *data, int size, int *ref)
{
VP9Context *s = ctx->priv_data;
int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
enum AVPixelFormat fmt = ctx->pix_fmt;
int last_invisible;
const uint8_t *data2;
......@@ -481,8 +533,9 @@ static int decode_frame_header(AVCodecContext *ctx,
return AVERROR_INVALIDDATA;
}
s->profile = get_bits1(&s->gb);
if (get_bits1(&s->gb)) { // reserved bit
av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
s->profile |= get_bits1(&s->gb) << 1;
if (s->profile > 1) {
av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", s->profile);
return AVERROR_INVALIDDATA;
}
if (get_bits1(&s->gb)) {
......@@ -500,12 +553,8 @@ static int decode_frame_header(AVCodecContext *ctx,
av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
return AVERROR_INVALIDDATA;
}
s->colorspace = get_bits(&s->gb, 3);
if (s->colorspace == 7) { // RGB = profile 1
av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
return AVERROR_INVALIDDATA;
}
s->fullrange = get_bits1(&s->gb);
if ((fmt = read_colorspace_details(ctx)) < 0)
return fmt;
// for profile 1, here follows the subsampling bits
s->refreshrefmask = 0xff;
w = get_bits(&s->gb, 16) + 1;
......@@ -520,6 +569,15 @@ static int decode_frame_header(AVCodecContext *ctx,
av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
return AVERROR_INVALIDDATA;
}
if (s->profile == 1) {
if ((fmt = read_colorspace_details(ctx)) < 0)
return fmt;
} else {
s->ss_h = s->ss_v = 1;
fmt = AV_PIX_FMT_YUV420P;
ctx->colorspace = AVCOL_SPC_BT470BG;
ctx->color_range = AVCOL_RANGE_JPEG;
}
s->refreshrefmask = get_bits(&s->gb, 8);
w = get_bits(&s->gb, 16) + 1;
h = get_bits(&s->gb, 16) + 1;
......@@ -722,8 +780,8 @@ static int decode_frame_header(AVCodecContext *ctx,
}
/* tiling info */
if ((res = update_size(ctx, w, h)) < 0) {
av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
if ((res = update_size(ctx, w, h, fmt)) < 0) {
av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
return res;
}
for (s->tiling.log2_tile_cols = 0;
......@@ -2279,12 +2337,12 @@ static void decode_coeffs(AVCodecContext *ctx)
break;
}
#define DECODE_UV_COEF_LOOP(step) \
#define DECODE_UV_COEF_LOOP(step, decode_coeffs_fn) \
for (n = 0, y = 0; y < end_y; y += step) { \
for (x = 0; x < end_x; x += step, n += step * step) { \
res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
16 * step * step, c, e, p, a[x] + l[y], \
uvscan, uvnb, uv_band_counts, qmul[1]); \
res = decode_coeffs_fn(&s->c, s->uvblock[pl] + 16 * n, \
16 * step * step, c, e, p, a[x] + l[y], \
uvscan, uvnb, uv_band_counts, qmul[1]); \
a[x] = l[y] = !!res; \
if (step >= 4) { \
AV_WN16A(&s->uveob[pl][n], res); \
......@@ -2297,36 +2355,30 @@ static void decode_coeffs(AVCodecContext *ctx)
p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
w4 >>= 1;
h4 >>= 1;
end_x >>= 1;
end_y >>= 1;
w4 >>= s->ss_h;
end_x >>= s->ss_h;
h4 >>= s->ss_v;
end_y >>= s->ss_v;
for (pl = 0; pl < 2; pl++) {
a = &s->above_uv_nnz_ctx[pl][col];
l = &s->left_uv_nnz_ctx[pl][row & 7];
a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
switch (b->uvtx) {
case TX_4X4:
DECODE_UV_COEF_LOOP(1);
DECODE_UV_COEF_LOOP(1, decode_coeffs_b);
break;
case TX_8X8:
MERGE_CTX(2, AV_RN16A);
DECODE_UV_COEF_LOOP(2);
DECODE_UV_COEF_LOOP(2, decode_coeffs_b);
SPLAT_CTX(2);
break;
case TX_16X16:
MERGE_CTX(4, AV_RN32A);
DECODE_UV_COEF_LOOP(4);
DECODE_UV_COEF_LOOP(4, decode_coeffs_b);
SPLAT_CTX(4);
break;
case TX_32X32:
MERGE_CTX(8, AV_RN64A);
// a 64x64 (max) uv block can ever only contain 1 tx32x32 block
// so there is no need to loop
res = decode_coeffs_b32(&s->c, s->uvblock[pl],
1024, c, e, p, a[0] + l[0],
uvscan, uvnb, uv_band_counts, qmul[1]);
a[0] = l[0] = !!res;
AV_WN16A(&s->uveob[pl][0], res);
DECODE_UV_COEF_LOOP(8, decode_coeffs_b32);
SPLAT_CTX(8);
break;
}
......@@ -2338,7 +2390,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
uint8_t *dst_inner, ptrdiff_t stride_inner,
uint8_t *l, int col, int x, int w,
int row, int y, enum TxfmMode tx,
int p)
int p, int ss_h, int ss_v)
{
int have_top = row > 0 || y > 0;
int have_left = col > s->tiling.tile_col_start || x > 0;
......@@ -2393,7 +2445,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
mode = mode_conv[mode][have_left][have_top];
if (edges[mode].needs_top) {
uint8_t *top, *topleft;
int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
int n_px_need_tr = 0;
if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
......@@ -2404,11 +2456,11 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
// post-loopfilter data)
if (have_top) {
top = !(row & 7) && !y ?
s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 :
y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
if (have_left)
topleft = !(row & 7) && !y ?
s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 :
y == 0 || x == 0 ? &dst_edge[-stride_edge] :
&dst_inner[-stride_inner];
}
......@@ -2449,7 +2501,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
}
if (edges[mode].needs_left) {
if (have_left) {
int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
uint8_t *dst = x == 0 ? dst_edge : dst_inner;
ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
......@@ -2508,7 +2560,7 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
mode = check_intra_mode(s, mode, &a, ptr_r,
s->frames[CUR_FRAME].tf.f->linesize[0],
ptr, s->y_stride, l,
col, x, w4, row, y, b->tx, 0);
col, x, w4, row, y, b->tx, 0, 0, 0);
s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
if (eob)
s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
......@@ -2519,9 +2571,9 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
}
// U/V
w4 >>= 1;
end_x >>= 1;
end_y >>= 1;
w4 >>= s->ss_h;
end_x >>= s->ss_h;
end_y >>= s->ss_v;
step = 1 << (b->uvtx * 2);
for (p = 0; p < 2; p++) {
dst = s->dst[1 + p];
......@@ -2536,8 +2588,8 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
mode = check_intra_mode(s, mode, &a, ptr_r,
s->frames[CUR_FRAME].tf.f->linesize[1],
ptr, s->uv_stride, l,
col, x, w4, row, y, b->uvtx, p + 1);
ptr, s->uv_stride, l, col, x, w4, row, y,
b->uvtx, p + 1, s->ss_h, s->ss_v);
s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
if (eob)
s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
......@@ -2557,7 +2609,7 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
int bw, int bh, int w, int h,
const uint16_t *scale, const uint8_t *step)
{
#define scale_mv(n, dim) (((int64_t)n * scale[dim]) >> 14)
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
// BUG libvpx seems to scale the two components separately. This introduces
// rounding errors but we have to reproduce them to be exactly compatible
// with the output from libvpx...
......@@ -2601,8 +2653,8 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
const uint16_t *scale, const uint8_t *step)
{
// BUG https://code.google.com/p/webm/issues/detail?id=820
int mx = scale_mv(mv->x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
int my = scale_mv(mv->y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
int mx = scale_mv(mv->x << !s->ss_h, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
int my = scale_mv(mv->y << !s->ss_v, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
#undef scale_mv
int refbw_m1, refbh_m1;
int th;
......@@ -2618,7 +2670,7 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + refbh_m1 + 4 + 7) >> 5;
th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
......@@ -2696,7 +2748,7 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h)
{
int mx = mv->x, my = mv->y, th;
int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
y += my >> 4;
x += mx >> 4;
......@@ -2707,7 +2759,7 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + bh + 4 * !!my + 7) >> 5;
th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < !!mx * 3 || y < !!my * 3 ||
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
......@@ -2781,8 +2833,8 @@ static void inter_recon(AVCodecContext *ctx)
}
// uv itxfm add
end_x >>= 1;
end_y >>= 1;
end_x >>= s->ss_h;
end_y >>= s->ss_v;
step = 1 << (b->uvtx * 2);
for (p = 0; p < 2; p++) {
dst = s->dst[p + 1];
......@@ -2801,11 +2853,14 @@ static void inter_recon(AVCodecContext *ctx)
}
}
static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
int row_and_7, int col_and_7,
int w, int h, int col_end, int row_end,
enum TxfmMode tx, int skip_inter)
{
static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
// FIXME I'm pretty sure all loops can be replaced by a single LUT if
// we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
// and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
......@@ -2816,14 +2871,14 @@ static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
// a time, and we only use the topleft block's mode information to set
// things like block strength. Thus, for any block size smaller than
// 16x16, ignore the odd portion of the block.
if (tx == TX_4X4 && is_uv) {
if (h == 1) {
if (tx == TX_4X4 && (ss_v | ss_h)) {
if (h == ss_v) {
if (row_and_7 & 1)
return;
if (!row_end)
h += 1;
}
if (w == 1) {
if (w == ss_h) {
if (col_and_7 & 1)
return;
if (!col_end)
......@@ -2833,103 +2888,85 @@ static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
if (tx == TX_4X4 && !skip_inter) {
int t = 1 << col_and_7, m_col = (t << w) - t, y;
int m_col_odd = (t << (w - 1)) - t;
// on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
if (is_uv) {
int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
for (y = row_and_7; y < h + row_and_7; y++) {
int col_mask_id = 2 - !(y & 7);
lflvl->mask[is_uv][0][y][1] |= m_row_8;
lflvl->mask[is_uv][0][y][2] |= m_row_4;
// for odd lines, if the odd col is not being filtered,
// skip odd row also:
// .---. <-- a
// | |
// |___| <-- b
// ^ ^
// c d
//
// if a/c are even row/col and b/d are odd, and d is skipped,
// e.g. right edge of size-66x66.webm, then skip b also (bug)
if ((col_end & 1) && (y & 1)) {
lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
} else {
lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
}
}
} else {
int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
for (y = row_and_7; y < h + row_and_7; y++) {
int col_mask_id = 2 - !(y & 3);
lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
lflvl->mask[is_uv][0][y][2] |= m_row_4;
lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
lflvl->mask[is_uv][0][y][3] |= m_col;
lflvl->mask[is_uv][1][y][3] |= m_col;
int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
for (y = row_and_7; y < h + row_and_7; y++) {
int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
mask[0][y][1] |= m_row_8;
mask[0][y][2] |= m_row_4;
// for odd lines, if the odd col is not being filtered,
// skip odd row also:
// .---. <-- a
// | |
// |___| <-- b
// ^ ^
// c d
//
// if a/c are even row/col and b/d are odd, and d is skipped,
// e.g. right edge of size-66x66.webm, then skip b also (bug)
if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
} else {
mask[1][y][col_mask_id] |= m_col;
}
if (!ss_h)
mask[0][y][3] |= m_col;
if (!ss_v)
mask[1][y][3] |= m_col;
}
} else {
int y, t = 1 << col_and_7, m_col = (t << w) - t;
if (!skip_inter) {
int mask_id = (tx == TX_8X8);
int l2 = tx + is_uv - 1, step1d = 1 << l2;
static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
int l2 = tx + ss_h - 1, step1d;
int m_row = m_col & masks[l2];
// at odd UV col/row edges tx16/tx32 loopfilter edges, force
// 8wd loopfilter to prevent going off the visible edge.
if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
int m_row_8 = m_row - m_row_16;
for (y = row_and_7; y < h + row_and_7; y++) {
lflvl->mask[is_uv][0][y][0] |= m_row_16;
lflvl->mask[is_uv][0][y][1] |= m_row_8;
mask[0][y][0] |= m_row_16;
mask[0][y][1] |= m_row_8;
}
} else {
for (y = row_and_7; y < h + row_and_7; y++)
lflvl->mask[is_uv][0][y][mask_id] |= m_row;
mask[0][y][mask_id] |= m_row;
}
if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
l2 = tx + ss_v - 1;
step1d = 1 << l2;
if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
lflvl->mask[is_uv][1][y][0] |= m_col;
mask[1][y][0] |= m_col;
if (y - row_and_7 == h - 1)
lflvl->mask[is_uv][1][y][1] |= m_col;
mask[1][y][1] |= m_col;
} else {
for (y = row_and_7; y < h + row_and_7; y += step1d)
lflvl->mask[is_uv][1][y][mask_id] |= m_col;
mask[1][y][mask_id] |= m_col;
}
} else if (tx != TX_4X4) {
int mask_id;
mask_id = (tx == TX_8X8) || (is_uv && h == 1);
lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
mask_id = (tx == TX_8X8) || (is_uv && w == 1);
mask_id = (tx == TX_8X8) || (h == ss_v);
mask[1][row_and_7][mask_id] |= m_col;
mask_id = (tx == TX_8X8) || (w == ss_h);
for (y = row_and_7; y < h + row_and_7; y++)
lflvl->mask[is_uv][0][y][mask_id] |= t;
} else if (is_uv) {
int t8 = t & 0x01, t4 = t - t8;
for (y = row_and_7; y < h + row_and_7; y++) {
lflvl->mask[is_uv][0][y][2] |= t4;
lflvl->mask[is_uv][0][y][1] |= t8;
}
lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
mask[0][y][mask_id] |= t;
} else {
int t8 = t & 0x11, t4 = t - t8;
int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
for (y = row_and_7; y < h + row_and_7; y++) {
lflvl->mask[is_uv][0][y][2] |= t4;
lflvl->mask[is_uv][0][y][1] |= t8;
mask[0][y][2] |= t4;
mask[0][y][1] |= t8;
}
lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
}
}
}
......@@ -2958,7 +2995,8 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
b->bl = bl;
b->bp = bp;
decode_mode(ctx);
b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
(s->ss_v && h4 * 2 == (1 << b->tx)));
if (!b->skip) {
decode_coeffs(ctx);
......@@ -2973,34 +3011,39 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
case 8: AV_ZERO64(&v); break; \
case 16: AV_ZERO128(&v); break; \
}
#define SPLAT_ZERO_YUV(dir, var, off, n) \
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
do { \
SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
if (s->ss_##dir2) { \
SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
} else { \
SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
} \
} while (0)
switch (w4) {
case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
}
switch (h4) {
case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
}
}
if (s->pass == 1) {
s->b++;
s->block += w4 * h4 * 64;
s->uvblock[0] += w4 * h4 * 16;
s->uvblock[1] += w4 * h4 * 16;
s->uvblock[0] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
s->uvblock[1] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
s->eob += 4 * w4 * h4;
s->uveob[0] += w4 * h4;
s->uveob[1] += w4 * h4;
s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
return;
}
......@@ -3073,11 +3116,12 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
mask_edges(lflvl, 1, row7, col7, x_end, y_end,
s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
b->uvtx, skip_inter);
mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
if (s->ss_h || s->ss_v)
mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
b->uvtx, skip_inter);
if (!s->filter.lim_lut[lvl]) {
int sharp = s->filter.sharpness;
......@@ -3097,11 +3141,11 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
if (s->pass == 2) {
s->b++;
s->block += w4 * h4 * 64;
s->uvblock[0] += w4 * h4 * 16;
s->uvblock[1] += w4 * h4 * 16;
s->uvblock[0] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
s->uvblock[1] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
s->eob += 4 * w4 * h4;
s->uveob[0] += w4 * h4;
s->uveob[1] += w4 * h4;
s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
}
}
......@@ -3131,24 +3175,24 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l
case PARTITION_H:
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
break;
case PARTITION_V:
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
yoff += hbs * 8;
uvoff += hbs * 4;
uvoff += hbs * 8 >> s->ss_h;
decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
break;
case PARTITION_SPLIT:
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(ctx, row, col + hbs, lflvl,
yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(ctx, row + hbs, col + hbs, lflvl,
yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
break;
default:
av_assert0(0);
......@@ -3157,7 +3201,7 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l
bp = PARTITION_SPLIT;
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(ctx, row, col + hbs, lflvl,
yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
} else {
bp = PARTITION_H;
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
......@@ -3167,7 +3211,7 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l
bp = PARTITION_SPLIT;
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
} else {
bp = PARTITION_V;
......@@ -3196,11 +3240,11 @@ static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filte
decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
if (b->bp == PARTITION_H && row + hbs < s->rows) {
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
} else if (b->bp == PARTITION_V && col + hbs < s->cols) {
yoff += hbs * 8;
uvoff += hbs * 4;
uvoff += hbs * 8 >> s->ss_h;
decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
}
} else {
......@@ -3208,262 +3252,203 @@ static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filte
if (col + hbs < s->cols) { // FIXME why not <=?
if (row + hbs < s->rows) {
decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
uvoff + 4 * hbs, bl + 1);
uvoff + (8 * hbs >> s->ss_h), bl + 1);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
} else {
yoff += hbs * 8;
uvoff += hbs * 4;
uvoff += hbs * 8 >> s->ss_h;
decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
}
} else if (row + hbs < s->rows) {
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
}
}
}
static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
uint8_t *lvl, uint8_t (*mask)[4],
uint8_t *dst, ptrdiff_t ls)
{
VP9Context *s = ctx->priv_data;
AVFrame *f = s->frames[CUR_FRAME].tf.f;
uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
int y, x, p;
int y, x;
// FIXME in how far can we interleave the v/h loopfilter calls? E.g.
// if you think of them as acting on a 8x8 block max, we can interleave
// each v/h within the single x loop, but that only works if we work on
// 8 pixel blocks, and we won't always do that (we want at least 16px
// to use SSE2 optimizations, perhaps 32 for AVX2)
// filter edges between columns, Y plane (e.g. block1 | block2)
for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
// filter edges between columns (e.g. block1 | block2)
for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
unsigned hm = hm1 | hm2 | hm13 | hm23;
for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
if (hm1 & x) {
int L = *l, H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 >> ss_h) {
if (col || x > 1) {
if (hm1 & x) {
int L = *l, H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
if (col || x > 1) {
if (hmask1[0] & x) {
if (hmask2[0] & x) {
av_assert2(l[8] == L);
s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
av_assert2(l[8 << ss_v] == L);
s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
} else {
s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
}
} else if (hm2 & x) {
L = l[8];
L = l[8 << ss_v];
H |= (L >> 4) << 8;
E |= s->filter.mblim_lut[L] << 8;
I |= s->filter.lim_lut[L] << 8;
s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
[!!(hmask2[1] & x)]
[0](ptr, ls_y, E, I, H);
[0](ptr, ls, E, I, H);
} else {
s->dsp.loop_filter_8[!!(hmask1[1] & x)]
[0](ptr, ls_y, E, I, H);
[0](ptr, ls, E, I, H);
}
}
} else if (hm2 & x) {
int L = l[8], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
} else if (hm2 & x) {
int L = l[8 << ss_v], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
if (col || x > 1) {
s->dsp.loop_filter_8[!!(hmask2[1] & x)]
[0](ptr + 8 * ls_y, ls_y, E, I, H);
[0](ptr + 8 * ls, ls, E, I, H);
}
}
if (hm13 & x) {
int L = *l, H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
if (hm23 & x) {
L = l[8];
H |= (L >> 4) << 8;
E |= s->filter.mblim_lut[L] << 8;
I |= s->filter.lim_lut[L] << 8;
s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
} else {
s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
}
} else if (hm23 & x) {
int L = l[8], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
if (ss_h) {
if (x & 0xAA)
l += 2;
} else {
if (hm13 & x) {
int L = *l, H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
if (hm23 & x) {
L = l[8 << ss_v];
H |= (L >> 4) << 8;
E |= s->filter.mblim_lut[L] << 8;
I |= s->filter.lim_lut[L] << 8;
s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls, E, I, H);
} else {
s->dsp.loop_filter_8[0][0](ptr + 4, ls, E, I, H);
}
} else if (hm23 & x) {
int L = l[8 << ss_v], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4, ls, E, I, H);
}
l++;
}
}
}
}
static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
uint8_t *lvl, uint8_t (*mask)[4],
uint8_t *dst, ptrdiff_t ls)
{
int y, x;
// block1
// filter edges between rows, Y plane (e.g. ------)
// block2
dst = f->data[0] + yoff;
lvl = lflvl->level;
for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
// block1
// filter edges between rows (e.g. ------)
// block2
for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16, l += 2 << ss_h) {
if (row || y) {
if (vm & x) {
int L = *l, H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
if (vmask[0] & x) {
if (vmask[0] & (x << 1)) {
av_assert2(l[1] == L);
s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
if (vmask[0] & (x << (1 + ss_h))) {
av_assert2(l[1 + ss_h] == L);
s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
} else {
s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
}
} else if (vm & (x << 1)) {
L = l[1];
} else if (vm & (x << (1 + ss_h))) {
L = l[1 + ss_h];
H |= (L >> 4) << 8;
E |= s->filter.mblim_lut[L] << 8;
I |= s->filter.lim_lut[L] << 8;
s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
[!!(vmask[1] & (x << 1))]
[1](ptr, ls_y, E, I, H);
[!!(vmask[1] & (x << (1 + ss_h)))]
[1](ptr, ls, E, I, H);
} else {
s->dsp.loop_filter_8[!!(vmask[1] & x)]
[1](ptr, ls_y, E, I, H);
[1](ptr, ls, E, I, H);
}
} else if (vm & (x << 1)) {
int L = l[1], H = L >> 4;
} else if (vm & (x << (1 + ss_h))) {
int L = l[1 + ss_h], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
[1](ptr + 8, ls_y, E, I, H);
s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
[1](ptr + 8, ls, E, I, H);
}
}
if (vm3 & x) {
int L = *l, H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
if (vm3 & (x << 1)) {
L = l[1];
H |= (L >> 4) << 8;
E |= s->filter.mblim_lut[L] << 8;
I |= s->filter.lim_lut[L] << 8;
s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
} else {
s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
}
} else if (vm3 & (x << 1)) {
int L = l[1], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
}
}
}
// same principle but for U/V planes
for (p = 0; p < 2; p++) {
lvl = lflvl->level;
dst = f->data[1 + p] + uvoff;
for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
if (col || x > 1) {
if (hm1 & x) {
int L = *l, H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
if (hmask1[0] & x) {
if (hmask2[0] & x) {
av_assert2(l[16] == L);
s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
} else {
s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
}
} else if (hm2 & x) {
L = l[16];
H |= (L >> 4) << 8;
E |= s->filter.mblim_lut[L] << 8;
I |= s->filter.lim_lut[L] << 8;
s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
[!!(hmask2[1] & x)]
[0](ptr, ls_uv, E, I, H);
} else {
s->dsp.loop_filter_8[!!(hmask1[1] & x)]
[0](ptr, ls_uv, E, I, H);
}
} else if (hm2 & x) {
int L = l[16], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
if (!ss_v) {
if (vm3 & x) {
int L = *l, H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
s->dsp.loop_filter_8[!!(hmask2[1] & x)]
[0](ptr + 8 * ls_uv, ls_uv, E, I, H);
if (vm3 & (x << (1 + ss_h))) {
L = l[1 + ss_h];
H |= (L >> 4) << 8;
E |= s->filter.mblim_lut[L] << 8;
I |= s->filter.lim_lut[L] << 8;
s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
} else {
s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
}
}
if (x & 0xAA)
l += 2;
}
}
lvl = lflvl->level;
dst = f->data[1 + p] + uvoff;
for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
unsigned vm = vmask[0] | vmask[1] | vmask[2];
for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
if (row || y) {
if (vm & x) {
int L = *l, H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
if (vmask[0] & x) {
if (vmask[0] & (x << 2)) {
av_assert2(l[2] == L);
s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
} else {
s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
}
} else if (vm & (x << 2)) {
L = l[2];
H |= (L >> 4) << 8;
E |= s->filter.mblim_lut[L] << 8;
I |= s->filter.lim_lut[L] << 8;
s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
[!!(vmask[1] & (x << 2))]
[1](ptr, ls_uv, E, I, H);
} else {
s->dsp.loop_filter_8[!!(vmask[1] & x)]
[1](ptr, ls_uv, E, I, H);
}
} else if (vm & (x << 2)) {
int L = l[2], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
} else if (vm3 & (x << (1 + ss_h))) {
int L = l[1 + ss_h], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
[1](ptr + 8, ls_uv, E, I, H);
}
s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8, ls, E, I, H);
}
}
}
if (ss_v) {
if (y & 1)
lvl += 16;
} else {
lvl += 8;
}
}
}
/*
 * Loop-filter the three planes of the current frame at one position.
 *
 * Plane 0 is filtered at byte offset yoff with no subsampling; planes 1
 * and 2 are filtered at byte offset uvoff using the subsampling flags
 * stored in the context (s->ss_h / s->ss_v).  For every plane the
 * column-edge pass (filter_plane_cols) runs before the row-edge pass
 * (filter_plane_rows).
 *
 * ctx    codec context; its priv_data is the VP9Context
 * lflvl  filter levels and edge masks consumed by the two passes
 * row    forwarded to filter_plane_rows()
 * col    forwarded to filter_plane_cols()
 * yoff   offset added to data[0]
 * uvoff  offset added to data[1] and data[2]
 */
static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
                          int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
{
    VP9Context *s = ctx->priv_data;
    AVFrame *frame = s->frames[CUR_FRAME].tf.f;
    const int ss_h = s->ss_h, ss_v = s->ss_v;
    ptrdiff_t luma_stride = frame->linesize[0];
    ptrdiff_t chroma_stride = frame->linesize[1];
    // Chroma uses mask set 0 (the same one as luma) when neither direction
    // is subsampled, and the set at index (ss_h | ss_v) otherwise.
    // NOTE(review): assumes ss_h and ss_v are each 0 or 1 - confirm.
    uint8_t (*chroma_masks)[8][4] = lflvl->mask[ss_h | ss_v];
    uint8_t *luma = frame->data[0] + yoff;
    int plane;

    // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
    // if you think of them as acting on a 8x8 block max, we can interleave
    // each v/h within the single x loop, but that only works if we work on
    // 8 pixel blocks, and we won't always do that (we want at least 16px
    // to use SSE2 optimizations, perhaps 32 for AVX2)

    // plane 0: never subsampled, always mask set 0
    filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0],
                      luma, luma_stride);
    filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1],
                      luma, luma_stride);

    // planes 1 and 2: same offset, stride and masks for both
    for (plane = 1; plane <= 2; plane++) {
        uint8_t *chroma = frame->data[plane] + uvoff;

        filter_plane_cols(s, col, ss_h, ss_v, lflvl->level, chroma_masks[0],
                          chroma, chroma_stride);
        filter_plane_rows(s, row, ss_h, ss_v, lflvl->level, chroma_masks[1],
                          chroma, chroma_stride);
    }
}
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
int sb_start = ( idx * n) >> log2_n;
......@@ -3815,18 +3800,6 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
return res;
}
if (s->fullrange)
ctx->color_range = AVCOL_RANGE_JPEG;
else
ctx->color_range = AVCOL_RANGE_MPEG;
switch (s->colorspace) {
case 1: ctx->colorspace = AVCOL_SPC_BT470BG; break;
case 2: ctx->colorspace = AVCOL_SPC_BT709; break;
case 3: ctx->colorspace = AVCOL_SPC_SMPTE170M; break;
case 4: ctx->colorspace = AVCOL_SPC_SMPTE240M; break;
}
// main tile decode loop
memset(s->above_partition_ctx, 0, s->cols);
memset(s->above_skip_ctx, 0, s->cols);
......@@ -3836,8 +3809,8 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
memset(s->above_mode_ctx, NEARESTMV, s->cols);
}
memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
memset(s->above_segpred_ctx, 0, s->cols);
s->pass = s->frames[CUR_FRAME].uses_2pass =
ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
......@@ -3905,7 +3878,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
}
for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
struct VP9Filter *lflvl_ptr = s->lflvl;
ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
......@@ -3922,7 +3895,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
memset(s->left_mode_ctx, NEARESTMV, 8);
}
memset(s->left_y_nnz_ctx, 0, 16);
memset(s->left_uv_nnz_ctx, 0, 16);
memset(s->left_uv_nnz_ctx, 0, 32);
memset(s->left_segpred_ctx, 0, 8);
memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
......@@ -3930,7 +3903,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
for (col = s->tiling.tile_col_start;
col < s->tiling.tile_col_end;
col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
// FIXME integrate with lf code (i.e. zero after each
// use, similar to invtxfm coefficients, or similar)
if (s->pass != 1) {
......@@ -3961,11 +3934,11 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
f->data[0] + yoff + 63 * ls_y,
8 * s->cols);
memcpy(s->intra_pred_data[1],
f->data[1] + uvoff + 31 * ls_uv,
4 * s->cols);
f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
8 * s->cols >> s->ss_h);
memcpy(s->intra_pred_data[2],
f->data[2] + uvoff + 31 * ls_uv,
4 * s->cols);
f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
8 * s->cols >> s->ss_h);
}
// loopfilter one row
......@@ -3974,7 +3947,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
uvoff2 = uvoff;
lflvl_ptr = s->lflvl;
for (col = 0; col < s->cols;
col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
}
}
......@@ -4051,7 +4024,6 @@ static av_cold int vp9_decode_init(AVCodecContext *ctx)
VP9Context *s = ctx->priv_data;
ctx->internal->allocate_progress = 1;
ctx->pix_fmt = AV_PIX_FMT_YUV420P;
ff_vp9dsp_init(&s->dsp);
ff_videodsp_init(&s->vdsp, 8);
s->filter.sharpness = -1;
......@@ -4094,6 +4066,8 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
s->invisible = ssrc->invisible;
s->keyframe = ssrc->keyframe;
s->ss_v = ssrc->ss_v;
s->ss_h = ssrc->ss_h;
s->segmentation.enabled = ssrc->segmentation.enabled;
s->segmentation.update_map = ssrc->segmentation.update_map;
memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
......
......@@ -21,6 +21,12 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * Component-wise rounded average of two (MVx2) or four (MVx4) motion
 * vectors, yielding a VP56mv compound literal.
 *
 * Each argument is parenthesized before member access so that any
 * expression of struct type (e.g. *ptr or ptr[i]) can be passed safely.
 * Every argument appears twice in the expansion (once for .x, once for
 * .y), so arguments must not have side effects.
 */
#define ROUNDED_DIV_MVx2(a, b) \
    (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x, 2), \
               .y = ROUNDED_DIV((a).y + (b).y, 2) }
#define ROUNDED_DIV_MVx4(a, b, c, d) \
    (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x + (c).x + (d).x, 4), \
               .y = ROUNDED_DIV((a).y + (b).y + (c).y + (d).y, 4) }
static void FN(inter_pred)(AVCodecContext *ctx)
{
static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
......@@ -44,6 +50,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
// y inter pred
if (b->bs > BS_8x8) {
VP56mv uvmv;
if (b->bs == BS_8x4) {
mc_luma_dir(s, mc[3][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1,
......@@ -52,6 +60,38 @@ static void FN(inter_pred)(AVCodecContext *ctx)
s->dst[0] + 4 * ls_y, ls_y,
ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1, 0);
w1 = (w1 + s->ss_h) >> s->ss_h;
if (s->ss_v) {
h1 = (h1 + 1) >> 1;
uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << 2, col << (3 - s->ss_h),
&uvmv, 8 >> s->ss_h, 4, w1, h1, 0);
} else {
mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << 3, col << (3 - s->ss_h),
&b->mv[0][0], 8 >> s->ss_h, 4, w1, h1, 0);
// BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
// to get the motion vector for the bottom 4x4 block
// https://code.google.com/p/webm/issues/detail?id=993
if (s->ss_h == 0) {
uvmv = b->mv[2][0];
} else {
uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
}
mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, col << (3 - s->ss_h),
&uvmv, 8 >> s->ss_h, 4, w1, h1, 0);
}
if (b->comp) {
mc_luma_dir(s, mc[3][b->filter][1], s->dst[0], ls_y,
......@@ -61,6 +101,38 @@ static void FN(inter_pred)(AVCodecContext *ctx)
s->dst[0] + 4 * ls_y, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2, 1);
w2 = (w2 + s->ss_h) >> s->ss_h;
if (s->ss_v) {
h2 = (h2 + 1) >> 1;
uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 2, col << (3 - s->ss_h),
&uvmv, 8 >> s->ss_h, 4, w2, h2, 1);
} else {
mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 3, col << (3 - s->ss_h),
&b->mv[0][1], 8 >> s->ss_h, 4, w2, h2, 1);
// BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
// to get the motion vector for the bottom 4x4 block
// https://code.google.com/p/webm/issues/detail?id=993
if (s->ss_h == 0) {
uvmv = b->mv[2][1];
} else {
uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
}
mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, col << (3 - s->ss_h),
&uvmv, 8 >> s->ss_h, 4, w2, h2, 1);
}
}
} else if (b->bs == BS_4x8) {
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
......@@ -69,6 +141,30 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y,
ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1, 0);
h1 = (h1 + s->ss_v) >> s->ss_v;
if (s->ss_h) {
w1 = (w1 + 1) >> 1;
uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << (3 - s->ss_v), col << 2,
&uvmv, 4, 8 >> s->ss_v, w1, h1, 0);
} else {
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << (3 - s->ss_v), col << 3,
&b->mv[0][0], 4, 8 >> s->ss_v, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << (3 - s->ss_v), (col << 3) + 4,
&b->mv[1][0], 4, 8 >> s->ss_v, w1, h1, 0);
}
if (b->comp) {
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
......@@ -77,6 +173,30 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2, 1);
h2 = (h2 + s->ss_v) >> s->ss_v;
if (s->ss_h) {
w2 = (w2 + 1) >> 1;
uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << (3 - s->ss_v), col << 2,
&uvmv, 4, 8 >> s->ss_v, w2, h2, 1);
} else {
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << (3 - s->ss_v), col << 3,
&b->mv[0][1], 4, 8 >> s->ss_v, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << (3 - s->ss_v), (col << 3) + 4,
&b->mv[1][1], 4, 8 >> s->ss_v, w2, h2, 1);
}
}
} else {
av_assert2(b->bs == BS_4x4);
......@@ -97,6 +217,81 @@ static void FN(inter_pred)(AVCodecContext *ctx)
s->dst[0] + 4 * ls_y + 4, ls_y,
ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1, 0);
if (s->ss_v) {
h1 = (h1 + 1) >> 1;
if (s->ss_h) {
w1 = (w1 + 1) >> 1;
uvmv = ROUNDED_DIV_MVx4(b->mv[0][0], b->mv[1][0],
b->mv[2][0], b->mv[3][0]);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << 2, col << 2,
&uvmv, 4, 4, w1, h1, 0);
} else {
uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << 2, col << 3,
&uvmv, 4, 4, w1, h1, 0);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << 2, (col << 3) + 4,
&uvmv, 4, 4, w1, h1, 0);
}
} else {
if (s->ss_h) {
w1 = (w1 + 1) >> 1;
uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << 3, col << 2,
&uvmv, 4, 4, w1, h1, 0);
// BUG libvpx uses wrong block index for 4:2:2 bs=4x4
// bottom block
// https://code.google.com/p/webm/issues/detail?id=993
uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[2][0]);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, col << 2,
&uvmv, 4, 4, w1, h1, 0);
} else {
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << 3, col << 3,
&b->mv[0][0], 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << 3, (col << 3) + 4,
&b->mv[1][0], 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, col << 3,
&b->mv[2][0], 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, (col << 3) + 4,
&b->mv[3][0], 4, 4, w1, h1, 0);
}
}
if (b->comp) {
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
......@@ -113,59 +308,112 @@ static void FN(inter_pred)(AVCodecContext *ctx)
s->dst[0] + 4 * ls_y + 4, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2, 1);
if (s->ss_v) {
h2 = (h2 + 1) >> 1;
if (s->ss_h) {
w2 = (w2 + 1) >> 1;
uvmv = ROUNDED_DIV_MVx4(b->mv[0][1], b->mv[1][1],
b->mv[2][1], b->mv[3][1]);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 2, col << 2,
&uvmv, 4, 4, w2, h2, 1);
} else {
uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 2, col << 3,
&uvmv, 4, 4, w2, h2, 1);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 2, (col << 3) + 4,
&uvmv, 4, 4, w2, h2, 1);
}
} else {
if (s->ss_h) {
w2 = (w2 + 1) >> 1;
uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 3, col << 2,
&uvmv, 4, 4, w2, h2, 1);
// BUG libvpx uses wrong block index for 4:2:2 bs=4x4
// bottom block
// https://code.google.com/p/webm/issues/detail?id=993
uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[2][1]);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, col << 2,
&uvmv, 4, 4, w2, h2, 1);
} else {
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 3, col << 3,
&b->mv[0][1], 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 3, (col << 3) + 4,
&b->mv[1][1], 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, col << 3,
&b->mv[2][1], 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, (col << 3) + 4,
&b->mv[3][1], 4, 4, w2, h2, 1);
}
}
}
}
} else {
int bwl = bwlog_tab[0][b->bs];
int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
int uvbw = bwh_tab[s->ss_h][b->bs][0] * 4, uvbh = bwh_tab[s->ss_v][b->bs][1] * 4;
mc_luma_dir(s, mc[bwl][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1, 0);
if (b->comp)
mc_luma_dir(s, mc[bwl][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2, 1);
}
// uv inter pred
{
int bwl = bwlog_tab[1][b->bs];
int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
VP56mv mvuv;
w1 = (w1 + 1) >> 1;
h1 = (h1 + 1) >> 1;
if (b->comp) {
w2 = (w2 + 1) >> 1;
h2 = (h2 + 1) >> 1;
}
if (b->bs > BS_8x8) {
mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
} else {
mvuv = b->mv[0][0];
}
mc_chroma_dir(s, mc[bwl][b->filter][0],
row << 3, col << 3, &b->mv[0][0], bw, bh, w1, h1, 0);
w1 = (w1 + s->ss_h) >> s->ss_h;
h1 = (h1 + s->ss_v) >> s->ss_v;
mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][0],
s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << 2, col << 2, &mvuv, bw, bh, w1, h1, 0);
row << (3 - s->ss_v), col << (3 - s->ss_h),
&b->mv[0][0], uvbw, uvbh, w1, h1, 0);
if (b->comp) {
if (b->bs > BS_8x8) {
mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
} else {
mvuv = b->mv[0][1];
}
mc_chroma_dir(s, mc[bwl][b->filter][1],
mc_luma_dir(s, mc[bwl][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2, 1);
w2 = (w2 + s->ss_h) >> s->ss_h;
h2 = (h2 + s->ss_v) >> s->ss_v;
mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][1],
s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 2, col << 2, &mvuv, bw, bh, w2, h2, 1);
row << (3 - s->ss_v), col << (3 - s->ss_h),
&b->mv[0][1], uvbw, uvbh, w2, h2, 1);
}
}
}
......@@ -85,6 +85,12 @@ fate-vp9$(2)-$(1): CMD = framemd5 $(3) -i $(TARGET_SAMPLES)/vp9-test-vectors/vp9
fate-vp9$(2)-$(1): REF = $(SRC_PATH)/tests/ref/fate/vp9-$(1)
endef
# Template that registers one FATE test for a profile-specific VP9 sample.
#   $(1): test vector name (the visible callers pass e.g. 04-yuv444)
#   $(2): profile number (the visible callers pass 1); it is used in both the
#         test name (fate-vp9p<N>-<name>) and the sample file name
#         (vp9<N>-2-<name>.webm)
# The test is only added when CONFIG_MATROSKA_DEMUXER is enabled. CMD runs
# framemd5 on the sample under $(TARGET_SAMPLES)/vp9-test-vectors/ and REF
# points at tests/ref/fate/vp9p<N>-<name>.
define FATE_VP9_PROFILE_SUITE
FATE_VP9-$(CONFIG_MATROSKA_DEMUXER) += fate-vp9p$(2)-$(1)
fate-vp9p$(2)-$(1): CMD = framemd5 -i $(TARGET_SAMPLES)/vp9-test-vectors/vp9$(2)-2-$(1).webm
fate-vp9p$(2)-$(1): REF = $(SRC_PATH)/tests/ref/fate/vp9p$(2)-$(1)
endef
VP9_Q = 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 \
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 \
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 \
......@@ -94,24 +100,27 @@ VP9_SIZE_A = 08 10 16 18 32 34 64 66
VP9_SIZE_B = 196 198 200 202 208 210 224 226
define FATE_VP9_FULL
$(foreach Q,$(VP9_Q),$(eval $(call FATE_VP9_SUITE,00-quantizer-$(Q),$(1),$(2))))
$(foreach SHARP,$(VP9_SHARP),$(eval $(call FATE_VP9_SUITE,01-sharpness-$(SHARP),$(1),$(2))))
$(foreach W,$(VP9_SIZE_A),$(eval $(foreach H,$(VP9_SIZE_A),$(eval $(call FATE_VP9_SUITE,02-size-$(W)x$(H),$(1),$(2))))))
$(foreach W,$(VP9_SIZE_B),$(eval $(foreach H,$(VP9_SIZE_B),$(eval $(call FATE_VP9_SUITE,03-size-$(W)x$(H),$(1),$(2))))))
$(eval $(call FATE_VP9_SUITE,03-deltaq,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,06-bilinear,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,09-lf_deltas,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,10-show-existing-frame,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,10-show-existing-frame2,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,15-segkey_adpq,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,16-intra-only,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,2pass-akiyo,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,parallelmode-akiyo,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,segmentation-aq-akiyo,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,segmentation-sf-akiyo,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,tiling-pedestrian,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,trac3849,$(1),$(2)))
$(eval $(call FATE_VP9_SUITE,trac4359,$(1),$(2)))
$(foreach Q,$(VP9_Q),$(eval $(call FATE_VP9_SUITE,00-quantizer-$(Q))))
$(foreach SHARP,$(VP9_SHARP),$(eval $(call FATE_VP9_SUITE,01-sharpness-$(SHARP))))
$(foreach W,$(VP9_SIZE_A),$(eval $(foreach H,$(VP9_SIZE_A),$(eval $(call FATE_VP9_SUITE,02-size-$(W)x$(H))))))
$(foreach W,$(VP9_SIZE_B),$(eval $(foreach H,$(VP9_SIZE_B),$(eval $(call FATE_VP9_SUITE,03-size-$(W)x$(H))))))
$(eval $(call FATE_VP9_SUITE,03-deltaq))
$(eval $(call FATE_VP9_PROFILE_SUITE,04-yuv444,1))
$(eval $(call FATE_VP9_PROFILE_SUITE,04-yuv440,1))
$(eval $(call FATE_VP9_PROFILE_SUITE,04-yuv422,1))
$(eval $(call FATE_VP9_SUITE,06-bilinear))
$(eval $(call FATE_VP9_SUITE,09-lf_deltas))
$(eval $(call FATE_VP9_SUITE,10-show-existing-frame))
$(eval $(call FATE_VP9_SUITE,10-show-existing-frame2))
$(eval $(call FATE_VP9_SUITE,15-segkey_adpq))
$(eval $(call FATE_VP9_SUITE,16-intra-only))
$(eval $(call FATE_VP9_SUITE,2pass-akiyo))
$(eval $(call FATE_VP9_SUITE,parallelmode-akiyo))
$(eval $(call FATE_VP9_SUITE,segmentation-aq-akiyo))
$(eval $(call FATE_VP9_SUITE,segmentation-sf-akiyo))
$(eval $(call FATE_VP9_SUITE,tiling-pedestrian))
$(eval $(call FATE_VP9_SUITE,trac3849))
$(eval $(call FATE_VP9_SUITE,trac4359))
endef
$(eval $(call FATE_VP9_FULL))
......
#format: frame checksums
#version: 1
#hash: MD5
#tb 0: 1/50
#stream#, dts, pts, duration, size, hash
0, 0, 0, 1, 28800, b81b8a8444ac6ce4a4807c37e0a44c8b
0, 1, 1, 1, 28800, 344458b82d35ea9944dc841643fc25c2
0, 2, 2, 1, 28800, 376a4bb3944f052191963740b980eb26
0, 3, 3, 1, 28800, 2fecb02c842bd7d588415904f2d3a82d
0, 4, 4, 1, 28800, 0fda2f1dabba5c179599190f179b9782
0, 5, 5, 1, 28800, a88ac885ee59e3a3a01fa483cdd40274
0, 6, 6, 1, 28800, e76b488ffa70a05457fc046e7b999c56
0, 7, 7, 1, 28800, 74ae5e52162f5bbc95258d44a2dd647c
0, 8, 8, 1, 28800, 0c017e2b12e5192c8d598941d9c93306
0, 9, 9, 1, 28800, ca3941ee43b7033cb48f8498af127d53
#format: frame checksums
#version: 1
#hash: MD5
#tb 0: 1/50
#stream#, dts, pts, duration, size, hash
0, 0, 0, 1, 28800, 61157ad4fb02a254de8f34ae7b8915dc
0, 1, 1, 1, 28800, 9431337382bf90d40aa417e297ac05da
0, 2, 2, 1, 28800, 56b739049cc9e97a1d82018bba3db0ee
0, 3, 3, 1, 28800, 75138a9b6bb905b2f79a1ebb959ddfea
0, 4, 4, 1, 28800, 141b2fc9625fad86577838d84a276ef8
0, 5, 5, 1, 28800, b364668c44a237d4e532e086a55401a9
0, 6, 6, 1, 28800, a4ca6014d5194e4c921a4cb4289eb315
0, 7, 7, 1, 28800, cfcacb3d5086d3861f4712a3c87a6b6c
0, 8, 8, 1, 28800, 228d3fd3d849d021f3690cc538edb0a3
0, 9, 9, 1, 28800, 97ecf281eb1130723d70e3c8803fa814
#format: frame checksums
#version: 1
#hash: MD5
#tb 0: 1/25
#stream#, dts, pts, duration, size, hash
0, 0, 0, 1, 304128, 859df7b3661783e337a16ee79f3c20bc
0, 1, 1, 1, 304128, 3b3ccf344cd5a478c4c1fa422497183d
0, 2, 2, 1, 304128, 3be1f565823cb88013a14a93a3cf9480
0, 3, 3, 1, 304128, 6e188a963deaf46c2d6e741b03c4240c
0, 4, 4, 1, 304128, 82ead184ae478ac821b1b4b72f28c9cd
0, 5, 5, 1, 304128, 59bb43badc76b39a228b1ad96b6339ca
0, 6, 6, 1, 304128, 2eaee790fc188e2251b92dd4ea90c42a
0, 7, 7, 1, 304128, 2a95f8727589e710dc1b95400916b72e
0, 8, 8, 1, 304128, b7032f73544a7108fcdcaca2832ecc32
0, 9, 9, 1, 304128, b7778c35b30bcc400b25ed0e5b7913e1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment