Commit 23ba4538 authored by Ronald S. Bultje

vp9: add inter-frame profile 2/3 support.

parent b224b165
...@@ -2733,7 +2733,7 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm ...@@ -2733,7 +2733,7 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
const uint8_t *ref, ptrdiff_t ref_stride, const uint8_t *ref, ptrdiff_t ref_stride,
ThreadFrame *ref_frame, ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h, int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step) const uint16_t *scale, const uint8_t *step)
{ {
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14) #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
...@@ -2747,7 +2747,7 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm ...@@ -2747,7 +2747,7 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
y = my >> 4; y = my >> 4;
x = mx >> 4; x = mx >> 4;
ref += y * ref_stride + x; ref += y * ref_stride + x * bytesperpixel;
mx &= 15; mx &= 15;
my &= 15; my &= 15;
refbw_m1 = ((bw - 1) * step[0] + mx) >> 4; refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
...@@ -2759,12 +2759,12 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm ...@@ -2759,12 +2759,12 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) { if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref - 3 * ref_stride - 3, ref - 3 * ref_stride - 3 * bytesperpixel,
144, ref_stride, 288, ref_stride,
refbw_m1 + 8, refbh_m1 + 8, refbw_m1 + 8, refbh_m1 + 8,
x - 3, y - 3, w, h); x - 3, y - 3, w, h);
ref = s->edge_emu_buffer + 3 * 144 + 3; ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
ref_stride = 144; ref_stride = 288;
} }
smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]); smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
} }
...@@ -2776,7 +2776,7 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func ...@@ -2776,7 +2776,7 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
const uint8_t *ref_v, ptrdiff_t src_stride_v, const uint8_t *ref_v, ptrdiff_t src_stride_v,
ThreadFrame *ref_frame, ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h, int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step) const uint16_t *scale, const uint8_t *step)
{ {
// BUG https://code.google.com/p/webm/issues/detail?id=820 // BUG https://code.google.com/p/webm/issues/detail?id=820
...@@ -2788,8 +2788,8 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func ...@@ -2788,8 +2788,8 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
y = my >> 4; y = my >> 4;
x = mx >> 4; x = mx >> 4;
ref_u += y * src_stride_u + x; ref_u += y * src_stride_u + x * bytesperpixel;
ref_v += y * src_stride_v + x; ref_v += y * src_stride_v + x * bytesperpixel;
mx &= 15; mx &= 15;
my &= 15; my &= 15;
refbw_m1 = ((bw - 1) * step[0] + mx) >> 4; refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
...@@ -2801,51 +2801,60 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func ...@@ -2801,51 +2801,60 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) { if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_u - 3 * src_stride_u - 3, ref_u - 3 * src_stride_u - 3 * bytesperpixel,
144, src_stride_u, 288, src_stride_u,
refbw_m1 + 8, refbh_m1 + 8, refbw_m1 + 8, refbh_m1 + 8,
x - 3, y - 3, w, h); x - 3, y - 3, w, h);
ref_u = s->edge_emu_buffer + 3 * 144 + 3; ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]); smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_v - 3 * src_stride_v - 3, ref_v - 3 * src_stride_v - 3 * bytesperpixel,
144, src_stride_v, 288, src_stride_v,
refbw_m1 + 8, refbh_m1 + 8, refbw_m1 + 8, refbh_m1 + 8,
x - 3, y - 3, w, h); x - 3, y - 3, w, h);
ref_v = s->edge_emu_buffer + 3 * 144 + 3; ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]); smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
} else { } else {
smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]); smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]); smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
} }
} }
#define FN(x) x##_scaled
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \ #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \ mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) mv, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, i) \ row, col, mv, bw, bh, w, h, i) \
mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) row, col, mv, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define FN(x) x##_scaled_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_scaled_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c" #include "vp9_mc_template.c"
#undef mc_luma_dir #undef mc_luma_dir
#undef mc_chroma_dir #undef mc_chroma_dir
#undef FN #undef FN
#undef BYTES_PER_PIXEL
static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2], static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
uint8_t *dst, ptrdiff_t dst_stride, uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride, const uint8_t *ref, ptrdiff_t ref_stride,
ThreadFrame *ref_frame, ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h) int bw, int bh, int w, int h, int bytesperpixel)
{ {
int mx = mv->x, my = mv->y, th; int mx = mv->x, my = mv->y, th;
y += my >> 3; y += my >> 3;
x += mx >> 3; x += mx >> 3;
ref += y * ref_stride + x; ref += y * ref_stride + x * bytesperpixel;
mx &= 7; mx &= 7;
my &= 7; my &= 7;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
...@@ -2856,12 +2865,12 @@ static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2 ...@@ -2856,12 +2865,12 @@ static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2
if (x < !!mx * 3 || y < !!my * 3 || if (x < !!mx * 3 || y < !!my * 3 ||
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) { x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref - !!my * 3 * ref_stride - !!mx * 3, ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
80, ref_stride, 160, ref_stride,
bw + !!mx * 7, bh + !!my * 7, bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h); x - !!mx * 3, y - !!my * 3, w, h);
ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
ref_stride = 80; ref_stride = 160;
} }
mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1); mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
} }
...@@ -2873,14 +2882,14 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc) ...@@ -2873,14 +2882,14 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)
const uint8_t *ref_v, ptrdiff_t src_stride_v, const uint8_t *ref_v, ptrdiff_t src_stride_v,
ThreadFrame *ref_frame, ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h) int bw, int bh, int w, int h, int bytesperpixel)
{ {
int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th; int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
y += my >> 4; y += my >> 4;
x += mx >> 4; x += mx >> 4;
ref_u += y * src_stride_u + x; ref_u += y * src_stride_u + x * bytesperpixel;
ref_v += y * src_stride_v + x; ref_v += y * src_stride_v + x * bytesperpixel;
mx &= 15; mx &= 15;
my &= 15; my &= 15;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
...@@ -2891,49 +2900,64 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc) ...@@ -2891,49 +2900,64 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)
if (x < !!mx * 3 || y < !!my * 3 || if (x < !!mx * 3 || y < !!my * 3 ||
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) { x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_u - !!my * 3 * src_stride_u - !!mx * 3, ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
80, src_stride_u, 160, src_stride_u,
bw + !!mx * 7, bh + !!my * 7, bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h); x - !!mx * 3, y - !!my * 3, w, h);
ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my); mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_v - !!my * 3 * src_stride_v - !!mx * 3, ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
80, src_stride_v, 160, src_stride_v,
bw + !!mx * 7, bh + !!my * 7, bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h); x - !!mx * 3, y - !!my * 3, w, h);
ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my); mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
} else { } else {
mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my); mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my); mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
} }
} }
#define FN(x) x
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \ #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \ mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, bw, bh, w, h) mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, i) \ row, col, mv, bw, bh, w, h, i) \
mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h) row, col, mv, bw, bh, w, h, bytesperpixel)
#define FN(x) x##_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c" #include "vp9_mc_template.c"
#undef mc_luma_dir_dir #undef mc_luma_dir_dir
#undef mc_chroma_dir_dir #undef mc_chroma_dir_dir
#undef FN #undef FN
#undef BYTES_PER_PIXEL
static void inter_recon(AVCodecContext *ctx) static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
{ {
VP9Context *s = ctx->priv_data; VP9Context *s = ctx->priv_data;
VP9Block *b = s->b; VP9Block *b = s->b;
int row = s->row, col = s->col; int row = s->row, col = s->col;
if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) { if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
inter_pred_scaled(ctx); if (bytesperpixel == 1) {
inter_pred_scaled_8bpp(ctx);
} else {
inter_pred_scaled_16bpp(ctx);
}
} else { } else {
inter_pred(ctx); if (bytesperpixel == 1) {
inter_pred_8bpp(ctx);
} else {
inter_pred_16bpp(ctx);
}
} }
if (!b->skip) { if (!b->skip) {
/* mostly copied intra_recon() */ /* mostly copied intra_recon() */
...@@ -2949,12 +2973,13 @@ static void inter_recon(AVCodecContext *ctx) ...@@ -2949,12 +2973,13 @@ static void inter_recon(AVCodecContext *ctx)
// y itxfm add // y itxfm add
for (n = 0, y = 0; y < end_y; y += step1d) { for (n = 0, y = 0; y < end_y; y += step1d) {
uint8_t *ptr = dst; uint8_t *ptr = dst;
for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) { for (x = 0; x < end_x; x += step1d,
ptr += 4 * step1d * bytesperpixel, n += step) {
int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n]; int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
if (eob) if (eob)
s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride, s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
s->block + 16 * n, eob); s->block + 16 * n * bytesperpixel, eob);
} }
dst += 4 * s->y_stride * step1d; dst += 4 * s->y_stride * step1d;
} }
...@@ -2967,12 +2992,13 @@ static void inter_recon(AVCodecContext *ctx) ...@@ -2967,12 +2992,13 @@ static void inter_recon(AVCodecContext *ctx)
dst = s->dst[p + 1]; dst = s->dst[p + 1];
for (n = 0, y = 0; y < end_y; y += uvstep1d) { for (n = 0, y = 0; y < end_y; y += uvstep1d) {
uint8_t *ptr = dst; uint8_t *ptr = dst;
for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) { for (x = 0; x < end_x; x += uvstep1d,
ptr += 4 * uvstep1d * bytesperpixel, n += step) {
int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n]; int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
if (eob) if (eob)
s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride, s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
s->uvblock[p] + 16 * n, eob); s->uvblock[p] + 16 * n * bytesperpixel, eob);
} }
dst += 4 * uvstep1d * s->uv_stride; dst += 4 * uvstep1d * s->uv_stride;
} }
...@@ -2980,6 +3006,16 @@ static void inter_recon(AVCodecContext *ctx) ...@@ -2980,6 +3006,16 @@ static void inter_recon(AVCodecContext *ctx)
} }
} }
/* Inter-block reconstruction for samples stored as one byte each:
 * forwards to inter_recon() with bytesperpixel fixed to 1. */
static void inter_recon_8bpp(AVCodecContext *ctx)
{
inter_recon(ctx, 1);
}
/* Inter-block reconstruction for samples stored as two bytes each:
 * forwards to inter_recon() with bytesperpixel fixed to 2. */
static void inter_recon_16bpp(AVCodecContext *ctx)
{
inter_recon(ctx, 2);
}
static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v, static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
int row_and_7, int col_and_7, int row_and_7, int col_and_7,
int w, int h, int col_end, int row_end, int w, int h, int col_end, int row_end,
...@@ -3211,7 +3247,11 @@ static void decode_b(AVCodecContext *ctx, int row, int col, ...@@ -3211,7 +3247,11 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
intra_recon_8bpp(ctx, yoff, uvoff); intra_recon_8bpp(ctx, yoff, uvoff);
} }
} else { } else {
inter_recon(ctx); if (s->bpp > 8) {
inter_recon_16bpp(ctx);
} else {
inter_recon_8bpp(ctx);
}
} }
if (emu[0]) { if (emu[0]) {
int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0; int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
......
...@@ -40,6 +40,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -40,6 +40,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
AVFrame *ref1 = tref1->f, *ref2; AVFrame *ref1 = tref1->f, *ref2;
int w1 = ref1->width, h1 = ref1->height, w2, h2; int w1 = ref1->width, h1 = ref1->height, w2, h2;
ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride; ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
int bytesperpixel = BYTES_PER_PIXEL;
if (b->comp) { if (b->comp) {
tref2 = &s->refs[s->refidx[b->ref[1]]]; tref2 = &s->refs[s->refidx[b->ref[1]]];
...@@ -138,7 +139,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -138,7 +139,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1, 0); row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1, 0); row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1, 0);
h1 = (h1 + s->ss_v) >> s->ss_v; h1 = (h1 + s->ss_v) >> s->ss_v;
...@@ -159,7 +160,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -159,7 +160,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << (3 - s->ss_v), col << 3, row << (3 - s->ss_v), col << 3,
&b->mv[0][0], 4, 8 >> s->ss_v, w1, h1, 0); &b->mv[0][0], 4, 8 >> s->ss_v, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << (3 - s->ss_v), (col << 3) + 4, row << (3 - s->ss_v), (col << 3) + 4,
...@@ -170,7 +172,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -170,7 +172,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2, 1); row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2, 1); row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2, 1);
h2 = (h2 + s->ss_v) >> s->ss_v; h2 = (h2 + s->ss_v) >> s->ss_v;
...@@ -191,7 +193,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -191,7 +193,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << (3 - s->ss_v), col << 3, row << (3 - s->ss_v), col << 3,
&b->mv[0][1], 4, 8 >> s->ss_v, w2, h2, 1); &b->mv[0][1], 4, 8 >> s->ss_v, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << (3 - s->ss_v), (col << 3) + 4, row << (3 - s->ss_v), (col << 3) + 4,
...@@ -206,7 +209,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -206,7 +209,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1, 0); row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1, 0); row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], mc_luma_dir(s, mc[4][b->filter][0],
...@@ -214,7 +217,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -214,7 +217,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1, 0); (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], mc_luma_dir(s, mc[4][b->filter][0],
s->dst[0] + 4 * ls_y + 4, ls_y, s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1, 0); (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1, 0);
if (s->ss_v) { if (s->ss_v) {
...@@ -239,7 +242,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -239,7 +242,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
&uvmv, 4, 4, w1, h1, 0); &uvmv, 4, 4, w1, h1, 0);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]); uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 2, (col << 3) + 4, row << 2, (col << 3) + 4,
...@@ -273,7 +277,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -273,7 +277,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << 3, col << 3, row << 3, col << 3,
&b->mv[0][0], 4, 4, w1, h1, 0); &b->mv[0][0], 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 3, (col << 3) + 4, row << 3, (col << 3) + 4,
...@@ -285,7 +290,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -285,7 +290,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
(row << 3) + 4, col << 3, (row << 3) + 4, col << 3,
&b->mv[2][0], 4, 4, w1, h1, 0); &b->mv[2][0], 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv, s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, (col << 3) + 4, (row << 3) + 4, (col << 3) + 4,
...@@ -297,7 +303,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -297,7 +303,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2, 1); row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2, 1); row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], mc_luma_dir(s, mc[4][b->filter][1],
...@@ -305,7 +311,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -305,7 +311,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2, 1); (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], mc_luma_dir(s, mc[4][b->filter][1],
s->dst[0] + 4 * ls_y + 4, ls_y, s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2, 1); (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2, 1);
if (s->ss_v) { if (s->ss_v) {
...@@ -330,7 +336,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -330,7 +336,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
&uvmv, 4, 4, w2, h2, 1); &uvmv, 4, 4, w2, h2, 1);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]); uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 2, (col << 3) + 4, row << 2, (col << 3) + 4,
...@@ -364,7 +371,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -364,7 +371,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << 3, col << 3, row << 3, col << 3,
&b->mv[0][1], 4, 4, w2, h2, 1); &b->mv[0][1], 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 3, (col << 3) + 4, row << 3, (col << 3) + 4,
...@@ -376,7 +384,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -376,7 +384,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
(row << 3) + 4, col << 3, (row << 3) + 4, col << 3,
&b->mv[2][1], 4, 4, w2, h2, 1); &b->mv[2][1], 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv, s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, (col << 3) + 4, (row << 3) + 4, (col << 3) + 4,
......
...@@ -1911,22 +1911,27 @@ static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride, ...@@ -1911,22 +1911,27 @@ static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
int w, int h) int w, int h)
{ {
do { do {
memcpy(dst, src, w); memcpy(dst, src, w * sizeof(pixel));
dst += dst_stride; dst += dst_stride;
src += src_stride; src += src_stride;
} while (--h); } while (--h);
} }
static av_always_inline void avg_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void avg_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h) int w, int h)
{ {
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do { do {
int x; int x;
for (x = 0; x < w; x += 4) for (x = 0; x < w; x += 4)
AV_WN32A(&dst[x], rnd_avg32(AV_RN32A(&dst[x]), AV_RN32(&src[x]))); AV_WN4PA(&dst[x], rnd_avg_pixel4(AV_RN4PA(&dst[x]), AV_RN4P(&src[x])));
dst += dst_stride; dst += dst_stride;
src += src_stride; src += src_stride;
...@@ -2010,7 +2015,7 @@ static const int16_t vp9_subpel_filters[3][16][8] = { ...@@ -2010,7 +2015,7 @@ static const int16_t vp9_subpel_filters[3][16][8] = {
}; };
#define FILTER_8TAP(src, x, F, stride) \ #define FILTER_8TAP(src, x, F, stride) \
av_clip_uint8((F[0] * src[x + -3 * stride] + \ av_clip_pixel((F[0] * src[x + -3 * stride] + \
F[1] * src[x + -2 * stride] + \ F[1] * src[x + -2 * stride] + \
F[2] * src[x + -1 * stride] + \ F[2] * src[x + -1 * stride] + \
F[3] * src[x + +0 * stride] + \ F[3] * src[x + +0 * stride] + \
...@@ -2019,11 +2024,16 @@ static const int16_t vp9_subpel_filters[3][16][8] = { ...@@ -2019,11 +2024,16 @@ static const int16_t vp9_subpel_filters[3][16][8] = {
F[6] * src[x + +3 * stride] + \ F[6] * src[x + +3 * stride] + \
F[7] * src[x + +4 * stride] + 64) >> 7) F[7] * src[x + +4 * stride] + 64) >> 7)
static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_8tap_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds, int w, int h, ptrdiff_t ds,
const int16_t *filter, int avg) const int16_t *filter, int avg)
{ {
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do { do {
int x; int x;
...@@ -2047,21 +2057,25 @@ static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stri ...@@ -2047,21 +2057,25 @@ static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stri
do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \ do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
} }
filter_8tap_1d_fn(put, 0, v, src_stride) filter_8tap_1d_fn(put, 0, v, src_stride / sizeof(pixel))
filter_8tap_1d_fn(put, 0, h, 1) filter_8tap_1d_fn(put, 0, h, 1)
filter_8tap_1d_fn(avg, 1, v, src_stride) filter_8tap_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
filter_8tap_1d_fn(avg, 1, h, 1) filter_8tap_1d_fn(avg, 1, h, 1)
#undef filter_8tap_1d_fn #undef filter_8tap_1d_fn
static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_8tap_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, const int16_t *filterx, int w, int h, const int16_t *filterx,
const int16_t *filtery, int avg) const int16_t *filtery, int avg)
{ {
int tmp_h = h + 7; int tmp_h = h + 7;
uint8_t tmp[64 * 71], *tmp_ptr = tmp; pixel tmp[64 * 71], *tmp_ptr = tmp;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
src -= src_stride * 3; src -= src_stride * 3;
do { do {
int x; int x;
...@@ -2125,10 +2139,15 @@ static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ ...@@ -2125,10 +2139,15 @@ static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
#define FILTER_BILIN(src, x, mxy, stride) \ #define FILTER_BILIN(src, x, mxy, stride) \
(src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4)) (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
static av_always_inline void do_bilin_1d_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_bilin_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds, int mxy, int avg) int w, int h, ptrdiff_t ds, int mxy, int avg)
{ {
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do { do {
int x; int x;
...@@ -2152,20 +2171,24 @@ static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_str ...@@ -2152,20 +2171,24 @@ static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_str
do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \ do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
} }
bilin_1d_fn(put, 0, v, src_stride) bilin_1d_fn(put, 0, v, src_stride / sizeof(pixel))
bilin_1d_fn(put, 0, h, 1) bilin_1d_fn(put, 0, h, 1)
bilin_1d_fn(avg, 1, v, src_stride) bilin_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
bilin_1d_fn(avg, 1, h, 1) bilin_1d_fn(avg, 1, h, 1)
#undef bilin_1d_fn #undef bilin_1d_fn
static av_always_inline void do_bilin_2d_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_bilin_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, int mx, int my, int avg) int w, int h, int mx, int my, int avg)
{ {
uint8_t tmp[64 * 65], *tmp_ptr = tmp; pixel tmp[64 * 65], *tmp_ptr = tmp;
int tmp_h = h + 1; int tmp_h = h + 1;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do { do {
int x; int x;
...@@ -2299,15 +2322,19 @@ static av_cold void vp9dsp_mc_init(VP9DSPContext *dsp) ...@@ -2299,15 +2322,19 @@ static av_cold void vp9dsp_mc_init(VP9DSPContext *dsp)
#undef init_subpel3 #undef init_subpel3
} }
static av_always_inline void do_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_scaled_8tap_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, int mx, int my, int w, int h, int mx, int my,
int dx, int dy, int avg, int dx, int dy, int avg,
const int16_t (*filters)[8]) const int16_t (*filters)[8])
{ {
int tmp_h = (((h - 1) * dy + my) >> 4) + 8; int tmp_h = (((h - 1) * dy + my) >> 4) + 8;
uint8_t tmp[64 * 135], *tmp_ptr = tmp; pixel tmp[64 * 135], *tmp_ptr = tmp;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
src -= src_stride * 3; src -= src_stride * 3;
do { do {
int x; int x;
...@@ -2369,14 +2396,18 @@ static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \ ...@@ -2369,14 +2396,18 @@ static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
vp9_subpel_filters[type_idx]); \ vp9_subpel_filters[type_idx]); \
} }
static av_always_inline void do_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_scaled_bilin_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, int mx, int my, int w, int h, int mx, int my,
int dx, int dy, int avg) int dx, int dy, int avg)
{ {
uint8_t tmp[64 * 129], *tmp_ptr = tmp; pixel tmp[64 * 129], *tmp_ptr = tmp;
int tmp_h = (((h - 1) * dy + my) >> 4) + 2; int tmp_h = (((h - 1) * dy + my) >> 4) + 2;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do { do {
int x; int x;
int imx = mx, ioff = 0; int imx = mx, ioff = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment