Commit 23ba4538 authored by Ronald S. Bultje

vp9: add inter-frame profile 2/3 support.

parent b224b165
This diff is collapsed.
...@@ -40,6 +40,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -40,6 +40,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
AVFrame *ref1 = tref1->f, *ref2; AVFrame *ref1 = tref1->f, *ref2;
int w1 = ref1->width, h1 = ref1->height, w2, h2; int w1 = ref1->width, h1 = ref1->height, w2, h2;
ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride; ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
int bytesperpixel = BYTES_PER_PIXEL;
if (b->comp) { if (b->comp) {
tref2 = &s->refs[s->refidx[b->ref[1]]]; tref2 = &s->refs[s->refidx[b->ref[1]]];
...@@ -138,7 +139,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -138,7 +139,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1, 0); row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1, 0); row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1, 0);
h1 = (h1 + s->ss_v) >> s->ss_v; h1 = (h1 + s->ss_v) >> s->ss_v;
...@@ -159,7 +160,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -159,7 +160,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << (3 - s->ss_v), col << 3, row << (3 - s->ss_v), col << 3,
&b->mv[0][0], 4, 8 >> s->ss_v, w1, h1, 0); &b->mv[0][0], 4, 8 >> s->ss_v, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << (3 - s->ss_v), (col << 3) + 4, row << (3 - s->ss_v), (col << 3) + 4,
...@@ -170,7 +172,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -170,7 +172,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2, 1); row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2, 1); row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2, 1);
h2 = (h2 + s->ss_v) >> s->ss_v; h2 = (h2 + s->ss_v) >> s->ss_v;
...@@ -191,7 +193,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -191,7 +193,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << (3 - s->ss_v), col << 3, row << (3 - s->ss_v), col << 3,
&b->mv[0][1], 4, 8 >> s->ss_v, w2, h2, 1); &b->mv[0][1], 4, 8 >> s->ss_v, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << (3 - s->ss_v), (col << 3) + 4, row << (3 - s->ss_v), (col << 3) + 4,
...@@ -206,7 +209,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -206,7 +209,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1, 0); row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1, 0); row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], mc_luma_dir(s, mc[4][b->filter][0],
...@@ -214,7 +217,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -214,7 +217,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1, 0); (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], mc_luma_dir(s, mc[4][b->filter][0],
s->dst[0] + 4 * ls_y + 4, ls_y, s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1, 0); (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1, 0);
if (s->ss_v) { if (s->ss_v) {
...@@ -239,7 +242,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -239,7 +242,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
&uvmv, 4, 4, w1, h1, 0); &uvmv, 4, 4, w1, h1, 0);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]); uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 2, (col << 3) + 4, row << 2, (col << 3) + 4,
...@@ -273,7 +277,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -273,7 +277,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << 3, col << 3, row << 3, col << 3,
&b->mv[0][0], 4, 4, w1, h1, 0); &b->mv[0][0], 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 3, (col << 3) + 4, row << 3, (col << 3) + 4,
...@@ -285,7 +290,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -285,7 +290,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
(row << 3) + 4, col << 3, (row << 3) + 4, col << 3,
&b->mv[2][0], 4, 4, w1, h1, 0); &b->mv[2][0], 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv, s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, (col << 3) + 4, (row << 3) + 4, (col << 3) + 4,
...@@ -297,7 +303,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -297,7 +303,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2, 1); row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2, 1); row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], mc_luma_dir(s, mc[4][b->filter][1],
...@@ -305,7 +311,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -305,7 +311,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2, 1); (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], mc_luma_dir(s, mc[4][b->filter][1],
s->dst[0] + 4 * ls_y + 4, ls_y, s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2, 1); (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2, 1);
if (s->ss_v) { if (s->ss_v) {
...@@ -330,7 +336,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -330,7 +336,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
&uvmv, 4, 4, w2, h2, 1); &uvmv, 4, 4, w2, h2, 1);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]); uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 2, (col << 3) + 4, row << 2, (col << 3) + 4,
...@@ -364,7 +371,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -364,7 +371,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << 3, col << 3, row << 3, col << 3,
&b->mv[0][1], 4, 4, w2, h2, 1); &b->mv[0][1], 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 3, (col << 3) + 4, row << 3, (col << 3) + 4,
...@@ -376,7 +384,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -376,7 +384,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
(row << 3) + 4, col << 3, (row << 3) + 4, col << 3,
&b->mv[2][1], 4, 4, w2, h2, 1); &b->mv[2][1], 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv, s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, (col << 3) + 4, (row << 3) + 4, (col << 3) + 4,
......
...@@ -1911,22 +1911,27 @@ static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride, ...@@ -1911,22 +1911,27 @@ static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
int w, int h) int w, int h)
{ {
do { do {
memcpy(dst, src, w); memcpy(dst, src, w * sizeof(pixel));
dst += dst_stride; dst += dst_stride;
src += src_stride; src += src_stride;
} while (--h); } while (--h);
} }
static av_always_inline void avg_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void avg_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h) int w, int h)
{ {
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do { do {
int x; int x;
for (x = 0; x < w; x += 4) for (x = 0; x < w; x += 4)
AV_WN32A(&dst[x], rnd_avg32(AV_RN32A(&dst[x]), AV_RN32(&src[x]))); AV_WN4PA(&dst[x], rnd_avg_pixel4(AV_RN4PA(&dst[x]), AV_RN4P(&src[x])));
dst += dst_stride; dst += dst_stride;
src += src_stride; src += src_stride;
...@@ -2010,7 +2015,7 @@ static const int16_t vp9_subpel_filters[3][16][8] = { ...@@ -2010,7 +2015,7 @@ static const int16_t vp9_subpel_filters[3][16][8] = {
}; };
#define FILTER_8TAP(src, x, F, stride) \ #define FILTER_8TAP(src, x, F, stride) \
av_clip_uint8((F[0] * src[x + -3 * stride] + \ av_clip_pixel((F[0] * src[x + -3 * stride] + \
F[1] * src[x + -2 * stride] + \ F[1] * src[x + -2 * stride] + \
F[2] * src[x + -1 * stride] + \ F[2] * src[x + -1 * stride] + \
F[3] * src[x + +0 * stride] + \ F[3] * src[x + +0 * stride] + \
...@@ -2019,11 +2024,16 @@ static const int16_t vp9_subpel_filters[3][16][8] = { ...@@ -2019,11 +2024,16 @@ static const int16_t vp9_subpel_filters[3][16][8] = {
F[6] * src[x + +3 * stride] + \ F[6] * src[x + +3 * stride] + \
F[7] * src[x + +4 * stride] + 64) >> 7) F[7] * src[x + +4 * stride] + 64) >> 7)
static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_8tap_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds, int w, int h, ptrdiff_t ds,
const int16_t *filter, int avg) const int16_t *filter, int avg)
{ {
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do { do {
int x; int x;
...@@ -2047,21 +2057,25 @@ static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stri ...@@ -2047,21 +2057,25 @@ static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stri
do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \ do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
} }
filter_8tap_1d_fn(put, 0, v, src_stride) filter_8tap_1d_fn(put, 0, v, src_stride / sizeof(pixel))
filter_8tap_1d_fn(put, 0, h, 1) filter_8tap_1d_fn(put, 0, h, 1)
filter_8tap_1d_fn(avg, 1, v, src_stride) filter_8tap_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
filter_8tap_1d_fn(avg, 1, h, 1) filter_8tap_1d_fn(avg, 1, h, 1)
#undef filter_8tap_1d_fn #undef filter_8tap_1d_fn
static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_8tap_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, const int16_t *filterx, int w, int h, const int16_t *filterx,
const int16_t *filtery, int avg) const int16_t *filtery, int avg)
{ {
int tmp_h = h + 7; int tmp_h = h + 7;
uint8_t tmp[64 * 71], *tmp_ptr = tmp; pixel tmp[64 * 71], *tmp_ptr = tmp;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
src -= src_stride * 3; src -= src_stride * 3;
do { do {
int x; int x;
...@@ -2125,10 +2139,15 @@ static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ ...@@ -2125,10 +2139,15 @@ static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
#define FILTER_BILIN(src, x, mxy, stride) \ #define FILTER_BILIN(src, x, mxy, stride) \
(src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4)) (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
static av_always_inline void do_bilin_1d_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_bilin_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds, int mxy, int avg) int w, int h, ptrdiff_t ds, int mxy, int avg)
{ {
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do { do {
int x; int x;
...@@ -2152,20 +2171,24 @@ static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_str ...@@ -2152,20 +2171,24 @@ static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_str
do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \ do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
} }
bilin_1d_fn(put, 0, v, src_stride) bilin_1d_fn(put, 0, v, src_stride / sizeof(pixel))
bilin_1d_fn(put, 0, h, 1) bilin_1d_fn(put, 0, h, 1)
bilin_1d_fn(avg, 1, v, src_stride) bilin_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
bilin_1d_fn(avg, 1, h, 1) bilin_1d_fn(avg, 1, h, 1)
#undef bilin_1d_fn #undef bilin_1d_fn
static av_always_inline void do_bilin_2d_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_bilin_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, int mx, int my, int avg) int w, int h, int mx, int my, int avg)
{ {
uint8_t tmp[64 * 65], *tmp_ptr = tmp; pixel tmp[64 * 65], *tmp_ptr = tmp;
int tmp_h = h + 1; int tmp_h = h + 1;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do { do {
int x; int x;
...@@ -2299,15 +2322,19 @@ static av_cold void vp9dsp_mc_init(VP9DSPContext *dsp) ...@@ -2299,15 +2322,19 @@ static av_cold void vp9dsp_mc_init(VP9DSPContext *dsp)
#undef init_subpel3 #undef init_subpel3
} }
static av_always_inline void do_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_scaled_8tap_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, int mx, int my, int w, int h, int mx, int my,
int dx, int dy, int avg, int dx, int dy, int avg,
const int16_t (*filters)[8]) const int16_t (*filters)[8])
{ {
int tmp_h = (((h - 1) * dy + my) >> 4) + 8; int tmp_h = (((h - 1) * dy + my) >> 4) + 8;
uint8_t tmp[64 * 135], *tmp_ptr = tmp; pixel tmp[64 * 135], *tmp_ptr = tmp;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
src -= src_stride * 3; src -= src_stride * 3;
do { do {
int x; int x;
...@@ -2369,14 +2396,18 @@ static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \ ...@@ -2369,14 +2396,18 @@ static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
vp9_subpel_filters[type_idx]); \ vp9_subpel_filters[type_idx]); \
} }
static av_always_inline void do_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, static av_always_inline void do_scaled_bilin_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, int mx, int my, int w, int h, int mx, int my,
int dx, int dy, int avg) int dx, int dy, int avg)
{ {
uint8_t tmp[64 * 129], *tmp_ptr = tmp; pixel tmp[64 * 129], *tmp_ptr = tmp;
int tmp_h = (((h - 1) * dy + my) >> 4) + 2; int tmp_h = (((h - 1) * dy + my) >> 4) + 2;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do { do {
int x; int x;
int imx = mx, ioff = 0; int imx = mx, ioff = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment