Commit 23ba4538 authored by Ronald S. Bultje's avatar Ronald S. Bultje

vp9: add inter-frame profile 2/3 suport.

parent b224b165
This diff is collapsed.
......@@ -40,6 +40,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
AVFrame *ref1 = tref1->f, *ref2;
int w1 = ref1->width, h1 = ref1->height, w2, h2;
ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
int bytesperpixel = BYTES_PER_PIXEL;
if (b->comp) {
tref2 = &s->refs[s->refidx[b->ref[1]]];
......@@ -138,7 +139,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y,
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1, 0);
h1 = (h1 + s->ss_v) >> s->ss_v;
......@@ -159,7 +160,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << (3 - s->ss_v), col << 3,
&b->mv[0][0], 4, 8 >> s->ss_v, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << (3 - s->ss_v), (col << 3) + 4,
......@@ -170,7 +172,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y,
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2, 1);
h2 = (h2 + s->ss_v) >> s->ss_v;
......@@ -191,7 +193,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << (3 - s->ss_v), col << 3,
&b->mv[0][1], 4, 8 >> s->ss_v, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << (3 - s->ss_v), (col << 3) + 4,
......@@ -206,7 +209,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y,
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0],
......@@ -214,7 +217,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0],
s->dst[0] + 4 * ls_y + 4, ls_y,
s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1, 0);
if (s->ss_v) {
......@@ -239,7 +242,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
&uvmv, 4, 4, w1, h1, 0);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << 2, (col << 3) + 4,
......@@ -273,7 +277,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << 3, col << 3,
&b->mv[0][0], 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
row << 3, (col << 3) + 4,
......@@ -285,7 +290,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
(row << 3) + 4, col << 3,
&b->mv[2][0], 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv,
s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, (col << 3) + 4,
......@@ -297,7 +303,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y,
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1],
......@@ -305,7 +311,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1],
s->dst[0] + 4 * ls_y + 4, ls_y,
s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2, 1);
if (s->ss_v) {
......@@ -330,7 +336,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
&uvmv, 4, 4, w2, h2, 1);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 2, (col << 3) + 4,
......@@ -364,7 +371,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << 3, col << 3,
&b->mv[0][1], 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv,
s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 3, (col << 3) + 4,
......@@ -376,7 +384,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
(row << 3) + 4, col << 3,
&b->mv[2][1], 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv,
s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, (col << 3) + 4,
......
......@@ -1911,22 +1911,27 @@ static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
int w, int h)
{
do {
memcpy(dst, src, w);
memcpy(dst, src, w * sizeof(pixel));
dst += dst_stride;
src += src_stride;
} while (--h);
}
static av_always_inline void avg_c(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride,
static av_always_inline void avg_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *_src, ptrdiff_t src_stride,
int w, int h)
{
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do {
int x;
for (x = 0; x < w; x += 4)
AV_WN32A(&dst[x], rnd_avg32(AV_RN32A(&dst[x]), AV_RN32(&src[x])));
AV_WN4PA(&dst[x], rnd_avg_pixel4(AV_RN4PA(&dst[x]), AV_RN4P(&src[x])));
dst += dst_stride;
src += src_stride;
......@@ -2010,7 +2015,7 @@ static const int16_t vp9_subpel_filters[3][16][8] = {
};
#define FILTER_8TAP(src, x, F, stride) \
av_clip_uint8((F[0] * src[x + -3 * stride] + \
av_clip_pixel((F[0] * src[x + -3 * stride] + \
F[1] * src[x + -2 * stride] + \
F[2] * src[x + -1 * stride] + \
F[3] * src[x + +0 * stride] + \
......@@ -2019,11 +2024,16 @@ static const int16_t vp9_subpel_filters[3][16][8] = {
F[6] * src[x + +3 * stride] + \
F[7] * src[x + +4 * stride] + 64) >> 7)
static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride,
static av_always_inline void do_8tap_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds,
const int16_t *filter, int avg)
{
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do {
int x;
......@@ -2047,21 +2057,25 @@ static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stri
do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
}
filter_8tap_1d_fn(put, 0, v, src_stride)
filter_8tap_1d_fn(put, 0, v, src_stride / sizeof(pixel))
filter_8tap_1d_fn(put, 0, h, 1)
filter_8tap_1d_fn(avg, 1, v, src_stride)
filter_8tap_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
filter_8tap_1d_fn(avg, 1, h, 1)
#undef filter_8tap_1d_fn
static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride,
static av_always_inline void do_8tap_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, const int16_t *filterx,
const int16_t *filtery, int avg)
{
int tmp_h = h + 7;
uint8_t tmp[64 * 71], *tmp_ptr = tmp;
pixel tmp[64 * 71], *tmp_ptr = tmp;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
src -= src_stride * 3;
do {
int x;
......@@ -2125,10 +2139,15 @@ static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
#define FILTER_BILIN(src, x, mxy, stride) \
(src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
static av_always_inline void do_bilin_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride,
static av_always_inline void do_bilin_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds, int mxy, int avg)
{
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do {
int x;
......@@ -2152,20 +2171,24 @@ static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_str
do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
}
bilin_1d_fn(put, 0, v, src_stride)
bilin_1d_fn(put, 0, v, src_stride / sizeof(pixel))
bilin_1d_fn(put, 0, h, 1)
bilin_1d_fn(avg, 1, v, src_stride)
bilin_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
bilin_1d_fn(avg, 1, h, 1)
#undef bilin_1d_fn
static av_always_inline void do_bilin_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride,
static av_always_inline void do_bilin_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, int mx, int my, int avg)
{
uint8_t tmp[64 * 65], *tmp_ptr = tmp;
pixel tmp[64 * 65], *tmp_ptr = tmp;
int tmp_h = h + 1;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do {
int x;
......@@ -2299,15 +2322,19 @@ static av_cold void vp9dsp_mc_init(VP9DSPContext *dsp)
#undef init_subpel3
}
static av_always_inline void do_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride,
static av_always_inline void do_scaled_8tap_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, int mx, int my,
int dx, int dy, int avg,
const int16_t (*filters)[8])
{
int tmp_h = (((h - 1) * dy + my) >> 4) + 8;
uint8_t tmp[64 * 135], *tmp_ptr = tmp;
pixel tmp[64 * 135], *tmp_ptr = tmp;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
src -= src_stride * 3;
do {
int x;
......@@ -2369,14 +2396,18 @@ static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
vp9_subpel_filters[type_idx]); \
}
static av_always_inline void do_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride,
static av_always_inline void do_scaled_bilin_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, int mx, int my,
int dx, int dy, int avg)
{
uint8_t tmp[64 * 129], *tmp_ptr = tmp;
pixel tmp[64 * 129], *tmp_ptr = tmp;
int tmp_h = (((h - 1) * dy + my) >> 4) + 2;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do {
int x;
int imx = mx, ioff = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment