Commit 2e55e26b authored by Martin Storsjö

vp9: Flip the order of arguments in MC functions

This makes it match the pattern already used for VP8 MC functions.

This also makes the signature match ffmpeg's version of these
functions, easing porting of code in both directions.
Signed-off-by: Martin Storsjö <martin@martin.st>
parent e3fb74f7
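For orientation, here is a minimal compilable sketch of the signature change (illustrative only, not part of the commit; the vp9_mc_func_old name is made up here purely for side-by-side comparison). After this change, each stride argument immediately follows the pointer it belongs to:

#include <stddef.h>   /* ptrdiff_t */
#include <stdint.h>   /* uint8_t */

/* Old order: both pointers first, then both strides. */
typedef void (*vp9_mc_func_old)(uint8_t *dst, const uint8_t *ref,
                                ptrdiff_t dst_stride, ptrdiff_t ref_stride,
                                int h, int mx, int my);

/* New order: each pointer is immediately followed by its stride,
 * matching the VP8 MC functions and ffmpeg's vp9_mc_func. */
typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
                            const uint8_t *ref, ptrdiff_t ref_stride,
                            int h, int mx, int my);

/* Call sites swap the second and third arguments accordingly, e.g.
 *     mc[!!mx][!!my](dst, ref, dst_stride, ref_stride, bh, mx << 1, my << 1);
 * becomes
 *     mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
 */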
@@ -127,9 +127,8 @@ typedef struct ProbContext {
     uint8_t partition[4][4][3];
 } ProbContext;
 
-typedef void (*vp9_mc_func)(uint8_t *dst, const uint8_t *ref,
-                            ptrdiff_t dst_stride,
-                            ptrdiff_t ref_stride,
+typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+                            const uint8_t *ref, ptrdiff_t ref_stride,
                             int h, int mx, int my);
 
 typedef struct VP9DSPContext {
@@ -1187,7 +1187,7 @@ static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
         ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
         ref_stride = 80;
     }
-    mc[!!mx][!!my](dst, ref, dst_stride, ref_stride, bh, mx << 1, my << 1);
+    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
 }
 
 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
@@ -1227,7 +1227,7 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
                                  bw + !!mx * 7, bh + !!my * 7,
                                  x - !!mx * 3, y - !!my * 3, w, h);
         ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
-        mc[!!mx][!!my](dst_u, ref_u, dst_stride, 80, bh, mx, my);
+        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
 
         s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                  ref_v - !!my * 3 * src_stride_v - !!mx * 3,
@@ -1236,10 +1236,10 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
                                  bw + !!mx * 7, bh + !!my * 7,
                                  x - !!mx * 3, y - !!my * 3, w, h);
         ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
-        mc[!!mx][!!my](dst_v, ref_v, dst_stride, 80, bh, mx, my);
+        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
     } else {
-        mc[!!mx][!!my](dst_u, ref_u, dst_stride, src_stride_u, bh, mx, my);
-        mc[!!mx][!!my](dst_v, ref_v, dst_stride, src_stride_v, bh, mx, my);
+        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
+        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
     }
 }
@@ -1668,8 +1668,8 @@ int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
                 av_assert2(n <= 4);
                 if (w & bw) {
                     s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o,
-                                             s->tmp_y + o,
                                              f->linesize[0],
+                                             s->tmp_y + o,
                                              64, h, 0, 0);
                     o += bw;
                 }
@@ -1686,12 +1686,12 @@ int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
                 av_assert2(n <= 4);
                 if (w & bw) {
                     s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o,
-                                             s->tmp_uv[0] + o,
                                              f->linesize[1],
+                                             s->tmp_uv[0] + o,
                                              32, h, 0, 0);
                     s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o,
-                                             s->tmp_uv[1] + o,
                                              f->linesize[2],
+                                             s->tmp_uv[1] + o,
                                              32, h, 0, 0);
                     o += bw;
                 }
This diff is collapsed.
@@ -29,10 +29,9 @@
 #if HAVE_YASM
 
-#define fpel_func(avg, sz, opt) \
-void ff_vp9_ ## avg ## sz ## _ ## opt(uint8_t *dst, const uint8_t *src, \
-                                      ptrdiff_t dst_stride, \
-                                      ptrdiff_t src_stride, \
+#define fpel_func(avg, sz, opt) \
+void ff_vp9_ ## avg ## sz ## _ ## opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                      const uint8_t *src, ptrdiff_t src_stride, \
                                       int h, int mx, int my)
 
 fpel_func(put, 4, mmx);
@@ -54,8 +53,8 @@ fpel_func(avg, 64, avx2);
 #define mc_func(avg, sz, dir, opt, type, f_sz) \
 void \
 ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \
-                                                           const uint8_t *src, \
                                                            ptrdiff_t dst_stride, \
+                                                           const uint8_t *src, \
                                                            ptrdiff_t src_stride, \
                                                            int h, \
                                                            const type (*filter)[f_sz])
@@ -81,20 +80,21 @@ mc_funcs(32, avx2, int8_t, 32);
 #define mc_rep_func(avg, sz, hsz, dir, opt, type, f_sz) \
 static av_always_inline void \
 ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \
-                                                           const uint8_t *src, \
                                                            ptrdiff_t dst_stride, \
+                                                           const uint8_t *src, \
                                                            ptrdiff_t src_stride, \
                                                            int h, \
                                                            const type (*filter)[f_sz]) \
 { \
-    ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, src, \
+    ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, \
                                                                 dst_stride, \
+                                                                src, \
                                                                 src_stride, \
                                                                 h, \
                                                                 filter); \
     ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst + hsz, \
-                                                                src + hsz, \
                                                                 dst_stride, \
+                                                                src + hsz, \
                                                                 src_stride, \
                                                                 h, filter); \
 }
@@ -126,19 +126,18 @@ extern const int16_t ff_filters_sse2[3][15][8][8];
 #define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, opt) \
 static void \
 op ## _8tap_ ## fname ## _ ## sz ## hv_ ## opt(uint8_t *dst, \
-                                               const uint8_t *src, \
                                                ptrdiff_t dst_stride, \
+                                               const uint8_t *src, \
                                                ptrdiff_t src_stride, \
                                                int h, int mx, int my) \
 { \
     LOCAL_ALIGNED_ ## align(uint8_t, temp, [71 * 64]); \
-    ff_vp9_put_8tap_1d_h_ ## sz ## _ ## opt(temp, src - 3 * src_stride, \
-                                            64, src_stride, \
-                                            h + 7, \
+    ff_vp9_put_8tap_1d_h_ ## sz ## _ ## opt(temp, 64, \
+                                            src - 3 * src_stride, \
+                                            src_stride, h + 7, \
                                             ff_filters_ ## f_opt[f][mx - 1]); \
-    ff_vp9_ ## op ## _8tap_1d_v_ ## sz ## _ ## opt(dst, temp + 3 * 64, \
-                                                   dst_stride, 64, \
-                                                   h, \
+    ff_vp9_ ## op ## _8tap_1d_v_ ## sz ## _ ## opt(dst, dst_stride, \
+                                                   temp + 3 * 64, 64, h, \
                                                    ff_filters_ ## f_opt[f][my - 1]); \
 }
@@ -173,14 +172,15 @@ filters_8tap_2d_fn(avg, 32, 32, avx2, ssse3)
 #define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, opt) \
 static void \
 op ## _8tap_ ## fname ## _ ## sz ## dir ## _ ## opt(uint8_t *dst, \
-                                                    const uint8_t *src, \
                                                     ptrdiff_t dst_stride, \
+                                                    const uint8_t *src, \
                                                     ptrdiff_t src_stride, \
                                                     int h, int mx, \
                                                     int my) \
 { \
-    ff_vp9_ ## op ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(dst, src, \
+    ff_vp9_ ## op ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(dst, \
                                                               dst_stride, \
+                                                              src, \
                                                               src_stride, h,\
                                                               ff_filters_ ## f_opt[f][dvar - 1]); \
 }
@@ -107,7 +107,7 @@ SECTION .text
 %macro filter_sse2_h_fn 1
 %assign %%px mmsize/2
-cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 15, dst, src, dstride, sstride, h, filtery
+cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 15, dst, dstride, src, sstride, h, filtery
     pxor m5, m5
     mova m6, [pw_64]
     mova m7, [filteryq+ 0]
@@ -192,7 +192,7 @@ filter_sse2_h_fn avg
 %macro filter_h_fn 1
 %assign %%px mmsize/2
-cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, src, dstride, sstride, h, filtery
+cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, dstride, src, sstride, h, filtery
     mova m6, [pw_256]
     mova m7, [filteryq+ 0]
 %if ARCH_X86_64 && mmsize > 8
@@ -253,7 +253,7 @@ filter_h_fn avg
 %if ARCH_X86_64
 %macro filter_hx2_fn 1
 %assign %%px mmsize
-cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, src, dstride, sstride, h, filtery
+cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, dstride, src, sstride, h, filtery
     mova m13, [pw_256]
     mova m8, [filteryq+ 0]
     mova m9, [filteryq+32]
@@ -315,9 +315,9 @@ filter_hx2_fn avg
 %macro filter_sse2_v_fn 1
 %assign %%px mmsize/2
 %if ARCH_X86_64
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 15, dst, src, dstride, sstride, h, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 15, dst, dstride, src, sstride, h, filtery, src4, sstride3
 %else
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 15, dst, src, dstride, sstride, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 15, dst, dstride, src, sstride, filtery, src4, sstride3
     mov filteryq, r5mp
 %define hd r4mp
 %endif
@@ -413,9 +413,9 @@ filter_sse2_v_fn avg
 %macro filter_v_fn 1
 %assign %%px mmsize/2
 %if ARCH_X86_64
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, src, dstride, sstride, h, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, dstride, src, sstride, h, filtery, src4, sstride3
 %else
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, src, dstride, sstride, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, dstride, src, sstride, filtery, src4, sstride3
     mov filteryq, r5mp
 %define hd r4mp
 %endif
@@ -486,7 +486,7 @@ filter_v_fn avg
 %macro filter_vx2_fn 1
 %assign %%px mmsize
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, src, dstride, sstride, h, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, dstride, src, sstride, h, filtery, src4, sstride3
     mova m13, [pw_256]
     lea sstride3q, [sstrideq*3]
     lea src4q, [srcq+sstrideq]
@@ -562,11 +562,11 @@ filter_vx2_fn avg
 %endif
 
 %if %2 <= mmsize
-cglobal vp9_%1%2, 5, 7, 4, dst, src, dstride, sstride, h, dstride3, sstride3
+cglobal vp9_%1%2, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
     lea sstride3q, [sstrideq*3]
     lea dstride3q, [dstrideq*3]
 %else
-cglobal vp9_%1%2, 5, 5, 4, dst, src, dstride, sstride, h
+cglobal vp9_%1%2, 5, 5, 4, dst, dstride, src, sstride, h
 %endif
 .loop:
     %%srcfn m0, [srcq]
@@ -228,8 +228,8 @@ static void check_mc(void)
     int op, hsize, filter, dx, dy;
     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
-                      void, uint8_t *dst, const uint8_t *ref,
-                      ptrdiff_t dst_stride, ptrdiff_t ref_stride,
+                      void, uint8_t *dst, ptrdiff_t dst_stride,
+                      const uint8_t *ref, ptrdiff_t ref_stride,
                       int h, int mx, int my);
 
     for (op = 0; op < 2; op++) {
@@ -252,13 +252,11 @@ static void check_mc(void)
                     int mx = dx ? 1 + (rnd() % 14) : 0;
                     int my = dy ? 1 + (rnd() % 14) : 0;
                     randomize_buffers();
-                    call_ref(dst0, src,
-                             size * SIZEOF_PIXEL,
-                             SRC_BUF_STRIDE * SIZEOF_PIXEL,
+                    call_ref(dst0, size * SIZEOF_PIXEL,
+                             src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
                              size, mx, my);
-                    call_new(dst1, src,
-                             size * SIZEOF_PIXEL,
-                             SRC_BUF_STRIDE * SIZEOF_PIXEL,
+                    call_new(dst1, size * SIZEOF_PIXEL,
+                             src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
                              size, mx, my);
                     if (memcmp(dst0, dst1, DST_BUF_SIZE))
                         fail();
@@ -267,8 +265,8 @@ static void check_mc(void)
                     // functions are identical
                     if (filter >= 1 && filter <= 2) continue;
-                    bench_new(dst1, src, size * SIZEOF_PIXEL,
-                              SRC_BUF_STRIDE * SIZEOF_PIXEL,
+                    bench_new(dst1, size * SIZEOF_PIXEL,
+                              src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
                               size, mx, my);
                 }
             }