Commit b28d5c49 authored by Michael Niedermayer

Merge remote-tracking branch 'rbultje/vp9-profile23-wip'

* rbultje/vp9-profile23-wip:
  libvpxdec: add 440 pixfmts.
  vp9: add profile 2/3 fate tests.
  vp9: add inter-frame profile 2/3 support.
  vp9: add keyframe profile 2/3 support.
  vp9: parse profile 2/3 bitdepth in frame header.
  vp9: add profile 2/3 to exported profiles.
  fate: add/update reference files for 440 addition.
  swscale: add yuv440p10/12 pixfmts.
  lavc: add yuv440p10/12 formats to aligned pixfmt list.
  lavu: add yuv440p10/12 pixfmts.
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 9253cc49 2293ec6a
...@@ -515,7 +515,8 @@ OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o vp56dsp.o \ ...@@ -515,7 +515,8 @@ OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o vp56dsp.o \
vp6dsp.o vp56rac.o vp6dsp.o vp56rac.o
OBJS-$(CONFIG_VP7_DECODER) += vp8.o vp8dsp.o vp56rac.o OBJS-$(CONFIG_VP7_DECODER) += vp8.o vp8dsp.o vp56rac.o
OBJS-$(CONFIG_VP8_DECODER) += vp8.o vp8dsp.o vp56rac.o OBJS-$(CONFIG_VP8_DECODER) += vp8.o vp8dsp.o vp56rac.o
OBJS-$(CONFIG_VP9_DECODER) += vp9.o vp9dsp.o vp56rac.o OBJS-$(CONFIG_VP9_DECODER) += vp9.o vp9dsp.o vp56rac.o vp9dsp_8bpp.o \
vp9dsp_10bpp.o vp9dsp_12bpp.o
OBJS-$(CONFIG_VPLAYER_DECODER) += textdec.o ass.o OBJS-$(CONFIG_VPLAYER_DECODER) += textdec.o ass.o
OBJS-$(CONFIG_VQA_DECODER) += vqavideo.o OBJS-$(CONFIG_VQA_DECODER) += vqavideo.o
OBJS-$(CONFIG_WAVPACK_DECODER) += wavpack.o OBJS-$(CONFIG_WAVPACK_DECODER) += wavpack.o
......
...@@ -2914,6 +2914,8 @@ typedef struct AVCodecContext { ...@@ -2914,6 +2914,8 @@ typedef struct AVCodecContext {
#define FF_PROFILE_VP9_0 0 #define FF_PROFILE_VP9_0 0
#define FF_PROFILE_VP9_1 1 #define FF_PROFILE_VP9_1 1
#define FF_PROFILE_VP9_2 2
#define FF_PROFILE_VP9_3 3
/** /**
* level * level
......
...@@ -72,6 +72,11 @@ static int set_pix_fmt(AVCodecContext *avctx, struct vpx_image *img) ...@@ -72,6 +72,11 @@ static int set_pix_fmt(AVCodecContext *avctx, struct vpx_image *img)
case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I422:
avctx->pix_fmt = AV_PIX_FMT_YUV422P; avctx->pix_fmt = AV_PIX_FMT_YUV422P;
return 0; return 0;
#if VPX_IMAGE_ABI_VERSION >= 3
case VPX_IMG_FMT_I440:
avctx->pix_fmt = AV_PIX_FMT_YUV440P;
return 0;
#endif
case VPX_IMG_FMT_I444: case VPX_IMG_FMT_I444:
avctx->pix_fmt = AV_PIX_FMT_YUV444P; avctx->pix_fmt = AV_PIX_FMT_YUV444P;
return 0; return 0;
...@@ -96,6 +101,18 @@ static int set_pix_fmt(AVCodecContext *avctx, struct vpx_image *img) ...@@ -96,6 +101,18 @@ static int set_pix_fmt(AVCodecContext *avctx, struct vpx_image *img)
} else { } else {
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} }
#if VPX_IMAGE_ABI_VERSION >= 3
case VPX_IMG_FMT_I44016:
if (img->bit_depth == 10) {
avctx->pix_fmt = AV_PIX_FMT_YUV440P10LE;
return 0;
} else if (img->bit_depth == 12) {
avctx->pix_fmt = AV_PIX_FMT_YUV440P12LE;
return 0;
} else {
return AVERROR_INVALIDDATA;
}
#endif
case VPX_IMG_FMT_I44416: case VPX_IMG_FMT_I44416:
if (img->bit_depth == 10) { if (img->bit_depth == 10) {
avctx->pix_fmt = AV_PIX_FMT_YUV444P10LE; avctx->pix_fmt = AV_PIX_FMT_YUV444P10LE;
......
...@@ -341,6 +341,10 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, ...@@ -341,6 +341,10 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
case AV_PIX_FMT_YUVA422P10BE: case AV_PIX_FMT_YUVA422P10BE:
case AV_PIX_FMT_YUVA422P16LE: case AV_PIX_FMT_YUVA422P16LE:
case AV_PIX_FMT_YUVA422P16BE: case AV_PIX_FMT_YUVA422P16BE:
case AV_PIX_FMT_YUV440P10LE:
case AV_PIX_FMT_YUV440P10BE:
case AV_PIX_FMT_YUV440P12LE:
case AV_PIX_FMT_YUV440P12BE:
case AV_PIX_FMT_YUV444P9LE: case AV_PIX_FMT_YUV444P9LE:
case AV_PIX_FMT_YUV444P9BE: case AV_PIX_FMT_YUV444P9BE:
case AV_PIX_FMT_YUV444P10LE: case AV_PIX_FMT_YUV444P10LE:
......
...@@ -109,6 +109,7 @@ typedef struct VP9Context { ...@@ -109,6 +109,7 @@ typedef struct VP9Context {
// bitstream header // bitstream header
uint8_t keyframe, last_keyframe; uint8_t keyframe, last_keyframe;
uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
uint8_t invisible; uint8_t invisible;
uint8_t use_last_frame_mvs; uint8_t use_last_frame_mvs;
uint8_t errorres; uint8_t errorres;
...@@ -241,15 +242,15 @@ typedef struct VP9Context { ...@@ -241,15 +242,15 @@ typedef struct VP9Context {
// whole-frame cache // whole-frame cache
uint8_t *intra_pred_data[3]; uint8_t *intra_pred_data[3];
struct VP9Filter *lflvl; struct VP9Filter *lflvl;
DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135*144]; DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
// block reconstruction intermediates // block reconstruction intermediates
int block_alloc_using_2pass; int block_alloc_using_2pass;
int16_t *block_base, *block, *uvblock_base[2], *uvblock[2]; int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2]; uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
struct { int x, y; } min_mv, max_mv; struct { int x, y; } min_mv, max_mv;
DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64]; DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64]; DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
uint16_t mvscale[3][2]; uint16_t mvscale[3][2];
uint8_t mvstep[3][2]; uint8_t mvstep[3][2];
} VP9Context; } VP9Context;
...@@ -311,6 +312,7 @@ static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt ...@@ -311,6 +312,7 @@ static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt
{ {
VP9Context *s = ctx->priv_data; VP9Context *s = ctx->priv_data;
uint8_t *p; uint8_t *p;
int bytesperpixel = s->bytesperpixel;
av_assert0(w > 0 && h > 0); av_assert0(w > 0 && h > 0);
...@@ -329,12 +331,13 @@ static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt ...@@ -329,12 +331,13 @@ static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt
av_freep(&s->intra_pred_data[0]); av_freep(&s->intra_pred_data[0]);
// FIXME we slightly over-allocate here for subsampled chroma, but a little // FIXME we slightly over-allocate here for subsampled chroma, but a little
// bit of padding shouldn't affect performance... // bit of padding shouldn't affect performance...
p = av_malloc(s->sb_cols * (320 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx))); p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
if (!p) if (!p)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
assign(s->intra_pred_data[0], uint8_t *, 64); assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
assign(s->intra_pred_data[1], uint8_t *, 64); assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
assign(s->intra_pred_data[2], uint8_t *, 64); assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
assign(s->above_y_nnz_ctx, uint8_t *, 16); assign(s->above_y_nnz_ctx, uint8_t *, 16);
assign(s->above_mode_ctx, uint8_t *, 16); assign(s->above_mode_ctx, uint8_t *, 16);
assign(s->above_mv_ctx, VP56mv(*)[2], 16); assign(s->above_mv_ctx, VP56mv(*)[2], 16);
...@@ -355,13 +358,19 @@ static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt ...@@ -355,13 +358,19 @@ static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt
av_freep(&s->b_base); av_freep(&s->b_base);
av_freep(&s->block_base); av_freep(&s->block_base);
if (s->bpp != s->last_bpp) {
ff_vp9dsp_init(&s->dsp, s->bpp);
ff_videodsp_init(&s->vdsp, s->bpp);
s->last_bpp = s->bpp;
}
return 0; return 0;
} }
static int update_block_buffers(AVCodecContext *ctx) static int update_block_buffers(AVCodecContext *ctx)
{ {
VP9Context *s = ctx->priv_data; VP9Context *s = ctx->priv_data;
int chroma_blocks, chroma_eobs; int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass) if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
return 0; return 0;
...@@ -374,24 +383,24 @@ static int update_block_buffers(AVCodecContext *ctx) ...@@ -374,24 +383,24 @@ static int update_block_buffers(AVCodecContext *ctx)
int sbs = s->sb_cols * s->sb_rows; int sbs = s->sb_cols * s->sb_rows;
s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block)); s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) + s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
16 * 16 + 2 * chroma_eobs) * sbs); 16 * 16 + 2 * chroma_eobs) * sbs);
if (!s->b_base || !s->block_base) if (!s->b_base || !s->block_base)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
s->uvblock_base[0] = s->block_base + sbs * 64 * 64; s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks; s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks); s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
s->uveob_base[0] = s->eob_base + 16 * 16 * sbs; s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs; s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
} else { } else {
s->b_base = av_malloc(sizeof(VP9Block)); s->b_base = av_malloc(sizeof(VP9Block));
s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) + s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
16 * 16 + 2 * chroma_eobs); 16 * 16 + 2 * chroma_eobs);
if (!s->b_base || !s->block_base) if (!s->b_base || !s->block_base)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
s->uvblock_base[0] = s->block_base + 64 * 64; s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks; s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks); s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
s->uveob_base[0] = s->eob_base + 16 * 16; s->uveob_base[0] = s->eob_base + 16 * 16;
s->uveob_base[1] = s->uveob_base[0] + chroma_eobs; s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
} }
...@@ -478,36 +487,50 @@ static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx) ...@@ -478,36 +487,50 @@ static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
}; };
VP9Context *s = ctx->priv_data; VP9Context *s = ctx->priv_data;
enum AVPixelFormat res; enum AVPixelFormat res;
int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
s->bpp_index = bits;
s->bpp = 8 + bits * 2;
s->bytesperpixel = (7 + s->bpp) >> 3;
ctx->colorspace = colorspaces[get_bits(&s->gb, 3)]; ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
if (ctx->profile == 1) { static const enum AVPixelFormat pix_fmt_rgb[3] = {
AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
};
if (ctx->profile & 1) {
s->ss_h = s->ss_v = 1; s->ss_h = s->ss_v = 1;
res = AV_PIX_FMT_GBRP; res = pix_fmt_rgb[bits];
ctx->color_range = AVCOL_RANGE_JPEG; ctx->color_range = AVCOL_RANGE_JPEG;
} else { } else {
av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n"); av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
ctx->profile);
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} }
} else { } else {
static const enum AVPixelFormat pix_fmt_for_ss[2 /* v */][2 /* h */] = { static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
{ AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P }, { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
{ AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P }, { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
{ { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
{ AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
{ { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
{ AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
}; };
ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG; ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
if (ctx->profile == 1) { if (ctx->profile & 1) {
s->ss_h = get_bits1(&s->gb); s->ss_h = get_bits1(&s->gb);
s->ss_v = get_bits1(&s->gb); s->ss_v = get_bits1(&s->gb);
if ((res = pix_fmt_for_ss[s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) { if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile 1\n"); av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
ctx->profile);
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} else if (get_bits1(&s->gb)) { } else if (get_bits1(&s->gb)) {
av_log(ctx, AV_LOG_ERROR, "Profile 1 color details reserved bit set\n"); av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
ctx->profile);
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} }
} else { } else {
s->ss_h = s->ss_v = 1; s->ss_h = s->ss_v = 1;
res = AV_PIX_FMT_YUV420P; res = pix_fmt_for_ss[bits][1][1];
} }
} }
...@@ -534,7 +557,8 @@ static int decode_frame_header(AVCodecContext *ctx, ...@@ -534,7 +557,8 @@ static int decode_frame_header(AVCodecContext *ctx,
} }
ctx->profile = get_bits1(&s->gb); ctx->profile = get_bits1(&s->gb);
ctx->profile |= get_bits1(&s->gb) << 1; ctx->profile |= get_bits1(&s->gb) << 1;
if (ctx->profile > 1) { if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
if (ctx->profile > 3) {
av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile); av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} }
...@@ -574,6 +598,9 @@ static int decode_frame_header(AVCodecContext *ctx, ...@@ -574,6 +598,9 @@ static int decode_frame_header(AVCodecContext *ctx,
return fmt; return fmt;
} else { } else {
s->ss_h = s->ss_v = 1; s->ss_h = s->ss_v = 1;
s->bpp = 8;
s->bpp_index = 0;
s->bytesperpixel = 1;
fmt = AV_PIX_FMT_YUV420P; fmt = AV_PIX_FMT_YUV420P;
ctx->colorspace = AVCOL_SPC_BT470BG; ctx->colorspace = AVCOL_SPC_BT470BG;
ctx->color_range = AVCOL_RANGE_JPEG; ctx->color_range = AVCOL_RANGE_JPEG;
...@@ -753,10 +780,10 @@ static int decode_frame_header(AVCodecContext *ctx, ...@@ -753,10 +780,10 @@ static int decode_frame_header(AVCodecContext *ctx,
quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8); quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
qyac = av_clip_uintp2(qyac, 8); qyac = av_clip_uintp2(qyac, 8);
s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc]; s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac]; s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc]; s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac]; s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
sh = s->filter.level >= 32; sh = s->filter.level >= 32;
if (s->segmentation.feat[i].lf_enabled) { if (s->segmentation.feat[i].lf_enabled) {
...@@ -2106,7 +2133,7 @@ static void decode_mode(AVCodecContext *ctx) ...@@ -2106,7 +2133,7 @@ static void decode_mode(AVCodecContext *ctx)
// FIXME merge cnt/eob arguments? // FIXME merge cnt/eob arguments?
static av_always_inline int static av_always_inline int
decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs, decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
int is_tx32x32, unsigned (*cnt)[6][3], int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
unsigned (*eob)[6][2], uint8_t (*p)[6][11], unsigned (*eob)[6][2], uint8_t (*p)[6][11],
int nnz, const int16_t *scan, const int16_t (*nb)[2], int nnz, const int16_t *scan, const int16_t (*nb)[2],
const int16_t *band_counts, const int16_t *qmul) const int16_t *band_counts, const int16_t *qmul)
...@@ -2182,7 +2209,16 @@ decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs, ...@@ -2182,7 +2209,16 @@ decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
val += (vp56_rac_get_prob(c, 134) << 1); val += (vp56_rac_get_prob(c, 134) << 1);
val += vp56_rac_get_prob(c, 130); val += vp56_rac_get_prob(c, 130);
} else { } else {
val = 67 + (vp56_rac_get_prob(c, 254) << 13); val = 67;
if (!is8bitsperpixel) {
if (bpp == 12) {
val += vp56_rac_get_prob(c, 255) << 17;
val += vp56_rac_get_prob(c, 255) << 16;
}
val += (vp56_rac_get_prob(c, 255) << 15);
val += (vp56_rac_get_prob(c, 255) << 14);
}
val += (vp56_rac_get_prob(c, 254) << 13);
val += (vp56_rac_get_prob(c, 254) << 12); val += (vp56_rac_get_prob(c, 254) << 12);
val += (vp56_rac_get_prob(c, 254) << 11); val += (vp56_rac_get_prob(c, 254) << 11);
val += (vp56_rac_get_prob(c, 252) << 10); val += (vp56_rac_get_prob(c, 252) << 10);
...@@ -2199,12 +2235,19 @@ decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs, ...@@ -2199,12 +2235,19 @@ decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
} }
} }
} }
#define STORE_COEF(c, i, v) do { \
if (is8bitsperpixel) { \
c[i] = v; \
} else { \
AV_WN32A(&c[i * 2], v); \
} \
} while (0)
if (!--band_left) if (!--band_left)
band_left = band_counts[++band]; band_left = band_counts[++band];
if (is_tx32x32) if (is_tx32x32)
coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2; STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
else else
coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i]; STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1; nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
tp = p[band][nnz]; tp = p[band][nnz];
} while (++i < n_coeffs); } while (++i < n_coeffs);
...@@ -2212,27 +2255,47 @@ decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs, ...@@ -2212,27 +2255,47 @@ decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
return i; return i;
} }
static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs, static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
unsigned (*cnt)[6][3], unsigned (*eob)[6][2], unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
uint8_t (*p)[6][11], int nnz, const int16_t *scan, uint8_t (*p)[6][11], int nnz, const int16_t *scan,
const int16_t (*nb)[2], const int16_t *band_counts, const int16_t (*nb)[2], const int16_t *band_counts,
const int16_t *qmul) const int16_t *qmul)
{
return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
nnz, scan, nb, band_counts, qmul);
}
static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
uint8_t (*p)[6][11], int nnz, const int16_t *scan,
const int16_t (*nb)[2], const int16_t *band_counts,
const int16_t *qmul)
{
return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
nnz, scan, nb, band_counts, qmul);
}
static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
uint8_t (*p)[6][11], int nnz, const int16_t *scan,
const int16_t (*nb)[2], const int16_t *band_counts,
const int16_t *qmul)
{ {
return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p, return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
nnz, scan, nb, band_counts, qmul); nnz, scan, nb, band_counts, qmul);
} }
static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs, static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
unsigned (*cnt)[6][3], unsigned (*eob)[6][2], unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
uint8_t (*p)[6][11], int nnz, const int16_t *scan, uint8_t (*p)[6][11], int nnz, const int16_t *scan,
const int16_t (*nb)[2], const int16_t *band_counts, const int16_t (*nb)[2], const int16_t *band_counts,
const int16_t *qmul) const int16_t *qmul)
{ {
return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p, return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
nnz, scan, nb, band_counts, qmul); nnz, scan, nb, band_counts, qmul);
} }
static void decode_coeffs(AVCodecContext *ctx) static av_always_inline void decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
{ {
VP9Context *s = ctx->priv_data; VP9Context *s = ctx->priv_data;
VP9Block *b = s->b; VP9Block *b = s->b;
...@@ -2260,6 +2323,7 @@ static void decode_coeffs(AVCodecContext *ctx) ...@@ -2260,6 +2323,7 @@ static void decode_coeffs(AVCodecContext *ctx)
}; };
const int16_t *y_band_counts = band_counts[b->tx]; const int16_t *y_band_counts = band_counts[b->tx];
const int16_t *uv_band_counts = band_counts[b->uvtx]; const int16_t *uv_band_counts = band_counts[b->uvtx];
int bytesperpixel = is8bitsperpixel ? 1 : 2;
#define MERGE(la, end, step, rd) \ #define MERGE(la, end, step, rd) \
for (n = 0; n < end; n += step) \ for (n = 0; n < end; n += step) \
...@@ -2274,7 +2338,8 @@ static void decode_coeffs(AVCodecContext *ctx) ...@@ -2274,7 +2338,8 @@ static void decode_coeffs(AVCodecContext *ctx)
for (n = 0, y = 0; y < end_y; y += step) { \ for (n = 0, y = 0; y < end_y; y += step) { \
for (x = 0; x < end_x; x += step, n += step * step) { \ for (x = 0; x < end_x; x += step, n += step * step) { \
enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \ enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \ res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
(s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
c, e, p, a[x] + l[y], yscans[txtp], \ c, e, p, a[x] + l[y], yscans[txtp], \
ynbs[txtp], y_band_counts, qmul[0]); \ ynbs[txtp], y_band_counts, qmul[0]); \
a[x] = l[y] = !!res; \ a[x] = l[y] = !!res; \
...@@ -2343,12 +2408,13 @@ static void decode_coeffs(AVCodecContext *ctx) ...@@ -2343,12 +2408,13 @@ static void decode_coeffs(AVCodecContext *ctx)
break; break;
} }
#define DECODE_UV_COEF_LOOP(step, decode_coeffs_fn) \ #define DECODE_UV_COEF_LOOP(step, v) \
for (n = 0, y = 0; y < end_y; y += step) { \ for (n = 0, y = 0; y < end_y; y += step) { \
for (x = 0; x < end_x; x += step, n += step * step) { \ for (x = 0; x < end_x; x += step, n += step * step) { \
res = decode_coeffs_fn(&s->c, s->uvblock[pl] + 16 * n, \ res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
16 * step * step, c, e, p, a[x] + l[y], \ (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
uvscan, uvnb, uv_band_counts, qmul[1]); \ 16 * step * step, c, e, p, a[x] + l[y], \
uvscan, uvnb, uv_band_counts, qmul[1]); \
a[x] = l[y] = !!res; \ a[x] = l[y] = !!res; \
if (step >= 4) { \ if (step >= 4) { \
AV_WN16A(&s->uveob[pl][n], res); \ AV_WN16A(&s->uveob[pl][n], res); \
...@@ -2370,37 +2436,48 @@ static void decode_coeffs(AVCodecContext *ctx) ...@@ -2370,37 +2436,48 @@ static void decode_coeffs(AVCodecContext *ctx)
l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v]; l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
switch (b->uvtx) { switch (b->uvtx) {
case TX_4X4: case TX_4X4:
DECODE_UV_COEF_LOOP(1, decode_coeffs_b); DECODE_UV_COEF_LOOP(1,);
break; break;
case TX_8X8: case TX_8X8:
MERGE_CTX(2, AV_RN16A); MERGE_CTX(2, AV_RN16A);
DECODE_UV_COEF_LOOP(2, decode_coeffs_b); DECODE_UV_COEF_LOOP(2,);
SPLAT_CTX(2); SPLAT_CTX(2);
break; break;
case TX_16X16: case TX_16X16:
MERGE_CTX(4, AV_RN32A); MERGE_CTX(4, AV_RN32A);
DECODE_UV_COEF_LOOP(4, decode_coeffs_b); DECODE_UV_COEF_LOOP(4,);
SPLAT_CTX(4); SPLAT_CTX(4);
break; break;
case TX_32X32: case TX_32X32:
MERGE_CTX(8, AV_RN64A); MERGE_CTX(8, AV_RN64A);
DECODE_UV_COEF_LOOP(8, decode_coeffs_b32); DECODE_UV_COEF_LOOP(8, 32);
SPLAT_CTX(8); SPLAT_CTX(8);
break; break;
} }
} }
} }
static void decode_coeffs_8bpp(AVCodecContext *ctx)
{
decode_coeffs(ctx, 1);
}
static void decode_coeffs_16bpp(AVCodecContext *ctx)
{
decode_coeffs(ctx, 0);
}
static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a, static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
uint8_t *dst_edge, ptrdiff_t stride_edge, uint8_t *dst_edge, ptrdiff_t stride_edge,
uint8_t *dst_inner, ptrdiff_t stride_inner, uint8_t *dst_inner, ptrdiff_t stride_inner,
uint8_t *l, int col, int x, int w, uint8_t *l, int col, int x, int w,
int row, int y, enum TxfmMode tx, int row, int y, enum TxfmMode tx,
int p, int ss_h, int ss_v) int p, int ss_h, int ss_v, int bytesperpixel)
{ {
int have_top = row > 0 || y > 0; int have_top = row > 0 || y > 0;
int have_left = col > s->tiling.tile_col_start || x > 0; int have_left = col > s->tiling.tile_col_start || x > 0;
int have_right = x < w - 1; int have_right = x < w - 1;
int bpp = s->bpp;
static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = { static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
[VERT_PRED] = { { DC_127_PRED, VERT_PRED }, [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
{ DC_127_PRED, VERT_PRED } }, { DC_127_PRED, VERT_PRED } },
...@@ -2462,11 +2539,11 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t ** ...@@ -2462,11 +2539,11 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
// post-loopfilter data) // post-loopfilter data)
if (have_top) { if (have_top) {
top = !(row & 7) && !y ? top = !(row & 7) && !y ?
s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 : s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner]; y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
if (have_left) if (have_left)
topleft = !(row & 7) && !y ? topleft = !(row & 7) && !y ?
s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 : s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
y == 0 || x == 0 ? &dst_edge[-stride_edge] : y == 0 || x == 0 ? &dst_edge[-stride_edge] :
&dst_inner[-stride_inner]; &dst_inner[-stride_inner];
} }
...@@ -2479,28 +2556,61 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t ** ...@@ -2479,28 +2556,61 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
} else { } else {
if (have_top) { if (have_top) {
if (n_px_need <= n_px_have) { if (n_px_need <= n_px_have) {
memcpy(*a, top, n_px_need); memcpy(*a, top, n_px_need * bytesperpixel);
} else { } else {
memcpy(*a, top, n_px_have); #define memset_bpp(c, i1, v, i2, num) do { \
memset(&(*a)[n_px_have], (*a)[n_px_have - 1], if (bytesperpixel == 1) { \
n_px_need - n_px_have); memset(&(c)[(i1)], (v)[(i2)], (num)); \
} else { \
int n, val = AV_RN16A(&(v)[(i2) * 2]); \
for (n = 0; n < (num); n++) { \
AV_WN16A(&(c)[((i1) + n) * 2], val); \
} \
} \
} while (0)
memcpy(*a, top, n_px_have * bytesperpixel);
memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
} }
} else { } else {
memset(*a, 127, n_px_need); #define memset_val(c, val, num) do { \
if (bytesperpixel == 1) { \
memset((c), (val), (num)); \
} else { \
int n; \
for (n = 0; n < (num); n++) { \
AV_WN16A(&(c)[n * 2], (val)); \
} \
} \
} while (0)
memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
} }
if (edges[mode].needs_topleft) { if (edges[mode].needs_topleft) {
if (have_left && have_top) { if (have_left && have_top) {
(*a)[-1] = topleft[-1]; #define assign_bpp(c, i1, v, i2) do { \
if (bytesperpixel == 1) { \
(c)[(i1)] = (v)[(i2)]; \
} else { \
AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
} \
} while (0)
assign_bpp(*a, -1, topleft, -1);
} else { } else {
(*a)[-1] = have_top ? 129 : 127; #define assign_val(c, i, v) do { \
if (bytesperpixel == 1) { \
(c)[(i)] = (v); \
} else { \
AV_WN16A(&(c)[(i) * 2], (v)); \
} \
} while (0)
assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
} }
} }
if (tx == TX_4X4 && edges[mode].needs_topright) { if (tx == TX_4X4 && edges[mode].needs_topright) {
if (have_top && have_right && if (have_top && have_right &&
n_px_need + n_px_need_tr <= n_px_have) { n_px_need + n_px_need_tr <= n_px_have) {
memcpy(&(*a)[4], &top[4], 4); memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
} else { } else {
memset(&(*a)[4], (*a)[3], 4); memset_bpp(*a, 4, *a, 3, 4);
} }
} }
} }
...@@ -2514,31 +2624,32 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t ** ...@@ -2514,31 +2624,32 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
if (edges[mode].invert_left) { if (edges[mode].invert_left) {
if (n_px_need <= n_px_have) { if (n_px_need <= n_px_have) {
for (i = 0; i < n_px_need; i++) for (i = 0; i < n_px_need; i++)
l[i] = dst[i * stride - 1]; assign_bpp(l, i, &dst[i * stride], -1);
} else { } else {
for (i = 0; i < n_px_have; i++) for (i = 0; i < n_px_have; i++)
l[i] = dst[i * stride - 1]; assign_bpp(l, i, &dst[i * stride], -1);
memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have); memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
} }
} else { } else {
if (n_px_need <= n_px_have) { if (n_px_need <= n_px_have) {
for (i = 0; i < n_px_need; i++) for (i = 0; i < n_px_need; i++)
l[n_px_need - 1 - i] = dst[i * stride - 1]; assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
} else { } else {
for (i = 0; i < n_px_have; i++) for (i = 0; i < n_px_have; i++)
l[n_px_need - 1 - i] = dst[i * stride - 1]; assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have); memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
} }
} }
} else { } else {
memset(l, 129, 4 << tx); memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
} }
} }
return mode; return mode;
} }
static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off) static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
ptrdiff_t uv_off, int bytesperpixel)
{ {
VP9Context *s = ctx->priv_data; VP9Context *s = ctx->priv_data;
VP9Block *b = s->b; VP9Block *b = s->b;
...@@ -2550,13 +2661,13 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off) ...@@ -2550,13 +2661,13 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless; int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
int uvstep1d = 1 << b->uvtx, p; int uvstep1d = 1 << b->uvtx, p;
uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off; uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
LOCAL_ALIGNED_32(uint8_t, a_buf, [64]); LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
LOCAL_ALIGNED_32(uint8_t, l, [32]); LOCAL_ALIGNED_32(uint8_t, l, [64]);
for (n = 0, y = 0; y < end_y; y += step1d) { for (n = 0, y = 0; y < end_y; y += step1d) {
uint8_t *ptr = dst, *ptr_r = dst_r; uint8_t *ptr = dst, *ptr_r = dst_r;
for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
ptr_r += 4 * step1d, n += step) { ptr_r += 4 * step1d * bytesperpixel, n += step) {
int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ? int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
y * 2 + x : 0]; y * 2 + x : 0];
uint8_t *a = &a_buf[32]; uint8_t *a = &a_buf[32];
...@@ -2566,11 +2677,11 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off) ...@@ -2566,11 +2677,11 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
mode = check_intra_mode(s, mode, &a, ptr_r, mode = check_intra_mode(s, mode, &a, ptr_r,
s->frames[CUR_FRAME].tf.f->linesize[0], s->frames[CUR_FRAME].tf.f->linesize[0],
ptr, s->y_stride, l, ptr, s->y_stride, l,
col, x, w4, row, y, b->tx, 0, 0, 0); col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a); s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
if (eob) if (eob)
s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride, s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
s->block + 16 * n, eob); s->block + 16 * n * bytesperpixel, eob);
} }
dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0]; dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
dst += 4 * step1d * s->y_stride; dst += 4 * step1d * s->y_stride;
...@@ -2586,8 +2697,8 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off) ...@@ -2586,8 +2697,8 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off; dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
for (n = 0, y = 0; y < end_y; y += uvstep1d) { for (n = 0, y = 0; y < end_y; y += uvstep1d) {
uint8_t *ptr = dst, *ptr_r = dst_r; uint8_t *ptr = dst, *ptr_r = dst_r;
for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
ptr_r += 4 * uvstep1d, n += step) { ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
int mode = b->uvmode; int mode = b->uvmode;
uint8_t *a = &a_buf[32]; uint8_t *a = &a_buf[32];
int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n]; int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
...@@ -2595,11 +2706,11 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off) ...@@ -2595,11 +2706,11 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
mode = check_intra_mode(s, mode, &a, ptr_r, mode = check_intra_mode(s, mode, &a, ptr_r,
s->frames[CUR_FRAME].tf.f->linesize[1], s->frames[CUR_FRAME].tf.f->linesize[1],
ptr, s->uv_stride, l, col, x, w4, row, y, ptr, s->uv_stride, l, col, x, w4, row, y,
b->uvtx, p + 1, s->ss_h, s->ss_v); b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a); s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
if (eob) if (eob)
s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride, s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
s->uvblock[p] + 16 * n, eob); s->uvblock[p] + 16 * n * bytesperpixel, eob);
} }
dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1]; dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
dst += 4 * uvstep1d * s->uv_stride; dst += 4 * uvstep1d * s->uv_stride;
...@@ -2607,12 +2718,22 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off) ...@@ -2607,12 +2718,22 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
} }
} }
static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
{
intra_recon(ctx, y_off, uv_off, 1);
}
static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
{
intra_recon(ctx, y_off, uv_off, 2);
}
static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc, static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
uint8_t *dst, ptrdiff_t dst_stride, uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride, const uint8_t *ref, ptrdiff_t ref_stride,
ThreadFrame *ref_frame, ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h, int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step) const uint16_t *scale, const uint8_t *step)
{ {
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14) #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
...@@ -2626,7 +2747,7 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm ...@@ -2626,7 +2747,7 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
y = my >> 4; y = my >> 4;
x = mx >> 4; x = mx >> 4;
ref += y * ref_stride + x; ref += y * ref_stride + x * bytesperpixel;
mx &= 15; mx &= 15;
my &= 15; my &= 15;
refbw_m1 = ((bw - 1) * step[0] + mx) >> 4; refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
...@@ -2638,12 +2759,12 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm ...@@ -2638,12 +2759,12 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) { if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref - 3 * ref_stride - 3, ref - 3 * ref_stride - 3 * bytesperpixel,
144, ref_stride, 288, ref_stride,
refbw_m1 + 8, refbh_m1 + 8, refbw_m1 + 8, refbh_m1 + 8,
x - 3, y - 3, w, h); x - 3, y - 3, w, h);
ref = s->edge_emu_buffer + 3 * 144 + 3; ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
ref_stride = 144; ref_stride = 288;
} }
smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]); smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
} }
...@@ -2655,7 +2776,7 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func ...@@ -2655,7 +2776,7 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
const uint8_t *ref_v, ptrdiff_t src_stride_v, const uint8_t *ref_v, ptrdiff_t src_stride_v,
ThreadFrame *ref_frame, ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h, int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step) const uint16_t *scale, const uint8_t *step)
{ {
// BUG https://code.google.com/p/webm/issues/detail?id=820 // BUG https://code.google.com/p/webm/issues/detail?id=820
...@@ -2667,8 +2788,8 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func ...@@ -2667,8 +2788,8 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
y = my >> 4; y = my >> 4;
x = mx >> 4; x = mx >> 4;
ref_u += y * src_stride_u + x; ref_u += y * src_stride_u + x * bytesperpixel;
ref_v += y * src_stride_v + x; ref_v += y * src_stride_v + x * bytesperpixel;
mx &= 15; mx &= 15;
my &= 15; my &= 15;
refbw_m1 = ((bw - 1) * step[0] + mx) >> 4; refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
...@@ -2680,51 +2801,60 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func ...@@ -2680,51 +2801,60 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) { if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_u - 3 * src_stride_u - 3, ref_u - 3 * src_stride_u - 3 * bytesperpixel,
144, src_stride_u, 288, src_stride_u,
refbw_m1 + 8, refbh_m1 + 8, refbw_m1 + 8, refbh_m1 + 8,
x - 3, y - 3, w, h); x - 3, y - 3, w, h);
ref_u = s->edge_emu_buffer + 3 * 144 + 3; ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]); smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_v - 3 * src_stride_v - 3, ref_v - 3 * src_stride_v - 3 * bytesperpixel,
144, src_stride_v, 288, src_stride_v,
refbw_m1 + 8, refbh_m1 + 8, refbw_m1 + 8, refbh_m1 + 8,
x - 3, y - 3, w, h); x - 3, y - 3, w, h);
ref_v = s->edge_emu_buffer + 3 * 144 + 3; ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]); smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
} else { } else {
smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]); smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]); smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
} }
} }
#define FN(x) x##_scaled
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \ #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \ mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) mv, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, i) \ row, col, mv, bw, bh, w, h, i) \
mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) row, col, mv, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define FN(x) x##_scaled_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_scaled_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c" #include "vp9_mc_template.c"
#undef mc_luma_dir #undef mc_luma_dir
#undef mc_chroma_dir #undef mc_chroma_dir
#undef FN #undef FN
#undef BYTES_PER_PIXEL
static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2], static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
uint8_t *dst, ptrdiff_t dst_stride, uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride, const uint8_t *ref, ptrdiff_t ref_stride,
ThreadFrame *ref_frame, ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h) int bw, int bh, int w, int h, int bytesperpixel)
{ {
int mx = mv->x, my = mv->y, th; int mx = mv->x, my = mv->y, th;
y += my >> 3; y += my >> 3;
x += mx >> 3; x += mx >> 3;
ref += y * ref_stride + x; ref += y * ref_stride + x * bytesperpixel;
mx &= 7; mx &= 7;
my &= 7; my &= 7;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
...@@ -2735,12 +2865,12 @@ static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2 ...@@ -2735,12 +2865,12 @@ static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2
if (x < !!mx * 3 || y < !!my * 3 || if (x < !!mx * 3 || y < !!my * 3 ||
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) { x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref - !!my * 3 * ref_stride - !!mx * 3, ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
80, ref_stride, 160, ref_stride,
bw + !!mx * 7, bh + !!my * 7, bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h); x - !!mx * 3, y - !!my * 3, w, h);
ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
ref_stride = 80; ref_stride = 160;
} }
mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1); mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
} }
...@@ -2752,14 +2882,14 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc) ...@@ -2752,14 +2882,14 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)
const uint8_t *ref_v, ptrdiff_t src_stride_v, const uint8_t *ref_v, ptrdiff_t src_stride_v,
ThreadFrame *ref_frame, ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h) int bw, int bh, int w, int h, int bytesperpixel)
{ {
int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th; int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
y += my >> 4; y += my >> 4;
x += mx >> 4; x += mx >> 4;
ref_u += y * src_stride_u + x; ref_u += y * src_stride_u + x * bytesperpixel;
ref_v += y * src_stride_v + x; ref_v += y * src_stride_v + x * bytesperpixel;
mx &= 15; mx &= 15;
my &= 15; my &= 15;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
...@@ -2770,49 +2900,64 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc) ...@@ -2770,49 +2900,64 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)
if (x < !!mx * 3 || y < !!my * 3 || if (x < !!mx * 3 || y < !!my * 3 ||
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) { x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_u - !!my * 3 * src_stride_u - !!mx * 3, ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
80, src_stride_u, 160, src_stride_u,
bw + !!mx * 7, bh + !!my * 7, bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h); x - !!mx * 3, y - !!my * 3, w, h);
ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my); mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_v - !!my * 3 * src_stride_v - !!mx * 3, ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
80, src_stride_v, 160, src_stride_v,
bw + !!mx * 7, bh + !!my * 7, bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h); x - !!mx * 3, y - !!my * 3, w, h);
ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my); mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
} else { } else {
mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my); mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my); mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
} }
} }
#define FN(x) x
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \ #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \ mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, bw, bh, w, h) mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, i) \ row, col, mv, bw, bh, w, h, i) \
mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h) row, col, mv, bw, bh, w, h, bytesperpixel)
#define FN(x) x##_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c" #include "vp9_mc_template.c"
#undef mc_luma_dir_dir #undef mc_luma_dir_dir
#undef mc_chroma_dir_dir #undef mc_chroma_dir_dir
#undef FN #undef FN
#undef BYTES_PER_PIXEL
static void inter_recon(AVCodecContext *ctx) static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
{ {
VP9Context *s = ctx->priv_data; VP9Context *s = ctx->priv_data;
VP9Block *b = s->b; VP9Block *b = s->b;
int row = s->row, col = s->col; int row = s->row, col = s->col;
if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) { if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
inter_pred_scaled(ctx); if (bytesperpixel == 1) {
inter_pred_scaled_8bpp(ctx);
} else {
inter_pred_scaled_16bpp(ctx);
}
} else { } else {
inter_pred(ctx); if (bytesperpixel == 1) {
inter_pred_8bpp(ctx);
} else {
inter_pred_16bpp(ctx);
}
} }
if (!b->skip) { if (!b->skip) {
/* mostly copied intra_recon() */ /* mostly copied intra_recon() */
...@@ -2828,12 +2973,13 @@ static void inter_recon(AVCodecContext *ctx) ...@@ -2828,12 +2973,13 @@ static void inter_recon(AVCodecContext *ctx)
// y itxfm add // y itxfm add
for (n = 0, y = 0; y < end_y; y += step1d) { for (n = 0, y = 0; y < end_y; y += step1d) {
uint8_t *ptr = dst; uint8_t *ptr = dst;
for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) { for (x = 0; x < end_x; x += step1d,
ptr += 4 * step1d * bytesperpixel, n += step) {
int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n]; int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
if (eob) if (eob)
s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride, s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
s->block + 16 * n, eob); s->block + 16 * n * bytesperpixel, eob);
} }
dst += 4 * s->y_stride * step1d; dst += 4 * s->y_stride * step1d;
} }
...@@ -2846,12 +2992,13 @@ static void inter_recon(AVCodecContext *ctx) ...@@ -2846,12 +2992,13 @@ static void inter_recon(AVCodecContext *ctx)
dst = s->dst[p + 1]; dst = s->dst[p + 1];
for (n = 0, y = 0; y < end_y; y += uvstep1d) { for (n = 0, y = 0; y < end_y; y += uvstep1d) {
uint8_t *ptr = dst; uint8_t *ptr = dst;
for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) { for (x = 0; x < end_x; x += uvstep1d,
ptr += 4 * uvstep1d * bytesperpixel, n += step) {
int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n]; int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
if (eob) if (eob)
s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride, s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
s->uvblock[p] + 16 * n, eob); s->uvblock[p] + 16 * n * bytesperpixel, eob);
} }
dst += 4 * uvstep1d * s->uv_stride; dst += 4 * uvstep1d * s->uv_stride;
} }
...@@ -2859,6 +3006,16 @@ static void inter_recon(AVCodecContext *ctx) ...@@ -2859,6 +3006,16 @@ static void inter_recon(AVCodecContext *ctx)
} }
} }
static void inter_recon_8bpp(AVCodecContext *ctx)
{
inter_recon(ctx, 1);
}
static void inter_recon_16bpp(AVCodecContext *ctx)
{
inter_recon(ctx, 2);
}
static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v, static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
int row_and_7, int col_and_7, int row_and_7, int col_and_7,
int w, int h, int col_end, int row_end, int w, int h, int col_end, int row_end,
...@@ -2984,6 +3141,7 @@ static void decode_b(AVCodecContext *ctx, int row, int col, ...@@ -2984,6 +3141,7 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
VP9Context *s = ctx->priv_data; VP9Context *s = ctx->priv_data;
VP9Block *b = s->b; VP9Block *b = s->b;
enum BlockSize bs = bl * 3 + bp; enum BlockSize bs = bl * 3 + bp;
int bytesperpixel = s->bytesperpixel;
int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl; int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
int emu[2]; int emu[2];
AVFrame *f = s->frames[CUR_FRAME].tf.f; AVFrame *f = s->frames[CUR_FRAME].tf.f;
...@@ -3005,7 +3163,11 @@ static void decode_b(AVCodecContext *ctx, int row, int col, ...@@ -3005,7 +3163,11 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
(s->ss_v && h4 * 2 == (1 << b->tx))); (s->ss_v && h4 * 2 == (1 << b->tx)));
if (!b->skip) { if (!b->skip) {
decode_coeffs(ctx); if (bytesperpixel == 1) {
decode_coeffs_8bpp(ctx);
} else {
decode_coeffs_16bpp(ctx);
}
} else { } else {
int row7 = s->row7; int row7 = s->row7;
...@@ -3044,9 +3206,9 @@ static void decode_b(AVCodecContext *ctx, int row, int col, ...@@ -3044,9 +3206,9 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
} }
if (s->pass == 1) { if (s->pass == 1) {
s->b++; s->b++;
s->block += w4 * h4 * 64; s->block += w4 * h4 * 64 * bytesperpixel;
s->uvblock[0] += w4 * h4 * 64 >> (s->ss_h + s->ss_v); s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
s->uvblock[1] += w4 * h4 * 64 >> (s->ss_h + s->ss_v); s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
s->eob += 4 * w4 * h4; s->eob += 4 * w4 * h4;
s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v); s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v); s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
...@@ -3064,7 +3226,7 @@ static void decode_b(AVCodecContext *ctx, int row, int col, ...@@ -3064,7 +3226,7 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
(row + h4) > s->rows; (row + h4) > s->rows;
if (emu[0]) { if (emu[0]) {
s->dst[0] = s->tmp_y; s->dst[0] = s->tmp_y;
s->y_stride = 64; s->y_stride = 128;
} else { } else {
s->dst[0] = f->data[0] + yoff; s->dst[0] = f->data[0] + yoff;
s->y_stride = f->linesize[0]; s->y_stride = f->linesize[0];
...@@ -3072,16 +3234,24 @@ static void decode_b(AVCodecContext *ctx, int row, int col, ...@@ -3072,16 +3234,24 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
if (emu[1]) { if (emu[1]) {
s->dst[1] = s->tmp_uv[0]; s->dst[1] = s->tmp_uv[0];
s->dst[2] = s->tmp_uv[1]; s->dst[2] = s->tmp_uv[1];
s->uv_stride = 32; s->uv_stride = 128;
} else { } else {
s->dst[1] = f->data[1] + uvoff; s->dst[1] = f->data[1] + uvoff;
s->dst[2] = f->data[2] + uvoff; s->dst[2] = f->data[2] + uvoff;
s->uv_stride = f->linesize[1]; s->uv_stride = f->linesize[1];
} }
if (b->intra) { if (b->intra) {
intra_recon(ctx, yoff, uvoff); if (s->bpp > 8) {
intra_recon_16bpp(ctx, yoff, uvoff);
} else {
intra_recon_8bpp(ctx, yoff, uvoff);
}
} else { } else {
inter_recon(ctx); if (s->bpp > 8) {
inter_recon_16bpp(ctx);
} else {
inter_recon_8bpp(ctx);
}
} }
if (emu[0]) { if (emu[0]) {
int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0; int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
...@@ -3092,13 +3262,14 @@ static void decode_b(AVCodecContext *ctx, int row, int col, ...@@ -3092,13 +3262,14 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
av_assert2(n <= 4); av_assert2(n <= 4);
if (w & bw) { if (w & bw) {
s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0], s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
s->tmp_y + o, 64, h, 0, 0); s->tmp_y + o, 128, h, 0, 0);
o += bw; o += bw * bytesperpixel;
} }
} }
} }
if (emu[1]) { if (emu[1]) {
int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0; int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
for (n = 1; o < w; n++) { for (n = 1; o < w; n++) {
int bw = 64 >> n; int bw = 64 >> n;
...@@ -3106,10 +3277,10 @@ static void decode_b(AVCodecContext *ctx, int row, int col, ...@@ -3106,10 +3277,10 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
av_assert2(n <= 4); av_assert2(n <= 4);
if (w & bw) { if (w & bw) {
s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1], s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
s->tmp_uv[0] + o, 32, h, 0, 0); s->tmp_uv[0] + o, 128, h, 0, 0);
s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2], s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
s->tmp_uv[1] + o, 32, h, 0, 0); s->tmp_uv[1] + o, 128, h, 0, 0);
o += bw; o += bw * bytesperpixel;
} }
} }
} }
...@@ -3146,9 +3317,9 @@ static void decode_b(AVCodecContext *ctx, int row, int col, ...@@ -3146,9 +3317,9 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
if (s->pass == 2) { if (s->pass == 2) {
s->b++; s->b++;
s->block += w4 * h4 * 64; s->block += w4 * h4 * 64 * bytesperpixel;
s->uvblock[0] += w4 * h4 * 64 >> (s->ss_v + s->ss_h); s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
s->uvblock[1] += w4 * h4 * 64 >> (s->ss_v + s->ss_h); s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
s->eob += 4 * w4 * h4; s->eob += 4 * w4 * h4;
s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h); s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h); s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
...@@ -3167,6 +3338,7 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l ...@@ -3167,6 +3338,7 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l
ptrdiff_t hbs = 4 >> bl; ptrdiff_t hbs = 4 >> bl;
AVFrame *f = s->frames[CUR_FRAME].tf.f; AVFrame *f = s->frames[CUR_FRAME].tf.f;
ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1]; ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
int bytesperpixel = s->bytesperpixel;
if (bl == BL_8X8) { if (bl == BL_8X8) {
bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p); bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
...@@ -3186,19 +3358,21 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l ...@@ -3186,19 +3358,21 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l
break; break;
case PARTITION_V: case PARTITION_V:
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp); decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
yoff += hbs * 8; yoff += hbs * 8 * bytesperpixel;
uvoff += hbs * 8 >> s->ss_h; uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp); decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
break; break;
case PARTITION_SPLIT: case PARTITION_SPLIT:
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1); decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(ctx, row, col + hbs, lflvl, decode_sb(ctx, row, col + hbs, lflvl,
yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1); yoff + 8 * hbs * bytesperpixel,
uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
yoff += hbs * 8 * y_stride; yoff += hbs * 8 * y_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v; uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1); decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(ctx, row + hbs, col + hbs, lflvl, decode_sb(ctx, row + hbs, col + hbs, lflvl,
yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1); yoff + 8 * hbs * bytesperpixel,
uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
break; break;
default: default:
av_assert0(0); av_assert0(0);
...@@ -3207,7 +3381,8 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l ...@@ -3207,7 +3381,8 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l
bp = PARTITION_SPLIT; bp = PARTITION_SPLIT;
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1); decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(ctx, row, col + hbs, lflvl, decode_sb(ctx, row, col + hbs, lflvl,
yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1); yoff + 8 * hbs * bytesperpixel,
uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
} else { } else {
bp = PARTITION_H; bp = PARTITION_H;
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp); decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
...@@ -3238,6 +3413,7 @@ static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filte ...@@ -3238,6 +3413,7 @@ static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filte
ptrdiff_t hbs = 4 >> bl; ptrdiff_t hbs = 4 >> bl;
AVFrame *f = s->frames[CUR_FRAME].tf.f; AVFrame *f = s->frames[CUR_FRAME].tf.f;
ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1]; ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
int bytesperpixel = s->bytesperpixel;
if (bl == BL_8X8) { if (bl == BL_8X8) {
av_assert2(b->bl == BL_8X8); av_assert2(b->bl == BL_8X8);
...@@ -3249,24 +3425,25 @@ static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filte ...@@ -3249,24 +3425,25 @@ static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filte
uvoff += hbs * 8 * uv_stride >> s->ss_v; uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp); decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
} else if (b->bp == PARTITION_V && col + hbs < s->cols) { } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
yoff += hbs * 8; yoff += hbs * 8 * bytesperpixel;
uvoff += hbs * 8 >> s->ss_h; uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp); decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
} }
} else { } else {
decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1); decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
if (col + hbs < s->cols) { // FIXME why not <=? if (col + hbs < s->cols) { // FIXME why not <=?
if (row + hbs < s->rows) { if (row + hbs < s->rows) {
decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs, decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
uvoff + (8 * hbs >> s->ss_h), bl + 1); uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
yoff += hbs * 8 * y_stride; yoff += hbs * 8 * y_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v; uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1); decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
decode_sb_mem(ctx, row + hbs, col + hbs, lflvl, decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1); yoff + 8 * hbs * bytesperpixel,
uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
} else { } else {
yoff += hbs * 8; yoff += hbs * 8 * bytesperpixel;
uvoff += hbs * 8 >> s->ss_h; uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1); decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
} }
} else if (row + hbs < s->rows) { } else if (row + hbs < s->rows) {
...@@ -3281,7 +3458,7 @@ static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, ...@@ -3281,7 +3458,7 @@ static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h,
uint8_t *lvl, uint8_t (*mask)[4], uint8_t *lvl, uint8_t (*mask)[4],
uint8_t *dst, ptrdiff_t ls) uint8_t *dst, ptrdiff_t ls)
{ {
int y, x; int y, x, bytesperpixel = s->bytesperpixel;
// filter edges between columns (e.g. block1 | block2) // filter edges between columns (e.g. block1 | block2)
for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) { for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
...@@ -3290,7 +3467,7 @@ static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, ...@@ -3290,7 +3467,7 @@ static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h,
unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3]; unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
unsigned hm = hm1 | hm2 | hm13 | hm23; unsigned hm = hm1 | hm2 | hm13 | hm23;
for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 >> ss_h) { for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
if (col || x > 1) { if (col || x > 1) {
if (hm1 & x) { if (hm1 & x) {
int L = *l, H = L >> 4; int L = *l, H = L >> 4;
...@@ -3336,15 +3513,15 @@ static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, ...@@ -3336,15 +3513,15 @@ static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h,
H |= (L >> 4) << 8; H |= (L >> 4) << 8;
E |= s->filter.mblim_lut[L] << 8; E |= s->filter.mblim_lut[L] << 8;
I |= s->filter.lim_lut[L] << 8; I |= s->filter.lim_lut[L] << 8;
s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls, E, I, H); s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
} else { } else {
s->dsp.loop_filter_8[0][0](ptr + 4, ls, E, I, H); s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
} }
} else if (hm23 & x) { } else if (hm23 & x) {
int L = l[8 << ss_v], H = L >> 4; int L = l[8 << ss_v], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L]; int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4, ls, E, I, H); s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
} }
l++; l++;
} }
...@@ -3356,7 +3533,7 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, ...@@ -3356,7 +3533,7 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h,
uint8_t *lvl, uint8_t (*mask)[4], uint8_t *lvl, uint8_t (*mask)[4],
uint8_t *dst, ptrdiff_t ls) uint8_t *dst, ptrdiff_t ls)
{ {
int y, x; int y, x, bytesperpixel = s->bytesperpixel;
// block1 // block1
// filter edges between rows (e.g. ------) // filter edges between rows (e.g. ------)
...@@ -3365,7 +3542,7 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, ...@@ -3365,7 +3542,7 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h,
uint8_t *ptr = dst, *l = lvl, *vmask = mask[y]; uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3]; unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16, l += 2 << ss_h) { for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
if (row || y) { if (row || y) {
if (vm & x) { if (vm & x) {
int L = *l, H = L >> 4; int L = *l, H = L >> 4;
...@@ -3395,7 +3572,7 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, ...@@ -3395,7 +3572,7 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h,
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L]; int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))] s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
[1](ptr + 8, ls, E, I, H); [1](ptr + 8 * bytesperpixel, ls, E, I, H);
} }
} }
if (!ss_v) { if (!ss_v) {
...@@ -3416,7 +3593,7 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, ...@@ -3416,7 +3593,7 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h,
int L = l[1 + ss_h], H = L >> 4; int L = l[1 + ss_h], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L]; int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8, ls, E, I, H); s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
} }
} }
} }
...@@ -3746,6 +3923,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame, ...@@ -3746,6 +3923,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map; int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
ptrdiff_t yoff, uvoff, ls_y, ls_uv; ptrdiff_t yoff, uvoff, ls_y, ls_uv;
AVFrame *f; AVFrame *f;
int bytesperpixel;
if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) { if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
return res; return res;
...@@ -3807,6 +3985,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame, ...@@ -3807,6 +3985,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
} }
// main tile decode loop // main tile decode loop
bytesperpixel = s->bytesperpixel;
memset(s->above_partition_ctx, 0, s->cols); memset(s->above_partition_ctx, 0, s->cols);
memset(s->above_skip_ctx, 0, s->cols); memset(s->above_skip_ctx, 0, s->cols);
if (s->keyframe || s->intraonly) { if (s->keyframe || s->intraonly) {
...@@ -3909,7 +4088,8 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame, ...@@ -3909,7 +4088,8 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
for (col = s->tiling.tile_col_start; for (col = s->tiling.tile_col_start;
col < s->tiling.tile_col_end; col < s->tiling.tile_col_end;
col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) { col += 8, yoff2 += 64 * bytesperpixel,
uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
// FIXME integrate with lf code (i.e. zero after each // FIXME integrate with lf code (i.e. zero after each
// use, similar to invtxfm coefficients, or similar) // use, similar to invtxfm coefficients, or similar)
if (s->pass != 1) { if (s->pass != 1) {
...@@ -3938,13 +4118,13 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame, ...@@ -3938,13 +4118,13 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
if (row + 8 < s->rows) { if (row + 8 < s->rows) {
memcpy(s->intra_pred_data[0], memcpy(s->intra_pred_data[0],
f->data[0] + yoff + 63 * ls_y, f->data[0] + yoff + 63 * ls_y,
8 * s->cols); 8 * s->cols * bytesperpixel);
memcpy(s->intra_pred_data[1], memcpy(s->intra_pred_data[1],
f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv, f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
8 * s->cols >> s->ss_h); 8 * s->cols * bytesperpixel >> s->ss_h);
memcpy(s->intra_pred_data[2], memcpy(s->intra_pred_data[2],
f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv, f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
8 * s->cols >> s->ss_h); 8 * s->cols * bytesperpixel >> s->ss_h);
} }
// loopfilter one row // loopfilter one row
...@@ -3953,7 +4133,8 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame, ...@@ -3953,7 +4133,8 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
uvoff2 = uvoff; uvoff2 = uvoff;
lflvl_ptr = s->lflvl; lflvl_ptr = s->lflvl;
for (col = 0; col < s->cols; for (col = 0; col < s->cols;
col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) { col += 8, yoff2 += 64 * bytesperpixel,
uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2); loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
} }
} }
...@@ -4030,8 +4211,7 @@ static av_cold int vp9_decode_init(AVCodecContext *ctx) ...@@ -4030,8 +4211,7 @@ static av_cold int vp9_decode_init(AVCodecContext *ctx)
VP9Context *s = ctx->priv_data; VP9Context *s = ctx->priv_data;
ctx->internal->allocate_progress = 1; ctx->internal->allocate_progress = 1;
ff_vp9dsp_init(&s->dsp); s->last_bpp = 0;
ff_videodsp_init(&s->vdsp, 8);
s->filter.sharpness = -1; s->filter.sharpness = -1;
return init_frames(ctx); return init_frames(ctx);
...@@ -4076,6 +4256,9 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo ...@@ -4076,6 +4256,9 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
s->ss_h = ssrc->ss_h; s->ss_h = ssrc->ss_h;
s->segmentation.enabled = ssrc->segmentation.enabled; s->segmentation.enabled = ssrc->segmentation.enabled;
s->segmentation.update_map = ssrc->segmentation.update_map; s->segmentation.update_map = ssrc->segmentation.update_map;
s->bytesperpixel = ssrc->bytesperpixel;
s->bpp = ssrc->bpp;
s->bpp_index = ssrc->bpp_index;
memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx)); memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta)); memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
if (ssrc->segmentation.enabled) { if (ssrc->segmentation.enabled) {
...@@ -4089,6 +4272,8 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo ...@@ -4089,6 +4272,8 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
static const AVProfile profiles[] = { static const AVProfile profiles[] = {
{ FF_PROFILE_VP9_0, "Profile 0" }, { FF_PROFILE_VP9_0, "Profile 0" },
{ FF_PROFILE_VP9_1, "Profile 1" }, { FF_PROFILE_VP9_1, "Profile 1" },
{ FF_PROFILE_VP9_2, "Profile 2" },
{ FF_PROFILE_VP9_3, "Profile 3" },
{ FF_PROFILE_UNKNOWN }, { FF_PROFILE_UNKNOWN },
}; };
......
...@@ -40,6 +40,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -40,6 +40,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
AVFrame *ref1 = tref1->f, *ref2; AVFrame *ref1 = tref1->f, *ref2;
int w1 = ref1->width, h1 = ref1->height, w2, h2; int w1 = ref1->width, h1 = ref1->height, w2, h2;
ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride; ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
int bytesperpixel = BYTES_PER_PIXEL;
if (b->comp) { if (b->comp) {
tref2 = &s->refs[s->refidx[b->ref[1]]]; tref2 = &s->refs[s->refidx[b->ref[1]]];
...@@ -138,7 +139,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -138,7 +139,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1, 0); row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1, 0); row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1, 0);
h1 = (h1 + s->ss_v) >> s->ss_v; h1 = (h1 + s->ss_v) >> s->ss_v;
...@@ -159,7 +160,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -159,7 +160,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << (3 - s->ss_v), col << 3, row << (3 - s->ss_v), col << 3,
&b->mv[0][0], 4, 8 >> s->ss_v, w1, h1, 0); &b->mv[0][0], 4, 8 >> s->ss_v, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << (3 - s->ss_v), (col << 3) + 4, row << (3 - s->ss_v), (col << 3) + 4,
...@@ -170,7 +172,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -170,7 +172,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2, 1); row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2, 1); row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2, 1);
h2 = (h2 + s->ss_v) >> s->ss_v; h2 = (h2 + s->ss_v) >> s->ss_v;
...@@ -191,7 +193,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -191,7 +193,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << (3 - s->ss_v), col << 3, row << (3 - s->ss_v), col << 3,
&b->mv[0][1], 4, 8 >> s->ss_v, w2, h2, 1); &b->mv[0][1], 4, 8 >> s->ss_v, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << (3 - s->ss_v), (col << 3) + 4, row << (3 - s->ss_v), (col << 3) + 4,
...@@ -206,7 +209,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -206,7 +209,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1, 0); row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1, 0); row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], mc_luma_dir(s, mc[4][b->filter][0],
...@@ -214,7 +217,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -214,7 +217,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1, 0); (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1, 0);
mc_luma_dir(s, mc[4][b->filter][0], mc_luma_dir(s, mc[4][b->filter][0],
s->dst[0] + 4 * ls_y + 4, ls_y, s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
ref1->data[0], ref1->linesize[0], tref1, ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1, 0); (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1, 0);
if (s->ss_v) { if (s->ss_v) {
...@@ -239,7 +242,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -239,7 +242,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
&uvmv, 4, 4, w1, h1, 0); &uvmv, 4, 4, w1, h1, 0);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]); uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 2, (col << 3) + 4, row << 2, (col << 3) + 4,
...@@ -273,7 +277,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -273,7 +277,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << 3, col << 3, row << 3, col << 3,
&b->mv[0][0], 4, 4, w1, h1, 0); &b->mv[0][0], 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
row << 3, (col << 3) + 4, row << 3, (col << 3) + 4,
...@@ -285,7 +290,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -285,7 +290,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
(row << 3) + 4, col << 3, (row << 3) + 4, col << 3,
&b->mv[2][0], 4, 4, w1, h1, 0); &b->mv[2][0], 4, 4, w1, h1, 0);
mc_chroma_dir(s, mc[4][b->filter][0], mc_chroma_dir(s, mc[4][b->filter][0],
s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv, s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
ref1->data[1], ref1->linesize[1], ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1, ref1->data[2], ref1->linesize[2], tref1,
(row << 3) + 4, (col << 3) + 4, (row << 3) + 4, (col << 3) + 4,
...@@ -297,7 +303,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -297,7 +303,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2, 1); row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y, mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2, 1); row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], mc_luma_dir(s, mc[4][b->filter][1],
...@@ -305,7 +311,7 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -305,7 +311,7 @@ static void FN(inter_pred)(AVCodecContext *ctx)
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2, 1); (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2, 1);
mc_luma_dir(s, mc[4][b->filter][1], mc_luma_dir(s, mc[4][b->filter][1],
s->dst[0] + 4 * ls_y + 4, ls_y, s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
ref2->data[0], ref2->linesize[0], tref2, ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2, 1); (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2, 1);
if (s->ss_v) { if (s->ss_v) {
...@@ -330,7 +336,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -330,7 +336,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
&uvmv, 4, 4, w2, h2, 1); &uvmv, 4, 4, w2, h2, 1);
uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]); uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 2, (col << 3) + 4, row << 2, (col << 3) + 4,
...@@ -364,7 +371,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -364,7 +371,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
row << 3, col << 3, row << 3, col << 3,
&b->mv[0][1], 4, 4, w2, h2, 1); &b->mv[0][1], 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4, s->dst[2] + 4, ls_uv, s->dst[1] + 4 * bytesperpixel,
s->dst[2] + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
row << 3, (col << 3) + 4, row << 3, (col << 3) + 4,
...@@ -376,7 +384,8 @@ static void FN(inter_pred)(AVCodecContext *ctx) ...@@ -376,7 +384,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
(row << 3) + 4, col << 3, (row << 3) + 4, col << 3,
&b->mv[2][1], 4, 4, w2, h2, 1); &b->mv[2][1], 4, 4, w2, h2, 1);
mc_chroma_dir(s, mc[4][b->filter][1], mc_chroma_dir(s, mc[4][b->filter][1],
s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv, s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
ref2->data[1], ref2->linesize[1], ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2, ref2->data[2], ref2->linesize[2], tref2,
(row << 3) + 4, (col << 3) + 4, (row << 3) + 4, (col << 3) + 4,
......
...@@ -236,74 +236,210 @@ static const enum FilterMode vp9_filter_lut[3] = { ...@@ -236,74 +236,210 @@ static const enum FilterMode vp9_filter_lut[3] = {
FILTER_8TAP_SHARP, FILTER_8TAP_SHARP,
}; };
static const int16_t vp9_dc_qlookup[256] = { static const int16_t vp9_dc_qlookup[3][256] = {
4, 8, 8, 9, 10, 11, 12, 12, {
13, 14, 15, 16, 17, 18, 19, 19, 4, 8, 8, 9, 10, 11, 12, 12,
20, 21, 22, 23, 24, 25, 26, 26, 13, 14, 15, 16, 17, 18, 19, 19,
27, 28, 29, 30, 31, 32, 32, 33, 20, 21, 22, 23, 24, 25, 26, 26,
34, 35, 36, 37, 38, 38, 39, 40, 27, 28, 29, 30, 31, 32, 32, 33,
41, 42, 43, 43, 44, 45, 46, 47, 34, 35, 36, 37, 38, 38, 39, 40,
48, 48, 49, 50, 51, 52, 53, 53, 41, 42, 43, 43, 44, 45, 46, 47,
54, 55, 56, 57, 57, 58, 59, 60, 48, 48, 49, 50, 51, 52, 53, 53,
61, 62, 62, 63, 64, 65, 66, 66, 54, 55, 56, 57, 57, 58, 59, 60,
67, 68, 69, 70, 70, 71, 72, 73, 61, 62, 62, 63, 64, 65, 66, 66,
74, 74, 75, 76, 77, 78, 78, 79, 67, 68, 69, 70, 70, 71, 72, 73,
80, 81, 81, 82, 83, 84, 85, 85, 74, 74, 75, 76, 77, 78, 78, 79,
87, 88, 90, 92, 93, 95, 96, 98, 80, 81, 81, 82, 83, 84, 85, 85,
99, 101, 102, 104, 105, 107, 108, 110, 87, 88, 90, 92, 93, 95, 96, 98,
111, 113, 114, 116, 117, 118, 120, 121, 99, 101, 102, 104, 105, 107, 108, 110,
123, 125, 127, 129, 131, 134, 136, 138, 111, 113, 114, 116, 117, 118, 120, 121,
140, 142, 144, 146, 148, 150, 152, 154, 123, 125, 127, 129, 131, 134, 136, 138,
156, 158, 161, 164, 166, 169, 172, 174, 140, 142, 144, 146, 148, 150, 152, 154,
177, 180, 182, 185, 187, 190, 192, 195, 156, 158, 161, 164, 166, 169, 172, 174,
199, 202, 205, 208, 211, 214, 217, 220, 177, 180, 182, 185, 187, 190, 192, 195,
223, 226, 230, 233, 237, 240, 243, 247, 199, 202, 205, 208, 211, 214, 217, 220,
250, 253, 257, 261, 265, 269, 272, 276, 223, 226, 230, 233, 237, 240, 243, 247,
280, 284, 288, 292, 296, 300, 304, 309, 250, 253, 257, 261, 265, 269, 272, 276,
313, 317, 322, 326, 330, 335, 340, 344, 280, 284, 288, 292, 296, 300, 304, 309,
349, 354, 359, 364, 369, 374, 379, 384, 313, 317, 322, 326, 330, 335, 340, 344,
389, 395, 400, 406, 411, 417, 423, 429, 349, 354, 359, 364, 369, 374, 379, 384,
435, 441, 447, 454, 461, 467, 475, 482, 389, 395, 400, 406, 411, 417, 423, 429,
489, 497, 505, 513, 522, 530, 539, 549, 435, 441, 447, 454, 461, 467, 475, 482,
559, 569, 579, 590, 602, 614, 626, 640, 489, 497, 505, 513, 522, 530, 539, 549,
654, 668, 684, 700, 717, 736, 755, 775, 559, 569, 579, 590, 602, 614, 626, 640,
796, 819, 843, 869, 896, 925, 955, 988, 654, 668, 684, 700, 717, 736, 755, 775,
1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336, 796, 819, 843, 869, 896, 925, 955, 988,
1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336,
}, {
4, 9, 10, 13, 15, 17, 20, 22,
25, 28, 31, 34, 37, 40, 43, 47,
50, 53, 57, 60, 64, 68, 71, 75,
78, 82, 86, 90, 93, 97, 101, 105,
109, 113, 116, 120, 124, 128, 132, 136,
140, 143, 147, 151, 155, 159, 163, 166,
170, 174, 178, 182, 185, 189, 193, 197,
200, 204, 208, 212, 215, 219, 223, 226,
230, 233, 237, 241, 244, 248, 251, 255,
259, 262, 266, 269, 273, 276, 280, 283,
287, 290, 293, 297, 300, 304, 307, 310,
314, 317, 321, 324, 327, 331, 334, 337,
343, 350, 356, 362, 369, 375, 381, 387,
394, 400, 406, 412, 418, 424, 430, 436,
442, 448, 454, 460, 466, 472, 478, 484,
490, 499, 507, 516, 525, 533, 542, 550,
559, 567, 576, 584, 592, 601, 609, 617,
625, 634, 644, 655, 666, 676, 687, 698,
708, 718, 729, 739, 749, 759, 770, 782,
795, 807, 819, 831, 844, 856, 868, 880,
891, 906, 920, 933, 947, 961, 975, 988,
1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105,
1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236,
1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379,
1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537,
1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929,
1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197,
2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561,
2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102,
3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953,
4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
}, {
4, 12, 18, 25, 33, 41, 50, 60,
70, 80, 91, 103, 115, 127, 140, 153,
166, 180, 194, 208, 222, 237, 251, 266,
281, 296, 312, 327, 343, 358, 374, 390,
405, 421, 437, 453, 469, 484, 500, 516,
532, 548, 564, 580, 596, 611, 627, 643,
659, 674, 690, 706, 721, 737, 752, 768,
783, 798, 814, 829, 844, 859, 874, 889,
904, 919, 934, 949, 964, 978, 993, 1008,
1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122,
1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234,
1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342,
1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544,
1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741,
1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933,
1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199,
2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467,
2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788,
2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127,
3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517,
3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951,
4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420,
4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942,
5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517,
5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149,
6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867,
6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715,
7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788,
8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245,
10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409,
12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812,
16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387,
}
}; };
static const int16_t vp9_ac_qlookup[256] = { static const int16_t vp9_ac_qlookup[3][256] = {
4, 8, 9, 10, 11, 12, 13, 14, {
15, 16, 17, 18, 19, 20, 21, 22, 4, 8, 9, 10, 11, 12, 13, 14,
23, 24, 25, 26, 27, 28, 29, 30, 15, 16, 17, 18, 19, 20, 21, 22,
31, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26, 27, 28, 29, 30,
39, 40, 41, 42, 43, 44, 45, 46, 31, 32, 33, 34, 35, 36, 37, 38,
47, 48, 49, 50, 51, 52, 53, 54, 39, 40, 41, 42, 43, 44, 45, 46,
55, 56, 57, 58, 59, 60, 61, 62, 47, 48, 49, 50, 51, 52, 53, 54,
63, 64, 65, 66, 67, 68, 69, 70, 55, 56, 57, 58, 59, 60, 61, 62,
71, 72, 73, 74, 75, 76, 77, 78, 63, 64, 65, 66, 67, 68, 69, 70,
79, 80, 81, 82, 83, 84, 85, 86, 71, 72, 73, 74, 75, 76, 77, 78,
87, 88, 89, 90, 91, 92, 93, 94, 79, 80, 81, 82, 83, 84, 85, 86,
95, 96, 97, 98, 99, 100, 101, 102, 87, 88, 89, 90, 91, 92, 93, 94,
104, 106, 108, 110, 112, 114, 116, 118, 95, 96, 97, 98, 99, 100, 101, 102,
120, 122, 124, 126, 128, 130, 132, 134, 104, 106, 108, 110, 112, 114, 116, 118,
136, 138, 140, 142, 144, 146, 148, 150, 120, 122, 124, 126, 128, 130, 132, 134,
152, 155, 158, 161, 164, 167, 170, 173, 136, 138, 140, 142, 144, 146, 148, 150,
176, 179, 182, 185, 188, 191, 194, 197, 152, 155, 158, 161, 164, 167, 170, 173,
200, 203, 207, 211, 215, 219, 223, 227, 176, 179, 182, 185, 188, 191, 194, 197,
231, 235, 239, 243, 247, 251, 255, 260, 200, 203, 207, 211, 215, 219, 223, 227,
265, 270, 275, 280, 285, 290, 295, 300, 231, 235, 239, 243, 247, 251, 255, 260,
305, 311, 317, 323, 329, 335, 341, 347, 265, 270, 275, 280, 285, 290, 295, 300,
353, 359, 366, 373, 380, 387, 394, 401, 305, 311, 317, 323, 329, 335, 341, 347,
408, 416, 424, 432, 440, 448, 456, 465, 353, 359, 366, 373, 380, 387, 394, 401,
474, 483, 492, 501, 510, 520, 530, 540, 408, 416, 424, 432, 440, 448, 456, 465,
550, 560, 571, 582, 593, 604, 615, 627, 474, 483, 492, 501, 510, 520, 530, 540,
639, 651, 663, 676, 689, 702, 715, 729, 550, 560, 571, 582, 593, 604, 615, 627,
743, 757, 771, 786, 801, 816, 832, 848, 639, 651, 663, 676, 689, 702, 715, 729,
864, 881, 898, 915, 933, 951, 969, 988, 743, 757, 771, 786, 801, 816, 832, 848,
1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 864, 881, 898, 915, 933, 951, 969, 988,
1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151,
1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343,
1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567,
1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
}, {
4, 9, 11, 13, 16, 18, 21, 24,
27, 30, 33, 37, 40, 44, 48, 51,
55, 59, 63, 67, 71, 75, 79, 83,
88, 92, 96, 100, 105, 109, 114, 118,
122, 127, 131, 136, 140, 145, 149, 154,
158, 163, 168, 172, 177, 181, 186, 190,
195, 199, 204, 208, 213, 217, 222, 226,
231, 235, 240, 244, 249, 253, 258, 262,
267, 271, 275, 280, 284, 289, 293, 297,
302, 306, 311, 315, 319, 324, 328, 332,
337, 341, 345, 349, 354, 358, 362, 367,
371, 375, 379, 384, 388, 392, 396, 401,
409, 417, 425, 433, 441, 449, 458, 466,
474, 482, 490, 498, 506, 514, 523, 531,
539, 547, 555, 563, 571, 579, 588, 596,
604, 616, 628, 640, 652, 664, 676, 688,
700, 713, 725, 737, 749, 761, 773, 785,
797, 809, 825, 841, 857, 873, 889, 905,
922, 938, 954, 970, 986, 1002, 1018, 1038,
1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198,
1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386,
1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603,
1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859,
1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159,
2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507,
2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915,
2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391,
3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952,
4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604,
4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372,
5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268,
6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
}, {
4, 13, 19, 27, 35, 44, 54, 64,
75, 87, 99, 112, 126, 139, 154, 168,
183, 199, 214, 230, 247, 263, 280, 297,
314, 331, 349, 366, 384, 402, 420, 438,
456, 475, 493, 511, 530, 548, 567, 586,
604, 623, 642, 660, 679, 698, 716, 735,
753, 772, 791, 809, 828, 846, 865, 884,
902, 920, 939, 957, 976, 994, 1012, 1030,
1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175,
1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317,
1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457,
1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595,
1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856,
1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118,
2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378,
2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750,
2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137,
3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619,
3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149,
4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791,
4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544,
5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410,
6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435,
7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635,
8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028,
10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661,
11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565,
13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806,
16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414,
18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486,
21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070,
25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247,
}
}; };
static const enum TxfmType vp9_intra_txfm_type[14] = { static const enum TxfmType vp9_intra_txfm_type[14] = {
......
...@@ -22,2220 +22,18 @@ ...@@ -22,2220 +22,18 @@
*/ */
#include "libavutil/common.h" #include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "vp9dsp.h" #include "vp9dsp.h"
#include "rnd_avg.h"
// FIXME see whether we can merge parts of this (perhaps at least 4x4 and 8x8) av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp)
// back with h264pred.[ch]
// Vertical intra prediction: every output row is a copy of the row of
// reconstructed pixels directly above the block ("top"). The "left" edge
// is unused for this mode.
#define def_vert_pred(size) \
static void vert_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                     const uint8_t *left, const uint8_t *top) \
{ \
    int row; \
 \
    for (row = 0; row < size; row++) { \
        memcpy(dst, top, size); \
        dst += stride; \
    } \
}
def_vert_pred(4)
def_vert_pred(8)
def_vert_pred(16)
def_vert_pred(32)
// Horizontal intra prediction: every output row is filled with one pixel
// from the left edge. Note the left[] array is stored bottom-up, hence the
// left[size - 1 - row] indexing. The "top" edge is unused for this mode.
#define def_hor_pred(size) \
static void hor_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                    const uint8_t *left, const uint8_t *top) \
{ \
    int row; \
 \
    for (row = 0; row < size; row++) { \
        memset(dst, left[size - 1 - row], size); \
        dst += stride; \
    } \
}
def_hor_pred(4)
def_hor_pred(8)
def_hor_pred(16)
def_hor_pred(32)
// TrueMotion ("TM") intra prediction:
//   pred(x, y) = clip_uint8(top[x] + left_row(y) - topleft)
// where topleft is the pixel above-left of the block (top[-1]) and left[]
// is stored bottom-up (left[size - 1 - y] is the pixel beside row y).
#define def_tm_pred(size) \
static void tm_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                   const uint8_t *left, const uint8_t *top) \
{ \
    int x, y; \
    const int tl = top[-1]; \
 \
    for (y = 0; y < size; y++) { \
        const int l_m_tl = left[size - 1 - y] - tl; \
        for (x = 0; x < size; x++) \
            dst[x] = av_clip_uint8(top[x] + l_m_tl); \
        dst += stride; \
    } \
}
def_tm_pred(4)
def_tm_pred(8)
def_tm_pred(16)
def_tm_pred(32)
// DC intra prediction: fill the whole block with the rounded average of
// the 2*size edge pixels (size from the left edge, size from the top edge).
// The rounding bias equals half the divisor, i.e. `size`, and the divisor
// 2*size is applied as a right shift.
#define def_dc_pred(size, shift) \
static void dc_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                   const uint8_t *left, const uint8_t *top) \
{ \
    int i, dc = size; /* rounding bias: (2*size)/2 */ \
 \
    for (i = 0; i < size; i++) \
        dc += left[i] + top[i]; \
    dc >>= shift; \
    for (i = 0; i < size; i++) { \
        memset(dst, dc, size); \
        dst += stride; \
    } \
}
def_dc_pred(4, 3)
def_dc_pred(8, 4)
def_dc_pred(16, 5)
def_dc_pred(32, 6)
// DC prediction using only the left edge (used when the top edge is not
// available). Fills the block with the rounded average of the size left
// pixels; rounding bias is size/2 and the divisor size is a right shift.
#define def_dc_left_pred(size, shift) \
static void dc_left_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                        const uint8_t *left, const uint8_t *top) \
{ \
    int i, dc = size >> 1; /* rounding bias: size/2 */ \
 \
    for (i = 0; i < size; i++) \
        dc += left[i]; \
    dc >>= shift; \
    for (i = 0; i < size; i++) { \
        memset(dst, dc, size); \
        dst += stride; \
    } \
}
def_dc_left_pred(4, 2)
def_dc_left_pred(8, 3)
def_dc_left_pred(16, 4)
def_dc_left_pred(32, 5)
// DC prediction using only the top edge (used when the left edge is not
// available). Fills the block with the rounded average of the size top
// pixels; rounding bias is size/2 and the divisor size is a right shift.
#define def_dc_top_pred(size, shift) \
static void dc_top_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                       const uint8_t *left, const uint8_t *top) \
{ \
    int i, dc = size >> 1; /* rounding bias: size/2 */ \
 \
    for (i = 0; i < size; i++) \
        dc += top[i]; \
    dc >>= shift; \
    for (i = 0; i < size; i++) { \
        memset(dst, dc, size); \
        dst += stride; \
    } \
}
def_dc_top_pred(4, 2)
def_dc_top_pred(8, 3)
def_dc_top_pred(16, 4)
static void dc_top_32x32_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left, const uint8_t *top)
{ {
uint64_t dc = 0x0101010101010101ULL * if (bpp == 8) {
((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] + ff_vp9dsp_init_8(dsp);
top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + } else if (bpp == 10) {
top[12] + top[13] + top[14] + top[15] + top[16] + top[17] + ff_vp9dsp_init_10(dsp);
top[18] + top[19] + top[20] + top[21] + top[22] + top[23] +
top[24] + top[25] + top[26] + top[27] + top[28] + top[29] +
top[30] + top[31] + 16) >> 5);
int y;
for (y = 0; y < 32; y++) {
AV_WN64A(dst + 0, dc);
AV_WN64A(dst + 8, dc);
AV_WN64A(dst + 16, dc);
AV_WN64A(dst + 24, dc);
dst += stride;
}
}
// DC prediction with neither edge available: fill with the mid-grey
// constant 128 (half of the 8-bit range).
#define def_dc_128_pred(size) \
static void dc_128_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                       const uint8_t *left, const uint8_t *top) \
{ \
    int row; \
 \
    for (row = 0; row < size; row++) { \
        memset(dst, 128, size); \
        dst += stride; \
    } \
}
def_dc_128_pred(4)
def_dc_128_pred(8)
def_dc_128_pred(16)
def_dc_128_pred(32)
// DC prediction variant filling with 127 (used by the VP9 spec when only
// one particular edge is missing; see the frame-edge handling in vp9.c).
#define def_dc_127_pred(size) \
static void dc_127_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                       const uint8_t *left, const uint8_t *top) \
{ \
    int row; \
 \
    for (row = 0; row < size; row++) { \
        memset(dst, 127, size); \
        dst += stride; \
    } \
}
def_dc_127_pred(4)
def_dc_127_pred(8)
def_dc_127_pred(16)
def_dc_127_pred(32)
// DC prediction variant filling with 129 (counterpart of dc_127 for the
// opposite missing edge).
#define def_dc_129_pred(size) \
static void dc_129_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                       const uint8_t *left, const uint8_t *top) \
{ \
    int row; \
 \
    for (row = 0; row < size; row++) { \
        memset(dst, 129, size); \
        dst += stride; \
    } \
}
def_dc_129_pred(4)
def_dc_129_pred(8)
def_dc_129_pred(16)
def_dc_129_pred(32)
// Address a pixel inside the destination block by (column, row).
#define DST(x, y) dst[(x) + (y) * stride]

// Diagonal down-left (45 degree) prediction, 4x4: each anti-diagonal is
// the (1,2,1)/4 filtered value of three consecutive top pixels.
static void diag_downleft_4x4_c(uint8_t *dst, ptrdiff_t stride,
                                const uint8_t *left, const uint8_t *top)
{
    int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        a4 = top[4], a5 = top[5], a6 = top[6], a7 = top[7];

    DST(0,0) = (a0 + a1 * 2 + a2 + 2) >> 2;
    DST(1,0) = DST(0,1) = (a1 + a2 * 2 + a3 + 2) >> 2;
    DST(2,0) = DST(1,1) = DST(0,2) = (a2 + a3 * 2 + a4 + 2) >> 2;
    DST(3,0) = DST(2,1) = DST(1,2) = DST(0,3) = (a3 + a4 * 2 + a5 + 2) >> 2;
    DST(3,1) = DST(2,2) = DST(1,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
    DST(3,2) = DST(2,3) = (a5 + a6 * 2 + a7 + 2) >> 2;
    DST(3,3) = a7; // note: this is different from vp8 and such
}
// Diagonal down-left prediction for 8x8/16x16/32x32: pre-filter the top
// edge once with the (1,2,1)/4 kernel, then each row r copies the filtered
// edge shifted by r and pads the remainder with the last top pixel.
#define def_diag_downleft(size) \
static void diag_downleft_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                              const uint8_t *left, const uint8_t *top) \
{ \
    int i, row; \
    uint8_t edge[size - 1]; \
 \
    for (i = 0; i < size - 2; i++) \
        edge[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
    edge[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
 \
    for (row = 0; row < size; row++) { \
        uint8_t *line = dst + row * stride; \
        const int ncopy = size - 1 - row; \
        memcpy(line, edge + row, ncopy); \
        memset(line + ncopy, top[size - 1], size - ncopy); \
    } \
}
def_diag_downleft(8)
def_diag_downleft(16)
def_diag_downleft(32)
// Diagonal down-right prediction, 4x4: each down-right diagonal gets the
// (1,2,1)/4 filtered value of three consecutive edge pixels, walking from
// the bottom of the left edge, through the top-left corner, along the top.
// left[] is stored bottom-up, hence l0 = left[3] is the row-0 neighbour.
static void diag_downright_4x4_c(uint8_t *dst, ptrdiff_t stride,
                                 const uint8_t *left, const uint8_t *top)
{
    int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0];

    DST(0,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
    DST(0,2) = DST(1,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,1) = DST(1,2) = DST(2,3) = (tl + l0 * 2 + l1 + 2) >> 2;
    DST(0,0) = DST(1,1) = DST(2,2) = DST(3,3) = (l0 + tl * 2 + a0 + 2) >> 2;
    DST(1,0) = DST(2,1) = DST(3,2) = (tl + a0 * 2 + a1 + 2) >> 2;
    DST(2,0) = DST(3,1) = (a0 + a1 * 2 + a2 + 2) >> 2;
    DST(3,0) = (a1 + a2 * 2 + a3 + 2) >> 2;
}
// Diagonal down-right prediction for larger sizes: build one filtered edge
// array v[] spanning left edge -> corner -> top edge (2*size-1 entries),
// then each row copies a window of v[] shifted one step left per row.
#define def_diag_downright(size) \
static void diag_downright_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                               const uint8_t *left, const uint8_t *top) \
{ \
    int i, j; \
    uint8_t v[size + size - 1]; \
 \
    for (i = 0; i < size - 2; i++) { \
        v[i           ] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
        v[size + 1 + i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
    } \
    /* the three filter taps that straddle the top-left corner */ \
    v[size - 2] = (left[size - 2] + left[size - 1] * 2 + top[-1] + 2) >> 2; \
    v[size - 1] = (left[size - 1] + top[-1]  * 2 + top[ 0] + 2) >> 2; \
    v[size    ] = (top[-1] + top[0] * 2 + top[ 1] + 2) >> 2; \
 \
    for (j = 0; j < size; j++) \
        memcpy(dst + j*stride, v + size - 1 - j, size); \
}
def_diag_downright(8)
def_diag_downright(16)
def_diag_downright(32)
// Vertical-right prediction, 4x4: half-pel diagonals alternate between
// 2-tap averages ((a+b+1)>>1) and 3-tap (1,2,1)/4 filtered values of the
// top/left edge pixels. left[] is stored bottom-up (l0 = left[3]).
static void vert_right_4x4_c(uint8_t *dst, ptrdiff_t stride,
                             const uint8_t *left, const uint8_t *top)
{
    int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        l0 = left[3], l1 = left[2], l2 = left[1];

    DST(0,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,2) = (tl + l0 * 2 + l1 + 2) >> 2;
    DST(0,0) = DST(1,2) = (tl + a0 + 1) >> 1;
    DST(0,1) = DST(1,3) = (l0 + tl * 2 + a0 + 2) >> 2;
    DST(1,0) = DST(2,2) = (a0 + a1 + 1) >> 1;
    DST(1,1) = DST(2,3) = (tl + a0 * 2 + a1 + 2) >> 2;
    DST(2,0) = DST(3,2) = (a1 + a2 + 1) >> 1;
    DST(2,1) = DST(3,3) = (a0 + a1 * 2 + a2 + 2) >> 2;
    DST(3,0) = (a2 + a3 + 1) >> 1;
    DST(3,1) = (a1 + a2 * 2 + a3 + 2) >> 2;
}
// Vertical-right prediction for larger sizes: even rows come from the
// 2-tap averaged edge array ve[], odd rows from the 3-tap filtered array
// vo[]; each row pair shifts one step back into the left-edge section.
#define def_vert_right(size) \
static void vert_right_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                           const uint8_t *left, const uint8_t *top) \
{ \
    int i, j; \
    uint8_t ve[size + size/2 - 1], vo[size + size/2 - 1]; \
 \
    /* left-edge section (only every second left pixel lands in each array) */ \
    for (i = 0; i < size/2 - 2; i++) { \
        vo[i] = (left[i*2 + 3] + left[i*2 + 2] * 2 + left[i*2 + 1] + 2) >> 2; \
        ve[i] = (left[i*2 + 4] + left[i*2 + 3] * 2 + left[i*2 + 2] + 2) >> 2; \
    } \
    vo[size/2 - 2] = (left[size - 1] + left[size - 2] * 2 + left[size - 3] + 2) >> 2; \
    ve[size/2 - 2] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
 \
    /* corner and top-edge section */ \
    ve[size/2 - 1] = (top[-1] + top[0] + 1) >> 1; \
    vo[size/2 - 1] = (left[size - 1] + top[-1] * 2 + top[0] + 2) >> 2; \
    for (i = 0; i < size - 1; i++) { \
        ve[size/2 + i] = (top[i] + top[i + 1] + 1) >> 1; \
        vo[size/2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
    } \
 \
    for (j = 0; j < size / 2; j++) { \
        memcpy(dst +  j*2     *stride, ve + size/2 - 1 - j, size); \
        memcpy(dst + (j*2 + 1)*stride, vo + size/2 - 1 - j, size); \
    } \
}
def_vert_right(8)
def_vert_right(16)
def_vert_right(32)
// Horizontal-down prediction, 4x4: mirror of vert_right along the main
// diagonal; alternates 2-tap averages and 3-tap filtered edge values.
// left[] is stored bottom-up (l0 = left[3] is the row-0 neighbour).
static void hor_down_4x4_c(uint8_t *dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    int l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0],
        tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2];

    DST(2,0) = (tl + a0 * 2 + a1 + 2) >> 2;
    DST(3,0) = (a0 + a1 * 2 + a2 + 2) >> 2;
    DST(0,0) = DST(2,1) = (tl + l0 + 1) >> 1;
    DST(1,0) = DST(3,1) = (a0 + tl * 2 + l0 + 2) >> 2;
    DST(0,1) = DST(2,2) = (l0 + l1 + 1) >> 1;
    DST(1,1) = DST(3,2) = (tl + l0 * 2 + l1 + 2) >> 2;
    DST(0,2) = DST(2,3) = (l1 + l2 + 1) >> 1;
    DST(1,2) = DST(3,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,3) = (l2 + l3 + 1) >> 1;
    DST(1,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
}
// Horizontal-down prediction for larger sizes: v[] interleaves 2-tap
// averages and 3-tap filtered values of the left edge, then the corner,
// then 3-tap filtered top values; each row copies a window of v[]
// shifted two entries back per row.
#define def_hor_down(size) \
static void hor_down_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                         const uint8_t *left, const uint8_t *top) \
{ \
    int i, j; \
    uint8_t v[size * 3 - 2]; \
 \
    for (i = 0; i < size - 2; i++) { \
        v[i*2       ] = (left[i + 1] + left[i + 0] + 1) >> 1; \
        v[i*2    + 1] = (left[i + 2] + left[i + 1] * 2 + left[i + 0] + 2) >> 2; \
        v[size*2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
    } \
    /* entries straddling the top-left corner */ \
    v[size*2 - 2] = (top[-1] + left[size - 1] + 1) >> 1; \
    v[size*2 - 4] = (left[size - 1] + left[size - 2] + 1) >> 1; \
    v[size*2 - 1] = (top[0] + top[-1] * 2 + left[size - 1] + 2) >> 2; \
    v[size*2 - 3] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
 \
    for (j = 0; j < size; j++) \
        memcpy(dst + j*stride, v + size*2 - 2 - j*2, size); \
}
def_hor_down(8)
def_hor_down(16)
def_hor_down(32)
// Vertical-left prediction, 4x4: even rows use 2-tap averages of adjacent
// top pixels, odd rows use 3-tap (1,2,1)/4 filtered values; only the top
// edge is consulted.
static void vert_left_4x4_c(uint8_t *dst, ptrdiff_t stride,
                            const uint8_t *left, const uint8_t *top)
{
    int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        a4 = top[4], a5 = top[5], a6 = top[6];

    DST(0,0) = (a0 + a1 + 1) >> 1;
    DST(0,1) = (a0 + a1 * 2 + a2 + 2) >> 2;
    DST(1,0) = DST(0,2) = (a1 + a2 + 1) >> 1;
    DST(1,1) = DST(0,3) = (a1 + a2 * 2 + a3 + 2) >> 2;
    DST(2,0) = DST(1,2) = (a2 + a3 + 1) >> 1;
    DST(2,1) = DST(1,3) = (a2 + a3 * 2 + a4 + 2) >> 2;
    DST(3,0) = DST(2,2) = (a3 + a4 + 1) >> 1;
    DST(3,1) = DST(2,3) = (a3 + a4 * 2 + a5 + 2) >> 2;
    DST(3,2) = (a4 + a5 + 1) >> 1;
    DST(3,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
}
// Vertical-left prediction for larger sizes: ve[] holds 2-tap averages of
// the top edge, vo[] the 3-tap filtered values; row pair j copies both
// arrays shifted by j and pads the tail with the last top pixel.
#define def_vert_left(size) \
static void vert_left_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                          const uint8_t *left, const uint8_t *top) \
{ \
    int i, j; \
    uint8_t ve[size - 1], vo[size - 1]; \
 \
    for (i = 0; i < size - 2; i++) { \
        ve[i] = (top[i] + top[i + 1] + 1) >> 1; \
        vo[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
    } \
    ve[size - 2] = (top[size - 2] + top[size - 1] + 1) >> 1; \
    vo[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
 \
    for (j = 0; j < size / 2; j++) { \
        memcpy(dst +  j*2      * stride, ve + j, size - j - 1); \
        memset(dst +  j*2      * stride + size - j - 1, top[size - 1], j + 1); \
        memcpy(dst + (j*2 + 1) * stride, vo + j, size - j - 1); \
        memset(dst + (j*2 + 1) * stride + size - j - 1, top[size - 1], j + 1); \
    } \
}
def_vert_left(8)
def_vert_left(16)
def_vert_left(32)
// Horizontal-up prediction, 4x4: walks down the left edge with alternating
// 2-tap and 3-tap filters; positions past the last left pixel are filled
// with that pixel. Note left[] is read bottom-up here (l0 = left[0]).
static void hor_up_4x4_c(uint8_t *dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    int l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];

    DST(0,0) = (l0 + l1 + 1) >> 1;
    DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,1) = DST(2,0) = (l1 + l2 + 1) >> 1;
    DST(1,1) = DST(3,0) = (l1 + l2 * 2 + l3 + 2) >> 2;
    DST(0,2) = DST(2,1) = (l2 + l3 + 1) >> 1;
    DST(1,2) = DST(3,1) = (l2 + l3 * 3 + 2) >> 2;
    DST(0,3) = DST(1,3) = DST(2,2) = DST(2,3) = DST(3,2) = DST(3,3) = l3;
}
// Horizontal-up prediction for larger sizes: v[] interleaves 2-tap and
// 3-tap filtered left-edge values; row j copies a window shifted two
// entries per row, padding the tail with the last left pixel once the
// window runs off the end of v[].
#define def_hor_up(size) \
static void hor_up_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
                                       const uint8_t *left, const uint8_t *top) \
{ \
    int i, j; \
    uint8_t v[size*2 - 2]; \
 \
    for (i = 0; i < size - 2; i++) { \
        v[i*2    ] = (left[i] + left[i + 1] + 1) >> 1; \
        v[i*2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
    } \
    v[size*2 - 4] = (left[size - 2] + left[size - 1] + 1) >> 1; \
    v[size*2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2; \
 \
    for (j = 0; j < size / 2; j++) \
        memcpy(dst + j*stride, v + j*2, size); \
    for (j = size / 2; j < size; j++) { \
        memcpy(dst + j*stride, v + j*2, size*2 - 2 - j*2); \
        memset(dst + j*stride + size*2 - 2 - j*2, left[size - 1], \
               2 + j*2 - size); \
    } \
}
def_hor_up(8)
def_hor_up(16)
def_hor_up(32)

#undef DST
// Fill the intra-prediction function table: one entry per transform size
// (TX_4X4..TX_32X32) and per prediction mode. Called once at decoder init.
static av_cold void vp9dsp_intrapred_init(VP9DSPContext *dsp)
{
#define init_intra_pred(tx, sz) \
    dsp->intra_pred[tx][VERT_PRED]            = vert_##sz##_c; \
    dsp->intra_pred[tx][HOR_PRED]             = hor_##sz##_c; \
    dsp->intra_pred[tx][DC_PRED]              = dc_##sz##_c; \
    dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED]  = diag_downleft_##sz##_c; \
    dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = diag_downright_##sz##_c; \
    dsp->intra_pred[tx][VERT_RIGHT_PRED]      = vert_right_##sz##_c; \
    dsp->intra_pred[tx][HOR_DOWN_PRED]        = hor_down_##sz##_c; \
    dsp->intra_pred[tx][VERT_LEFT_PRED]       = vert_left_##sz##_c; \
    dsp->intra_pred[tx][HOR_UP_PRED]          = hor_up_##sz##_c; \
    dsp->intra_pred[tx][TM_VP8_PRED]          = tm_##sz##_c; \
    dsp->intra_pred[tx][LEFT_DC_PRED]         = dc_left_##sz##_c; \
    dsp->intra_pred[tx][TOP_DC_PRED]          = dc_top_##sz##_c; \
    dsp->intra_pred[tx][DC_128_PRED]          = dc_128_##sz##_c; \
    dsp->intra_pred[tx][DC_127_PRED]          = dc_127_##sz##_c; \
    dsp->intra_pred[tx][DC_129_PRED]          = dc_129_##sz##_c

    init_intra_pred(TX_4X4,   4x4);
    init_intra_pred(TX_8X8,   8x8);
    init_intra_pred(TX_16X16, 16x16);
    init_intra_pred(TX_32X32, 32x32);

#undef init_intra_pred
}
#define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \
static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *dst, \
ptrdiff_t stride, \
int16_t *block, int eob) \
{ \
int i, j; \
int16_t tmp[sz * sz], out[sz]; \
\
if (has_dconly && eob == 1) { \
const int t = (((block[0] * 11585 + (1 << 13)) >> 14) \
* 11585 + (1 << 13)) >> 14; \
block[0] = 0; \
for (i = 0; i < sz; i++) { \
for (j = 0; j < sz; j++) \
dst[j * stride] = av_clip_uint8(dst[j * stride] + \
(bits ? \
(t + (1 << (bits - 1))) >> bits : \
t)); \
dst++; \
} \
return; \
} \
\
for (i = 0; i < sz; i++) \
type_a##sz##_1d(block + i, sz, tmp + i * sz, 0); \
memset(block, 0, sz * sz * sizeof(*block)); \
for (i = 0; i < sz; i++) { \
type_b##sz##_1d(tmp + i, sz, out, 1); \
for (j = 0; j < sz; j++) \
dst[j * stride] = av_clip_uint8(dst[j * stride] + \
(bits ? \
(out[j] + (1 << (bits - 1))) >> bits : \
out[j])); \
dst++; \
} \
}
#define itxfm_wrap(sz, bits) \
itxfm_wrapper(idct, idct, sz, bits, 1) \
itxfm_wrapper(iadst, idct, sz, bits, 0) \
itxfm_wrapper(idct, iadst, sz, bits, 0) \
itxfm_wrapper(iadst, iadst, sz, bits, 0)
// Read input coefficient x of the current 1-D column/row.
#define IN(x) in[(x) * stride]

// 4-point 1-D inverse DCT (fixed point, 14-bit cosine constants:
// 11585 ~ cos(pi/4)*2^14, 6270/15137 ~ sin/cos(pi/8)*2^14).
static av_always_inline void idct4_1d(const int16_t *in, ptrdiff_t stride,
                                      int16_t *out, int pass)
{
    const int even0 = ((IN(0) + IN(2)) * 11585 + (1 << 13)) >> 14;
    const int even1 = ((IN(0) - IN(2)) * 11585 + (1 << 13)) >> 14;
    const int odd0  = (IN(1) *  6270 - IN(3) * 15137 + (1 << 13)) >> 14;
    const int odd1  = (IN(1) * 15137 + IN(3) *  6270 + (1 << 13)) >> 14;

    // final butterfly
    out[0] = even0 + odd1;
    out[1] = even1 + odd0;
    out[2] = even1 - odd0;
    out[3] = even0 - odd1;
}
// 4-point 1-D inverse ADST (asymmetric DST used by VP9 for intra blocks);
// fixed-point constants are sin values scaled by 2^14.
static av_always_inline void iadst4_1d(const int16_t *in, ptrdiff_t stride,
                                       int16_t *out, int pass)
{
    int t0, t1, t2, t3;

    t0 = 5283 * IN(0) + 15212 * IN(2) + 9929 * IN(3);
    t1 = 9929 * IN(0) - 5283 * IN(2) - 15212 * IN(3);
    t2 = 13377 * (IN(0) - IN(2) + IN(3));
    t3 = 13377 * IN(1);
    out[0] = (t0 + t3      + (1 << 13)) >> 14;
    out[1] = (t1 + t3      + (1 << 13)) >> 14;
    out[2] = (t2           + (1 << 13)) >> 14;
    out[3] = (t0 + t1 - t3 + (1 << 13)) >> 14;
}

itxfm_wrap(4, 4)
// 8-point 1-D inverse DCT: a 4-point even stage plus an odd stage of four
// rotations, combined by the usual butterflies.
static av_always_inline void idct8_1d(const int16_t *in, ptrdiff_t stride,
                                      int16_t *out, int pass)
{
    int t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;

    t0a = ((IN(0) + IN(4)) * 11585         + (1 << 13)) >> 14;
    t1a = ((IN(0) - IN(4)) * 11585         + (1 << 13)) >> 14;
    t2a =  (IN(2) *  6270 - IN(6) * 15137  + (1 << 13)) >> 14;
    t3a =  (IN(2) * 15137 + IN(6) *  6270  + (1 << 13)) >> 14;
    t4a =  (IN(1) *  3196 - IN(7) * 16069  + (1 << 13)) >> 14;
    t5a =  (IN(5) * 13623 - IN(3) *  9102  + (1 << 13)) >> 14;
    t6a =  (IN(5) *  9102 + IN(3) * 13623  + (1 << 13)) >> 14;
    t7a =  (IN(1) * 16069 + IN(7) *  3196  + (1 << 13)) >> 14;

    t0  = t0a + t3a;
    t1  = t1a + t2a;
    t2  = t1a - t2a;
    t3  = t0a - t3a;
    t4  = t4a + t5a;
    t5a = t4a - t5a;
    t7  = t7a + t6a;
    t6a = t7a - t6a;

    t5  = ((t6a - t5a) * 11585 + (1 << 13)) >> 14;
    t6  = ((t6a + t5a) * 11585 + (1 << 13)) >> 14;

    out[0] = t0 + t7;
    out[1] = t1 + t6;
    out[2] = t2 + t5;
    out[3] = t3 + t4;
    out[4] = t3 - t4;
    out[5] = t2 - t5;
    out[6] = t1 - t6;
    out[7] = t0 - t7;
}
// 8-point 1-D inverse ADST: three rounds of rotations with intermediate
// 14-bit rounding, matching the VP9 reference implementation bit-exactly.
static av_always_inline void iadst8_1d(const int16_t *in, ptrdiff_t stride,
                                       int16_t *out, int pass)
{
    int t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;

    t0a = 16305 * IN(7) +  1606 * IN(0);
    t1a =  1606 * IN(7) - 16305 * IN(0);
    t2a = 14449 * IN(5) +  7723 * IN(2);
    t3a =  7723 * IN(5) - 14449 * IN(2);
    t4a = 10394 * IN(3) + 12665 * IN(4);
    t5a = 12665 * IN(3) - 10394 * IN(4);
    t6a =  4756 * IN(1) + 15679 * IN(6);
    t7a = 15679 * IN(1) -  4756 * IN(6);

    t0 = (t0a + t4a + (1 << 13)) >> 14;
    t1 = (t1a + t5a + (1 << 13)) >> 14;
    t2 = (t2a + t6a + (1 << 13)) >> 14;
    t3 = (t3a + t7a + (1 << 13)) >> 14;
    t4 = (t0a - t4a + (1 << 13)) >> 14;
    t5 = (t1a - t5a + (1 << 13)) >> 14;
    t6 = (t2a - t6a + (1 << 13)) >> 14;
    t7 = (t3a - t7a + (1 << 13)) >> 14;

    t4a = 15137 * t4 +  6270 * t5;
    t5a =  6270 * t4 - 15137 * t5;
    t6a = 15137 * t7 -  6270 * t6;
    t7a =  6270 * t7 + 15137 * t6;

    out[0] =   t0 + t2;
    out[7] = -(t1 + t3);
    t2     =   t0 - t2;
    t3     =   t1 - t3;

    out[1] = -((t4a + t6a + (1 << 13)) >> 14);
    out[6] =   (t5a + t7a + (1 << 13)) >> 14;
    t6     =   (t4a - t6a + (1 << 13)) >> 14;
    t7     =   (t5a - t7a + (1 << 13)) >> 14;

    out[3] = -(((t2 + t3) * 11585 + (1 << 13)) >> 14);
    out[4] =   ((t2 - t3) * 11585 + (1 << 13)) >> 14;
    out[2] =   ((t6 + t7) * 11585 + (1 << 13)) >> 14;
    out[5] = -(((t6 - t7) * 11585 + (1 << 13)) >> 14);
}

itxfm_wrap(8, 5)
// 16-point 1-D inverse DCT: 8-point even stage plus an 8-entry odd stage,
// combined through successive butterfly/rotation rounds.
static av_always_inline void idct16_1d(const int16_t *in, ptrdiff_t stride,
                                       int16_t *out, int pass)
{
    int t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
    int t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
    int t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;

    t0a  = ((IN(0) + IN(8)) * 11585          + (1 << 13)) >> 14;
    t1a  = ((IN(0) - IN(8)) * 11585          + (1 << 13)) >> 14;
    t2a  = (IN(4)  *  6270 - IN(12) * 15137  + (1 << 13)) >> 14;
    t3a  = (IN(4)  * 15137 + IN(12) *  6270  + (1 << 13)) >> 14;
    t4a  = (IN(2)  *  3196 - IN(14) * 16069  + (1 << 13)) >> 14;
    t7a  = (IN(2)  * 16069 + IN(14) *  3196  + (1 << 13)) >> 14;
    t5a  = (IN(10) * 13623 - IN(6)  *  9102  + (1 << 13)) >> 14;
    t6a  = (IN(10) *  9102 + IN(6)  * 13623  + (1 << 13)) >> 14;
    t8a  = (IN(1)  *  1606 - IN(15) * 16305  + (1 << 13)) >> 14;
    t15a = (IN(1)  * 16305 + IN(15) *  1606  + (1 << 13)) >> 14;
    t9a  = (IN(9)  * 12665 - IN(7)  * 10394  + (1 << 13)) >> 14;
    t14a = (IN(9)  * 10394 + IN(7)  * 12665  + (1 << 13)) >> 14;
    t10a = (IN(5)  *  7723 - IN(11) * 14449  + (1 << 13)) >> 14;
    t13a = (IN(5)  * 14449 + IN(11) *  7723  + (1 << 13)) >> 14;
    t11a = (IN(13) * 15679 - IN(3)  *  4756  + (1 << 13)) >> 14;
    t12a = (IN(13) *  4756 + IN(3)  * 15679  + (1 << 13)) >> 14;

    t0  = t0a  + t3a;
    t1  = t1a  + t2a;
    t2  = t1a  - t2a;
    t3  = t0a  - t3a;
    t4  = t4a  + t5a;
    t5  = t4a  - t5a;
    t6  = t7a  - t6a;
    t7  = t7a  + t6a;
    t8  = t8a  + t9a;
    t9  = t8a  - t9a;
    t10 = t11a - t10a;
    t11 = t11a + t10a;
    t12 = t12a + t13a;
    t13 = t12a - t13a;
    t14 = t15a - t14a;
    t15 = t15a + t14a;

    t5a  = ((t6 - t5) * 11585             + (1 << 13)) >> 14;
    t6a  = ((t6 + t5) * 11585             + (1 << 13)) >> 14;
    t9a  = (  t14 *  6270 - t9  * 15137   + (1 << 13)) >> 14;
    t14a = (  t14 * 15137 + t9  *  6270   + (1 << 13)) >> 14;
    t10a = (-(t13 * 15137 + t10 *  6270)  + (1 << 13)) >> 14;
    t13a = (  t13 *  6270 - t10 * 15137   + (1 << 13)) >> 14;

    t0a  = t0   + t7;
    t1a  = t1   + t6a;
    t2a  = t2   + t5a;
    t3a  = t3   + t4;
    t4   = t3   - t4;
    t5   = t2   - t5a;
    t6   = t1   - t6a;
    t7   = t0   - t7;
    t8a  = t8   + t11;
    t9   = t9a  + t10a;
    t10  = t9a  - t10a;
    t11a = t8   - t11;
    t12a = t15  - t12;
    t13  = t14a - t13a;
    t14  = t14a + t13a;
    t15a = t15  + t12;

    t10a = ((t13  - t10)  * 11585 + (1 << 13)) >> 14;
    t13a = ((t13  + t10)  * 11585 + (1 << 13)) >> 14;
    t11  = ((t12a - t11a) * 11585 + (1 << 13)) >> 14;
    t12  = ((t12a + t11a) * 11585 + (1 << 13)) >> 14;

    out[ 0] = t0a + t15a;
    out[ 1] = t1a + t14;
    out[ 2] = t2a + t13a;
    out[ 3] = t3a + t12;
    out[ 4] = t4  + t11;
    out[ 5] = t5  + t10a;
    out[ 6] = t6  + t9;
    out[ 7] = t7  + t8a;
    out[ 8] = t7  - t8a;
    out[ 9] = t6  - t9;
    out[10] = t5  - t10a;
    out[11] = t4  - t11;
    out[12] = t3a - t12;
    out[13] = t2a - t13a;
    out[14] = t1a - t14;
    out[15] = t0a - t15a;
}
// 16-point 1-D inverse ADST: four rotation rounds with intermediate 14-bit
// rounding, mirroring the VP9 reference implementation bit-exactly.
static av_always_inline void iadst16_1d(const int16_t *in, ptrdiff_t stride,
                                        int16_t *out, int pass)
{
    int t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
    int t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
    int t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;

    t0  = IN(15) * 16364 + IN(0)  *   804;
    t1  = IN(15) *   804 - IN(0)  * 16364;
    t2  = IN(13) * 15893 + IN(2)  *  3981;
    t3  = IN(13) *  3981 - IN(2)  * 15893;
    t4  = IN(11) * 14811 + IN(4)  *  7005;
    t5  = IN(11) *  7005 - IN(4)  * 14811;
    t6  = IN(9)  * 13160 + IN(6)  *  9760;
    t7  = IN(9)  *  9760 - IN(6)  * 13160;
    t8  = IN(7)  * 11003 + IN(8)  * 12140;
    t9  = IN(7)  * 12140 - IN(8)  * 11003;
    t10 = IN(5)  *  8423 + IN(10) * 14053;
    t11 = IN(5)  * 14053 - IN(10) *  8423;
    t12 = IN(3)  *  5520 + IN(12) * 15426;
    t13 = IN(3)  * 15426 - IN(12) *  5520;
    t14 = IN(1)  *  2404 + IN(14) * 16207;
    t15 = IN(1)  * 16207 - IN(14) *  2404;

    t0a  = (t0 + t8  + (1 << 13)) >> 14;
    t1a  = (t1 + t9  + (1 << 13)) >> 14;
    t2a  = (t2 + t10 + (1 << 13)) >> 14;
    t3a  = (t3 + t11 + (1 << 13)) >> 14;
    t4a  = (t4 + t12 + (1 << 13)) >> 14;
    t5a  = (t5 + t13 + (1 << 13)) >> 14;
    t6a  = (t6 + t14 + (1 << 13)) >> 14;
    t7a  = (t7 + t15 + (1 << 13)) >> 14;
    t8a  = (t0 - t8  + (1 << 13)) >> 14;
    t9a  = (t1 - t9  + (1 << 13)) >> 14;
    t10a = (t2 - t10 + (1 << 13)) >> 14;
    t11a = (t3 - t11 + (1 << 13)) >> 14;
    t12a = (t4 - t12 + (1 << 13)) >> 14;
    t13a = (t5 - t13 + (1 << 13)) >> 14;
    t14a = (t6 - t14 + (1 << 13)) >> 14;
    t15a = (t7 - t15 + (1 << 13)) >> 14;

    t8  = t8a  * 16069 + t9a  *  3196;
    t9  = t8a  *  3196 - t9a  * 16069;
    t10 = t10a *  9102 + t11a * 13623;
    t11 = t10a * 13623 - t11a *  9102;
    t12 = t13a * 16069 - t12a *  3196;
    t13 = t13a *  3196 + t12a * 16069;
    t14 = t15a *  9102 - t14a * 13623;
    t15 = t15a * 13623 + t14a *  9102;

    t0 = t0a + t4a;
    t1 = t1a + t5a;
    t2 = t2a + t6a;
    t3 = t3a + t7a;
    t4 = t0a - t4a;
    t5 = t1a - t5a;
    t6 = t2a - t6a;
    t7 = t3a - t7a;

    t8a  = (t8  + t12 + (1 << 13)) >> 14;
    t9a  = (t9  + t13 + (1 << 13)) >> 14;
    t10a = (t10 + t14 + (1 << 13)) >> 14;
    t11a = (t11 + t15 + (1 << 13)) >> 14;
    t12a = (t8  - t12 + (1 << 13)) >> 14;
    t13a = (t9  - t13 + (1 << 13)) >> 14;
    t14a = (t10 - t14 + (1 << 13)) >> 14;
    t15a = (t11 - t15 + (1 << 13)) >> 14;

    t4a = t4 * 15137 + t5 *  6270;
    t5a = t4 *  6270 - t5 * 15137;
    t6a = t7 * 15137 - t6 *  6270;
    t7a = t7 *  6270 + t6 * 15137;

    t12 = t12a * 15137 + t13a *  6270;
    t13 = t12a *  6270 - t13a * 15137;
    t14 = t15a * 15137 - t14a *  6270;
    t15 = t15a *  6270 + t14a * 15137;

    out[ 0] =   t0 + t2;
    out[15] = -(t1 + t3);
    t2a     =   t0 - t2;
    t3a     =   t1 - t3;

    out[ 3] = -((t4a + t6a + (1 << 13)) >> 14);
    out[12] =   (t5a + t7a + (1 << 13)) >> 14;
    t6      =   (t4a - t6a + (1 << 13)) >> 14;
    t7      =   (t5a - t7a + (1 << 13)) >> 14;

    out[ 1] = -(t8a + t10a);
    out[14] =   t9a + t11a;
    t10     =   t8a - t10a;
    t11     =   t9a - t11a;

    out[ 2] =   (t12 + t14 + (1 << 13)) >> 14;
    out[13] = -((t13 + t15 + (1 << 13)) >> 14);
    t14a    =   (t12 - t14 + (1 << 13)) >> 14;
    t15a    =   (t13 - t15 + (1 << 13)) >> 14;

    out[ 7] = ((t2a  + t3a)  * -11585 + (1 << 13)) >> 14;
    out[ 8] = ((t2a  - t3a)  *  11585 + (1 << 13)) >> 14;
    out[ 4] = ((t7   + t6)   *  11585 + (1 << 13)) >> 14;
    out[11] = ((t7   - t6)   *  11585 + (1 << 13)) >> 14;
    out[ 6] = ((t11  + t10)  *  11585 + (1 << 13)) >> 14;
    out[ 9] = ((t11  - t10)  *  11585 + (1 << 13)) >> 14;
    out[ 5] = ((t14a + t15a) * -11585 + (1 << 13)) >> 14;
    out[10] = ((t14a - t15a) *  11585 + (1 << 13)) >> 14;
}

itxfm_wrap(16, 6)
/*
 * One-dimensional 32-point inverse DCT, run over a row (pass 0) or a
 * column (pass 1) depending on how the IN() macro reads `in`/`stride`.
 * The 32 results are written contiguously into out[].
 *
 * All multipliers are 14-bit fixed-point cosine constants
 * (e.g. 11585 == round(cos(pi/4) * 2^14)); every product is rounded with
 * (1 << 13) before the >> 14 renormalization, matching the reference
 * integer transform.
 */
static av_always_inline void idct32_1d(const int16_t *in, ptrdiff_t stride,
                                       int16_t *out, int pass)
{
    /* stage 1: initial rotations of the even (t0..t15) and odd (t16..t31)
     * halves of the input spectrum */
    int t0a = ((IN(0) + IN(16)) * 11585 + (1 << 13)) >> 14;
    int t1a = ((IN(0) - IN(16)) * 11585 + (1 << 13)) >> 14;
    int t2a = (IN( 8) * 6270 - IN(24) * 15137 + (1 << 13)) >> 14;
    int t3a = (IN( 8) * 15137 + IN(24) * 6270 + (1 << 13)) >> 14;
    int t4a = (IN( 4) * 3196 - IN(28) * 16069 + (1 << 13)) >> 14;
    int t7a = (IN( 4) * 16069 + IN(28) * 3196 + (1 << 13)) >> 14;
    int t5a = (IN(20) * 13623 - IN(12) * 9102 + (1 << 13)) >> 14;
    int t6a = (IN(20) * 9102 + IN(12) * 13623 + (1 << 13)) >> 14;
    int t8a = (IN( 2) * 1606 - IN(30) * 16305 + (1 << 13)) >> 14;
    int t15a = (IN( 2) * 16305 + IN(30) * 1606 + (1 << 13)) >> 14;
    int t9a = (IN(18) * 12665 - IN(14) * 10394 + (1 << 13)) >> 14;
    int t14a = (IN(18) * 10394 + IN(14) * 12665 + (1 << 13)) >> 14;
    int t10a = (IN(10) * 7723 - IN(22) * 14449 + (1 << 13)) >> 14;
    int t13a = (IN(10) * 14449 + IN(22) * 7723 + (1 << 13)) >> 14;
    int t11a = (IN(26) * 15679 - IN( 6) * 4756 + (1 << 13)) >> 14;
    int t12a = (IN(26) * 4756 + IN( 6) * 15679 + (1 << 13)) >> 14;
    int t16a = (IN( 1) * 804 - IN(31) * 16364 + (1 << 13)) >> 14;
    int t31a = (IN( 1) * 16364 + IN(31) * 804 + (1 << 13)) >> 14;
    int t17a = (IN(17) * 12140 - IN(15) * 11003 + (1 << 13)) >> 14;
    int t30a = (IN(17) * 11003 + IN(15) * 12140 + (1 << 13)) >> 14;
    int t18a = (IN( 9) * 7005 - IN(23) * 14811 + (1 << 13)) >> 14;
    int t29a = (IN( 9) * 14811 + IN(23) * 7005 + (1 << 13)) >> 14;
    int t19a = (IN(25) * 15426 - IN( 7) * 5520 + (1 << 13)) >> 14;
    int t28a = (IN(25) * 5520 + IN( 7) * 15426 + (1 << 13)) >> 14;
    int t20a = (IN( 5) * 3981 - IN(27) * 15893 + (1 << 13)) >> 14;
    int t27a = (IN( 5) * 15893 + IN(27) * 3981 + (1 << 13)) >> 14;
    int t21a = (IN(21) * 14053 - IN(11) * 8423 + (1 << 13)) >> 14;
    int t26a = (IN(21) * 8423 + IN(11) * 14053 + (1 << 13)) >> 14;
    int t22a = (IN(13) * 9760 - IN(19) * 13160 + (1 << 13)) >> 14;
    int t25a = (IN(13) * 13160 + IN(19) * 9760 + (1 << 13)) >> 14;
    int t23a = (IN(29) * 16207 - IN( 3) * 2404 + (1 << 13)) >> 14;
    int t24a = (IN(29) * 2404 + IN( 3) * 16207 + (1 << 13)) >> 14;
    /* stage 2: pure add/sub butterflies, no renormalization needed */
    int t0 = t0a + t3a;
    int t1 = t1a + t2a;
    int t2 = t1a - t2a;
    int t3 = t0a - t3a;
    int t4 = t4a + t5a;
    int t5 = t4a - t5a;
    int t6 = t7a - t6a;
    int t7 = t7a + t6a;
    int t8 = t8a + t9a;
    int t9 = t8a - t9a;
    int t10 = t11a - t10a;
    int t11 = t11a + t10a;
    int t12 = t12a + t13a;
    int t13 = t12a - t13a;
    int t14 = t15a - t14a;
    int t15 = t15a + t14a;
    int t16 = t16a + t17a;
    int t17 = t16a - t17a;
    int t18 = t19a - t18a;
    int t19 = t19a + t18a;
    int t20 = t20a + t21a;
    int t21 = t20a - t21a;
    int t22 = t23a - t22a;
    int t23 = t23a + t22a;
    int t24 = t24a + t25a;
    int t25 = t24a - t25a;
    int t26 = t27a - t26a;
    int t27 = t27a + t26a;
    int t28 = t28a + t29a;
    int t29 = t28a - t29a;
    int t30 = t31a - t30a;
    int t31 = t31a + t30a;
    /* stage 3: second set of rotations on the differences */
    t5a = ((t6 - t5) * 11585 + (1 << 13)) >> 14;
    t6a = ((t6 + t5) * 11585 + (1 << 13)) >> 14;
    t9a = ( t14 * 6270 - t9 * 15137 + (1 << 13)) >> 14;
    t14a = ( t14 * 15137 + t9 * 6270 + (1 << 13)) >> 14;
    t10a = (-(t13 * 15137 + t10 * 6270) + (1 << 13)) >> 14;
    t13a = ( t13 * 6270 - t10 * 15137 + (1 << 13)) >> 14;
    t17a = ( t30 * 3196 - t17 * 16069 + (1 << 13)) >> 14;
    t30a = ( t30 * 16069 + t17 * 3196 + (1 << 13)) >> 14;
    t18a = (-(t29 * 16069 + t18 * 3196) + (1 << 13)) >> 14;
    t29a = ( t29 * 3196 - t18 * 16069 + (1 << 13)) >> 14;
    t21a = ( t26 * 13623 - t21 * 9102 + (1 << 13)) >> 14;
    t26a = ( t26 * 9102 + t21 * 13623 + (1 << 13)) >> 14;
    t22a = (-(t25 * 9102 + t22 * 13623) + (1 << 13)) >> 14;
    t25a = ( t25 * 13623 - t22 * 9102 + (1 << 13)) >> 14;
    /* stage 4: butterflies merging the 8-point and 16-point halves */
    t0a = t0 + t7;
    t1a = t1 + t6a;
    t2a = t2 + t5a;
    t3a = t3 + t4;
    t4a = t3 - t4;
    t5 = t2 - t5a;
    t6 = t1 - t6a;
    t7a = t0 - t7;
    t8a = t8 + t11;
    t9 = t9a + t10a;
    t10 = t9a - t10a;
    t11a = t8 - t11;
    t12a = t15 - t12;
    t13 = t14a - t13a;
    t14 = t14a + t13a;
    t15a = t15 + t12;
    t16a = t16 + t19;
    t17 = t17a + t18a;
    t18 = t17a - t18a;
    t19a = t16 - t19;
    t20a = t23 - t20;
    t21 = t22a - t21a;
    t22 = t22a + t21a;
    t23a = t23 + t20;
    t24a = t24 + t27;
    t25 = t25a + t26a;
    t26 = t25a - t26a;
    t27a = t24 - t27;
    t28a = t31 - t28;
    t29 = t30a - t29a;
    t30 = t30a + t29a;
    t31a = t31 + t28;
    /* stage 5: remaining rotations */
    t10a = ((t13 - t10) * 11585 + (1 << 13)) >> 14;
    t13a = ((t13 + t10) * 11585 + (1 << 13)) >> 14;
    t11 = ((t12a - t11a) * 11585 + (1 << 13)) >> 14;
    t12 = ((t12a + t11a) * 11585 + (1 << 13)) >> 14;
    t18a = ( t29 * 6270 - t18 * 15137 + (1 << 13)) >> 14;
    t29a = ( t29 * 15137 + t18 * 6270 + (1 << 13)) >> 14;
    t19 = ( t28a * 6270 - t19a * 15137 + (1 << 13)) >> 14;
    t28 = ( t28a * 15137 + t19a * 6270 + (1 << 13)) >> 14;
    t20 = (-(t27a * 15137 + t20a * 6270) + (1 << 13)) >> 14;
    t27 = ( t27a * 6270 - t20a * 15137 + (1 << 13)) >> 14;
    t21a = (-(t26 * 15137 + t21 * 6270) + (1 << 13)) >> 14;
    t26a = ( t26 * 6270 - t21 * 15137 + (1 << 13)) >> 14;
    /* stage 6: butterflies across the full 32-point range */
    t0 = t0a + t15a;
    t1 = t1a + t14;
    t2 = t2a + t13a;
    t3 = t3a + t12;
    t4 = t4a + t11;
    t5a = t5 + t10a;
    t6a = t6 + t9;
    t7 = t7a + t8a;
    t8 = t7a - t8a;
    t9a = t6 - t9;
    t10 = t5 - t10a;
    t11a = t4a - t11;
    t12a = t3a - t12;
    t13 = t2a - t13a;
    t14a = t1a - t14;
    t15 = t0a - t15a;
    t16 = t16a + t23a;
    t17a = t17 + t22;
    t18 = t18a + t21a;
    t19a = t19 + t20;
    t20a = t19 - t20;
    t21 = t18a - t21a;
    t22a = t17 - t22;
    t23 = t16a - t23a;
    t24 = t31a - t24a;
    t25a = t30 - t25;
    t26 = t29a - t26a;
    t27a = t28 - t27;
    t28a = t28 + t27;
    t29 = t29a + t26a;
    t30a = t30 + t25;
    t31 = t31a + t24a;
    /* stage 7: final cos(pi/4) rotations on the middle odd terms */
    t20 = ((t27a - t20a) * 11585 + (1 << 13)) >> 14;
    t27 = ((t27a + t20a) * 11585 + (1 << 13)) >> 14;
    t21a = ((t26 - t21 ) * 11585 + (1 << 13)) >> 14;
    t26a = ((t26 + t21 ) * 11585 + (1 << 13)) >> 14;
    t22 = ((t25a - t22a) * 11585 + (1 << 13)) >> 14;
    t25 = ((t25a + t22a) * 11585 + (1 << 13)) >> 14;
    t23a = ((t24 - t23 ) * 11585 + (1 << 13)) >> 14;
    t24a = ((t24 + t23 ) * 11585 + (1 << 13)) >> 14;
    /* stage 8: output butterflies; out[k]/out[31-k] are sum/difference pairs */
    out[ 0] = t0 + t31;
    out[ 1] = t1 + t30a;
    out[ 2] = t2 + t29;
    out[ 3] = t3 + t28a;
    out[ 4] = t4 + t27;
    out[ 5] = t5a + t26a;
    out[ 6] = t6a + t25;
    out[ 7] = t7 + t24a;
    out[ 8] = t8 + t23a;
    out[ 9] = t9a + t22;
    out[10] = t10 + t21a;
    out[11] = t11a + t20;
    out[12] = t12a + t19a;
    out[13] = t13 + t18;
    out[14] = t14a + t17a;
    out[15] = t15 + t16;
    out[16] = t15 - t16;
    out[17] = t14a - t17a;
    out[18] = t13 - t18;
    out[19] = t12a - t19a;
    out[20] = t11a - t20;
    out[21] = t10 - t21a;
    out[22] = t9a - t22;
    out[23] = t8 - t23a;
    out[24] = t7 - t24a;
    out[25] = t6a - t25;
    out[26] = t5a - t26a;
    out[27] = t4 - t27;
    out[28] = t3 - t28a;
    out[29] = t2 - t29;
    out[30] = t1 - t30a;
    out[31] = t0 - t31;
}
/* Instantiate idct_idct_32x32_add_c (pass-2 shift 6; last argument presumably
 * selects a wrapper fast path -- confirm against the itxfm_wrapper definition). */
itxfm_wrapper(idct, idct, 32, 6, 1)
/*
 * One-dimensional 4-point inverse Walsh-Hadamard transform (lossless mode).
 * Pass 0 (rows) pre-scales the raw coefficients down by 2 bits; pass 1
 * (columns) operates on the unscaled intermediate values.
 *
 * Note: the previous text of this function contained fused diff residue
 * from an unrelated hunk ("} else { } else {", "assert(bpp == 12);",
 * "ff_vp9dsp_init_12(dsp);") which made it unparsable; the else branch is
 * restored to mirror the pass-0 branch without the >> 2 scaling.
 */
static av_always_inline void iwht4_1d(const int16_t *in, ptrdiff_t stride,
                                      int16_t *out, int pass)
{
    int t0, t1, t2, t3, t4;

    if (pass == 0) {
        t0 = IN(0) >> 2;
        t1 = IN(3) >> 2;
        t2 = IN(1) >> 2;
        t3 = IN(2) >> 2;
    } else {
        t0 = IN(0);
        t1 = IN(3);
        t2 = IN(1);
        t3 = IN(2);
    }
    /* integer lifting steps of the inverse WHT */
    t0 += t2;
    t3 -= t1;
    t4 = (t0 - t3) >> 1;
    t1 = t4 - t1;
    t2 = t4 - t2;
    t0 -= t1;
    t3 += t2;
    out[0] = t0;
    out[1] = t1;
    out[2] = t2;
    out[3] = t3;
}
/* Instantiate iwht_iwht_4x4_add_c for lossless blocks (no final shift). */
itxfm_wrapper(iwht, iwht, 4, 0, 0)
/* The 1D kernels above are the last users of these generator macros. */
#undef IN
#undef itxfm_wrapper
#undef itxfm_wrap
/*
 * Fill the inverse-transform-and-add function table. The helper macros the
 * original used are expanded here into plain assignments: 4x4/8x8/16x16 get
 * all four DCT/ADST combinations, 32x32 only ever uses DCT in both
 * dimensions, and slot 4 is the lossless 4x4 Walsh-Hadamard transform.
 */
static av_cold void vp9dsp_itxfm_init(VP9DSPContext *dsp)
{
    dsp->itxfm_add[TX_4X4][DCT_DCT]     = idct_idct_4x4_add_c;
    dsp->itxfm_add[TX_4X4][DCT_ADST]    = iadst_idct_4x4_add_c;
    dsp->itxfm_add[TX_4X4][ADST_DCT]    = idct_iadst_4x4_add_c;
    dsp->itxfm_add[TX_4X4][ADST_ADST]   = iadst_iadst_4x4_add_c;

    dsp->itxfm_add[TX_8X8][DCT_DCT]     = idct_idct_8x8_add_c;
    dsp->itxfm_add[TX_8X8][DCT_ADST]    = iadst_idct_8x8_add_c;
    dsp->itxfm_add[TX_8X8][ADST_DCT]    = idct_iadst_8x8_add_c;
    dsp->itxfm_add[TX_8X8][ADST_ADST]   = iadst_iadst_8x8_add_c;

    dsp->itxfm_add[TX_16X16][DCT_DCT]   = idct_idct_16x16_add_c;
    dsp->itxfm_add[TX_16X16][DCT_ADST]  = iadst_idct_16x16_add_c;
    dsp->itxfm_add[TX_16X16][ADST_DCT]  = idct_iadst_16x16_add_c;
    dsp->itxfm_add[TX_16X16][ADST_ADST] = iadst_iadst_16x16_add_c;

    /* 32x32 has no ADST variant; all four slots point at the same DCT. */
    dsp->itxfm_add[TX_32X32][DCT_DCT]   =
    dsp->itxfm_add[TX_32X32][ADST_DCT]  =
    dsp->itxfm_add[TX_32X32][DCT_ADST]  =
    dsp->itxfm_add[TX_32X32][ADST_ADST] = idct_idct_32x32_add_c;

    /* slot 4 = lossless mode: WHT regardless of the nominal txfm type */
    dsp->itxfm_add[4][DCT_DCT]   =
    dsp->itxfm_add[4][ADST_DCT]  =
    dsp->itxfm_add[4][DCT_ADST]  =
    dsp->itxfm_add[4][ADST_ADST] = iwht_iwht_4x4_add_c;
}
/*
 * In-loop deblocking filter over one 8-pixel edge segment.
 *
 * E, I and H are threshold values passed down from the frame header
 * (presumably the VP9 edge / inner / high-edge-variance limits -- confirm
 * against callers). stridea advances along the edge, strideb across it, so
 * one body serves both horizontal and vertical edges. wd is the filter
 * width (4, 8 or 16); it is a compile-time constant in all instantiations
 * below, so the wd branches fold away and flat8in/flat8out are never read
 * uninitialized.
 *
 * Note: the previous text ended with a stray "} }" (fused diff-column
 * residue), leaving one brace too many; fixed to close the loop and the
 * function exactly once each.
 */
static av_always_inline void loop_filter(uint8_t *dst, int E, int I, int H,
                                         ptrdiff_t stridea, ptrdiff_t strideb,
                                         int wd)
{
    int i;

    for (i = 0; i < 8; i++, dst += stridea) {
        int p7, p6, p5, p4;
        int p3 = dst[strideb * -4], p2 = dst[strideb * -3];
        int p1 = dst[strideb * -2], p0 = dst[strideb * -1];
        int q0 = dst[strideb * +0], q1 = dst[strideb * +1];
        int q2 = dst[strideb * +2], q3 = dst[strideb * +3];
        int q4, q5, q6, q7;
        /* filter mask: leave this line untouched if local activity exceeds
         * the thresholds */
        int fm = FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I &&
                 FFABS(p1 - p0) <= I && FFABS(q1 - q0) <= I &&
                 FFABS(q2 - q1) <= I && FFABS(q3 - q2) <= I &&
                 FFABS(p0 - q0) * 2 + (FFABS(p1 - q1) >> 1) <= E;
        int flat8out, flat8in;

        if (!fm)
            continue;

        if (wd >= 16) {
            p7 = dst[strideb * -8];
            p6 = dst[strideb * -7];
            p5 = dst[strideb * -6];
            p4 = dst[strideb * -5];
            q4 = dst[strideb * +4];
            q5 = dst[strideb * +5];
            q6 = dst[strideb * +6];
            q7 = dst[strideb * +7];

            /* "flat" outer region: all far taps within 1 of the edge pixel */
            flat8out = FFABS(p7 - p0) <= 1 && FFABS(p6 - p0) <= 1 &&
                       FFABS(p5 - p0) <= 1 && FFABS(p4 - p0) <= 1 &&
                       FFABS(q4 - q0) <= 1 && FFABS(q5 - q0) <= 1 &&
                       FFABS(q6 - q0) <= 1 && FFABS(q7 - q0) <= 1;
        }
        if (wd >= 8)
            flat8in = FFABS(p3 - p0) <= 1 && FFABS(p2 - p0) <= 1 &&
                      FFABS(p1 - p0) <= 1 && FFABS(q1 - q0) <= 1 &&
                      FFABS(q2 - q0) <= 1 && FFABS(q3 - q0) <= 1;

        if (wd >= 16 && flat8out && flat8in) {
            /* 15-tap smoothing across the full 16-pixel window */
            dst[strideb * -7] = (p7 + p7 + p7 + p7 + p7 + p7 + p7 + p6 * 2 +
                                 p5 + p4 + p3 + p2 + p1 + p0 + q0 + 8) >> 4;
            dst[strideb * -6] = (p7 + p7 + p7 + p7 + p7 + p7 + p6 + p5 * 2 +
                                 p4 + p3 + p2 + p1 + p0 + q0 + q1 + 8) >> 4;
            dst[strideb * -5] = (p7 + p7 + p7 + p7 + p7 + p6 + p5 + p4 * 2 +
                                 p3 + p2 + p1 + p0 + q0 + q1 + q2 + 8) >> 4;
            dst[strideb * -4] = (p7 + p7 + p7 + p7 + p6 + p5 + p4 + p3 * 2 +
                                 p2 + p1 + p0 + q0 + q1 + q2 + q3 + 8) >> 4;
            dst[strideb * -3] = (p7 + p7 + p7 + p6 + p5 + p4 + p3 + p2 * 2 +
                                 p1 + p0 + q0 + q1 + q2 + q3 + q4 + 8) >> 4;
            dst[strideb * -2] = (p7 + p7 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
                                 p0 + q0 + q1 + q2 + q3 + q4 + q5 + 8) >> 4;
            dst[strideb * -1] = (p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
                                 q0 + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
            dst[strideb * +0] = (p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 +
                                 q1 + q2 + q3 + q4 + q5 + q6 + q7 + 8) >> 4;
            dst[strideb * +1] = (p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 +
                                 q2 + q3 + q4 + q5 + q6 + q7 + q7 + 8) >> 4;
            dst[strideb * +2] = (p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 +
                                 q3 + q4 + q5 + q6 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +3] = (p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 * 2 +
                                 q4 + q5 + q6 + q7 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +4] = (p2 + p1 + p0 + q0 + q1 + q2 + q3 + q4 * 2 +
                                 q5 + q6 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +5] = (p1 + p0 + q0 + q1 + q2 + q3 + q4 + q5 * 2 +
                                 q6 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +6] = (p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 +
                                 q7 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
        } else if (wd >= 8 && flat8in) {
            /* 7-tap smoothing across the 8-pixel window */
            dst[strideb * -3] = (p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3;
            dst[strideb * -2] = (p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3;
            dst[strideb * -1] = (p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3;
            dst[strideb * +0] = (p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3;
            dst[strideb * +1] = (p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3 + 4) >> 3;
            dst[strideb * +2] = (p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3;
        } else {
            /* narrow 4-tap filter; hev selects how many pixels get adjusted */
            int hev = FFABS(p1 - p0) > H || FFABS(q1 - q0) > H;

            if (hev) {
                int f = av_clip_int8(3 * (q0 - p0) + av_clip_int8(p1 - q1)), f1, f2;

                f1 = FFMIN(f + 4, 127) >> 3;
                f2 = FFMIN(f + 3, 127) >> 3;

                dst[strideb * -1] = av_clip_uint8(p0 + f2);
                dst[strideb * +0] = av_clip_uint8(q0 - f1);
            } else {
                int f = av_clip_int8(3 * (q0 - p0)), f1, f2;

                f1 = FFMIN(f + 4, 127) >> 3;
                f2 = FFMIN(f + 3, 127) >> 3;

                dst[strideb * -1] = av_clip_uint8(p0 + f2);
                dst[strideb * +0] = av_clip_uint8(q0 - f1);

                /* low variance: also nudge p1/q1 by half the delta */
                f = (f1 + 1) >> 1;
                dst[strideb * -2] = av_clip_uint8(p1 + f);
                dst[strideb * +1] = av_clip_uint8(q1 - f);
            }
        }
    }
}
/* Generate the 8-pixel loop-filter entry points: one function per
 * direction (h/v) and width (4/8/16), all delegating to loop_filter()
 * with the stride pair swapped for horizontal vs vertical edges. */
#define lf_8_fn(dir, wd, stridea, strideb) \
static void loop_filter_##dir##_##wd##_8_c(uint8_t *dst, \
                                           ptrdiff_t stride, \
                                           int E, int I, int H) \
{ \
    loop_filter(dst, E, I, H, stridea, strideb, wd); \
}

#define lf_8_fns(wd) \
lf_8_fn(h, wd, stride, 1) \
lf_8_fn(v, wd, 1, stride)

lf_8_fns(4)
lf_8_fns(8)
lf_8_fns(16)

#undef lf_8_fn
#undef lf_8_fns

/* 16-pixel-wide variant: run the 8-pixel width-16 filter on both halves. */
#define lf_16_fn(dir, stridea) \
static void loop_filter_##dir##_16_16_c(uint8_t *dst, \
                                        ptrdiff_t stride, \
                                        int E, int I, int H) \
{ \
    loop_filter_##dir##_16_8_c(dst, stride, E, I, H); \
    loop_filter_##dir##_16_8_c(dst + 8 * stridea, stride, E, I, H); \
}

lf_16_fn(h, stride)
lf_16_fn(v, 1)

#undef lf_16_fn

/* Mixed variants: two 8-pixel filters with per-half thresholds packed into
 * the low/high bytes of E/I/H. */
#define lf_mix_fn(dir, wd1, wd2, stridea) \
static void loop_filter_##dir##_##wd1##wd2##_16_c(uint8_t *dst, \
                                                  ptrdiff_t stride, \
                                                  int E, int I, int H) \
{ \
    loop_filter_##dir##_##wd1##_8_c(dst, stride, E & 0xff, I & 0xff, H & 0xff); \
    loop_filter_##dir##_##wd2##_8_c(dst + 8 * stridea, stride, E >> 8, I >> 8, H >> 8); \
}

#define lf_mix_fns(wd1, wd2) \
lf_mix_fn(h, wd1, wd2, stride) \
lf_mix_fn(v, wd1, wd2, 1)

lf_mix_fns(4, 4)
lf_mix_fns(4, 8)
lf_mix_fns(8, 4)
lf_mix_fns(8, 8)

#undef lf_mix_fn
#undef lf_mix_fns
/*
 * Wire up the loop-filter function table. All assignments are independent,
 * so they are grouped here by table rather than by generation order:
 * loop_filter_8[width idx][0=h, 1=v], loop_filter_16[0=h, 1=v], and
 * loop_filter_mix2[wd1 idx][wd2 idx][0=h, 1=v].
 */
static av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp)
{
    /* single-edge filters, 8 pixels long; rows are widths 4 / 8 / 16 */
    dsp->loop_filter_8[0][1] = loop_filter_v_4_8_c;
    dsp->loop_filter_8[0][0] = loop_filter_h_4_8_c;
    dsp->loop_filter_8[1][1] = loop_filter_v_8_8_c;
    dsp->loop_filter_8[1][0] = loop_filter_h_8_8_c;
    dsp->loop_filter_8[2][1] = loop_filter_v_16_8_c;
    dsp->loop_filter_8[2][0] = loop_filter_h_16_8_c;

    /* width-16 filters spanning a full 16-pixel edge */
    dsp->loop_filter_16[1] = loop_filter_v_16_16_c;
    dsp->loop_filter_16[0] = loop_filter_h_16_16_c;

    /* mixed-width pairs (4/8 in either half of a 16-pixel edge) */
    dsp->loop_filter_mix2[0][0][1] = loop_filter_v_44_16_c;
    dsp->loop_filter_mix2[0][0][0] = loop_filter_h_44_16_c;
    dsp->loop_filter_mix2[0][1][1] = loop_filter_v_48_16_c;
    dsp->loop_filter_mix2[0][1][0] = loop_filter_h_48_16_c;
    dsp->loop_filter_mix2[1][0][1] = loop_filter_v_84_16_c;
    dsp->loop_filter_mix2[1][0][0] = loop_filter_h_84_16_c;
    dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c;
    dsp->loop_filter_mix2[1][1][0] = loop_filter_h_88_16_c;
}
/*
 * Copy a w x h pixel block, one row per memcpy. h is expected to be >= 1
 * (the do/while shape mirrors that contract).
 */
static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
                                    const uint8_t *src, ptrdiff_t src_stride,
                                    int w, int h)
{
    do {
        memcpy(dst, src, w);
        src += src_stride;
        dst += dst_stride;
    } while (--h);
}
/*
 * Rounded average of a w x h block into dst, 4 bytes at a time via
 * rnd_avg32. dst is read/written aligned (AV_RN32A/AV_WN32A), src may be
 * unaligned (AV_RN32). w must be a multiple of 4 and h >= 1.
 */
static av_always_inline void avg_c(uint8_t *dst, ptrdiff_t dst_stride,
                                   const uint8_t *src, ptrdiff_t src_stride,
                                   int w, int h)
{
    do {
        int i;

        for (i = 0; i < w; i += 4) {
            uint32_t d = AV_RN32A(&dst[i]);
            uint32_t s = AV_RN32(&src[i]);

            AV_WN32A(&dst[i], rnd_avg32(d, s));
        }
        src += src_stride;
        dst += dst_stride;
    } while (--h);
}
/* Generate the full-pel entry points (copy/avg for block sizes 4..64);
 * mx/my are part of the common mc prototype but unused at full-pel. */
#define fpel_fn(type, sz) \
static void type##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                         const uint8_t *src, ptrdiff_t src_stride, \
                         int h, int mx, int my) \
{ \
    type##_c(dst, dst_stride, src, src_stride, sz, h); \
}

#define copy_avg_fn(sz) \
fpel_fn(copy, sz) \
fpel_fn(avg, sz)

copy_avg_fn(64)
copy_avg_fn(32)
copy_avg_fn(16)
copy_avg_fn(8)
copy_avg_fn(4)

#undef fpel_fn
#undef copy_avg_fn
/* 8-tap subpel interpolation filter coefficients, one 8-tap kernel per
 * 1/16th-pel phase (16 phases), for the three 8-tap filter families.
 * Each kernel's taps sum to 128 (7-bit normalization; the FILTER_8TAP
 * macro below rounds with +64 and shifts by 7). Phase 0 is the identity. */
static const int16_t vp9_subpel_filters[3][16][8] = {
    [FILTER_8TAP_REGULAR] = {
        { 0, 0, 0, 128, 0, 0, 0, 0 },
        { 0, 1, -5, 126, 8, -3, 1, 0 },
        { -1, 3, -10, 122, 18, -6, 2, 0 },
        { -1, 4, -13, 118, 27, -9, 3, -1 },
        { -1, 4, -16, 112, 37, -11, 4, -1 },
        { -1, 5, -18, 105, 48, -14, 4, -1 },
        { -1, 5, -19, 97, 58, -16, 5, -1 },
        { -1, 6, -19, 88, 68, -18, 5, -1 },
        { -1, 6, -19, 78, 78, -19, 6, -1 },
        { -1, 5, -18, 68, 88, -19, 6, -1 },
        { -1, 5, -16, 58, 97, -19, 5, -1 },
        { -1, 4, -14, 48, 105, -18, 5, -1 },
        { -1, 4, -11, 37, 112, -16, 4, -1 },
        { -1, 3, -9, 27, 118, -13, 4, -1 },
        { 0, 2, -6, 18, 122, -10, 3, -1 },
        { 0, 1, -3, 8, 126, -5, 1, 0 },
    }, [FILTER_8TAP_SHARP] = {
        { 0, 0, 0, 128, 0, 0, 0, 0 },
        { -1, 3, -7, 127, 8, -3, 1, 0 },
        { -2, 5, -13, 125, 17, -6, 3, -1 },
        { -3, 7, -17, 121, 27, -10, 5, -2 },
        { -4, 9, -20, 115, 37, -13, 6, -2 },
        { -4, 10, -23, 108, 48, -16, 8, -3 },
        { -4, 10, -24, 100, 59, -19, 9, -3 },
        { -4, 11, -24, 90, 70, -21, 10, -4 },
        { -4, 11, -23, 80, 80, -23, 11, -4 },
        { -4, 10, -21, 70, 90, -24, 11, -4 },
        { -3, 9, -19, 59, 100, -24, 10, -4 },
        { -3, 8, -16, 48, 108, -23, 10, -4 },
        { -2, 6, -13, 37, 115, -20, 9, -4 },
        { -2, 5, -10, 27, 121, -17, 7, -3 },
        { -1, 3, -6, 17, 125, -13, 5, -2 },
        { 0, 1, -3, 8, 127, -7, 3, -1 },
    }, [FILTER_8TAP_SMOOTH] = {
        { 0, 0, 0, 128, 0, 0, 0, 0 },
        { -3, -1, 32, 64, 38, 1, -3, 0 },
        { -2, -2, 29, 63, 41, 2, -3, 0 },
        { -2, -2, 26, 63, 43, 4, -4, 0 },
        { -2, -3, 24, 62, 46, 5, -4, 0 },
        { -2, -3, 21, 60, 49, 7, -4, 0 },
        { -1, -4, 18, 59, 51, 9, -4, 0 },
        { -1, -4, 16, 57, 53, 12, -4, -1 },
        { -1, -4, 14, 55, 55, 14, -4, -1 },
        { -1, -4, 12, 53, 57, 16, -4, -1 },
        { 0, -4, 9, 51, 59, 18, -4, -1 },
        { 0, -4, 7, 49, 60, 21, -3, -2 },
        { 0, -4, 5, 46, 62, 24, -3, -2 },
        { 0, -4, 4, 43, 63, 26, -2, -2 },
        { 0, -3, 2, 41, 63, 29, -2, -2 },
        { 0, -3, 1, 38, 64, 32, -1, -3 },
    }
};
/* Apply one 8-tap kernel F at position x of src (taps 3 before to 4 after,
 * `stride` apart), round with +64 and renormalize by >> 7, then clip to
 * the 8-bit pixel range. */
#define FILTER_8TAP(src, x, F, stride) \
    av_clip_uint8((F[0] * src[x + -3 * stride] + \
                   F[1] * src[x + -2 * stride] + \
                   F[2] * src[x + -1 * stride] + \
                   F[3] * src[x + +0 * stride] + \
                   F[4] * src[x + +1 * stride] + \
                   F[5] * src[x + +2 * stride] + \
                   F[6] * src[x + +3 * stride] + \
                   F[7] * src[x + +4 * stride] + 64) >> 7)
/*
 * One-dimensional 8-tap filter over a w x h block. ds is the tap spacing
 * (1 for horizontal, src_stride for vertical). When avg is set the result
 * is rounded-averaged with the existing dst pixels; avg is a literal in
 * all callers, so the branch folds at compile time.
 */
static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
                                          const uint8_t *src, ptrdiff_t src_stride,
                                          int w, int h, ptrdiff_t ds,
                                          const int16_t *filter, int avg)
{
    do {
        int i;

        for (i = 0; i < w; i++) {
            int px = FILTER_8TAP(src, i, filter, ds);

            dst[i] = avg ? (dst[i] + px + 1) >> 1 : px;
        }
        src += src_stride;
        dst += dst_stride;
    } while (--h);
}
/* Generate the four 1D 8-tap drivers: {put,avg} x {vertical,horizontal},
 * differing only in the avg flag and the tap spacing passed down. */
#define filter_8tap_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                                const uint8_t *src, ptrdiff_t src_stride, \
                                                int w, int h, const int16_t *filter) \
{ \
    do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
}

filter_8tap_1d_fn(put, 0, v, src_stride)
filter_8tap_1d_fn(put, 0, h, 1)
filter_8tap_1d_fn(avg, 1, v, src_stride)
filter_8tap_1d_fn(avg, 1, h, 1)

#undef filter_8tap_1d_fn
/*
 * Separable 2D 8-tap filter: horizontal pass into a 64-wide intermediate
 * buffer (h + 7 rows, to cover the vertical taps), then vertical pass from
 * that buffer into dst. avg behaves as in do_8tap_1d_c. Max block size is
 * 64x64, hence tmp[64 * 71].
 */
static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
                                          const uint8_t *src, ptrdiff_t src_stride,
                                          int w, int h, const int16_t *filterx,
                                          const int16_t *filtery, int avg)
{
    uint8_t tmp[64 * 71];
    uint8_t *t = tmp;
    int rows = h + 7;

    /* horizontal pass, starting 3 rows above the block for the v-taps */
    src -= src_stride * 3;
    do {
        int i;

        for (i = 0; i < w; i++)
            t[i] = FILTER_8TAP(src, i, filterx, 1);
        src += src_stride;
        t += 64;
    } while (--rows);

    /* vertical pass over the aligned intermediate rows */
    t = tmp + 64 * 3;
    do {
        int i;

        for (i = 0; i < w; i++) {
            int px = FILTER_8TAP(t, i, filtery, 64);

            dst[i] = avg ? (dst[i] + px + 1) >> 1 : px;
        }
        dst += dst_stride;
        t += 64;
    } while (--h);
}
/* put/avg drivers for the separable 2D 8-tap filter. */
#define filter_8tap_2d_fn(opn, opa) \
static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                           const uint8_t *src, ptrdiff_t src_stride, \
                                           int w, int h, const int16_t *filterx, \
                                           const int16_t *filtery) \
{ \
    do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \
}

filter_8tap_2d_fn(put, 0)
filter_8tap_2d_fn(avg, 1)

#undef filter_8tap_2d_fn

/* Per-size entry points: pick the kernel from vp9_subpel_filters by the
 * subpel phase (mx or my for 1D, both for 2D). */
#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                              const uint8_t *src, ptrdiff_t src_stride, \
                                              int h, int mx, int my) \
{ \
    avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \
                            vp9_subpel_filters[type_idx][dir_m]); \
}

#define filter_fn_2d(sz, type, type_idx, avg) \
static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                           const uint8_t *src, ptrdiff_t src_stride, \
                                           int h, int mx, int my) \
{ \
    avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
                       vp9_subpel_filters[type_idx][mx], \
                       vp9_subpel_filters[type_idx][my]); \
}

/* 2-tap bilinear interpolation: mxy is the 1/16th-pel phase, rounded with
 * +8 and renormalized by >> 4. */
#define FILTER_BILIN(src, x, mxy, stride) \
    (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
/*
 * One-dimensional bilinear filter over a w x h block. ds is the second-tap
 * offset (1 horizontal, src_stride vertical); mxy is the subpel phase.
 * avg folds at compile time as in the 8-tap variants.
 */
static av_always_inline void do_bilin_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
                                           const uint8_t *src, ptrdiff_t src_stride,
                                           int w, int h, ptrdiff_t ds, int mxy, int avg)
{
    do {
        int i;

        for (i = 0; i < w; i++) {
            int px = FILTER_BILIN(src, i, mxy, ds);

            dst[i] = avg ? (dst[i] + px + 1) >> 1 : px;
        }
        src += src_stride;
        dst += dst_stride;
    } while (--h);
}
/* Generate the four 1D bilinear drivers: {put,avg} x {v,h}. */
#define bilin_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                                 const uint8_t *src, ptrdiff_t src_stride, \
                                                 int w, int h, int mxy) \
{ \
    do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
}

bilin_1d_fn(put, 0, v, src_stride)
bilin_1d_fn(put, 0, h, 1)
bilin_1d_fn(avg, 1, v, src_stride)
bilin_1d_fn(avg, 1, h, 1)

#undef bilin_1d_fn
/*
 * Separable 2D bilinear filter: horizontal pass into a 64-wide intermediate
 * buffer (h + 1 rows, for the second vertical tap), then vertical pass into
 * dst. Max block size is 64x64, hence tmp[64 * 65].
 */
static av_always_inline void do_bilin_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
                                           const uint8_t *src, ptrdiff_t src_stride,
                                           int w, int h, int mx, int my, int avg)
{
    uint8_t tmp[64 * 65];
    uint8_t *t = tmp;
    int rows = h + 1;

    /* horizontal pass */
    do {
        int i;

        for (i = 0; i < w; i++)
            t[i] = FILTER_BILIN(src, i, mx, 1);
        src += src_stride;
        t += 64;
    } while (--rows);

    /* vertical pass */
    t = tmp;
    do {
        int i;

        for (i = 0; i < w; i++) {
            int px = FILTER_BILIN(t, i, my, 64);

            dst[i] = avg ? (dst[i] + px + 1) >> 1 : px;
        }
        dst += dst_stride;
        t += 64;
    } while (--h);
}
/* put/avg drivers for the 2D bilinear filter. */
#define bilin_2d_fn(opn, opa) \
static av_noinline void opn##_bilin_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                            const uint8_t *src, ptrdiff_t src_stride, \
                                            int w, int h, int mx, int my) \
{ \
    do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \
}

bilin_2d_fn(put, 0)
bilin_2d_fn(avg, 1)

#undef bilin_2d_fn

/* Per-size bilinear entry points (1D uses the relevant phase only). */
#define bilinf_fn_1d(sz, dir, dir_m, avg) \
static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                      const uint8_t *src, ptrdiff_t src_stride, \
                                      int h, int mx, int my) \
{ \
    avg##_bilin_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, dir_m); \
}

#define bilinf_fn_2d(sz, avg) \
static void avg##_bilin_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                   const uint8_t *src, ptrdiff_t src_stride, \
                                   int h, int mx, int my) \
{ \
    avg##_bilin_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, mx, my); \
}

/* Expand the full entry-point set for one block size: regular/smooth/sharp
 * 8-tap in h, v and hv, plus the bilinear trio. */
#define filter_fn(sz, avg) \
filter_fn_1d(sz, h, mx, regular, FILTER_8TAP_REGULAR, avg) \
filter_fn_1d(sz, v, my, regular, FILTER_8TAP_REGULAR, avg) \
filter_fn_2d(sz, regular, FILTER_8TAP_REGULAR, avg) \
filter_fn_1d(sz, h, mx, smooth, FILTER_8TAP_SMOOTH, avg) \
filter_fn_1d(sz, v, my, smooth, FILTER_8TAP_SMOOTH, avg) \
filter_fn_2d(sz, smooth, FILTER_8TAP_SMOOTH, avg) \
filter_fn_1d(sz, h, mx, sharp, FILTER_8TAP_SHARP, avg) \
filter_fn_1d(sz, v, my, sharp, FILTER_8TAP_SHARP, avg) \
filter_fn_2d(sz, sharp, FILTER_8TAP_SHARP, avg) \
bilinf_fn_1d(sz, h, mx, avg) \
bilinf_fn_1d(sz, v, my, avg) \
bilinf_fn_2d(sz, avg)

#define filter_fn_set(avg) \
filter_fn(64, avg) \
filter_fn(32, avg) \
filter_fn(16, avg) \
filter_fn(8, avg) \
filter_fn(4, avg)

filter_fn_set(put)
filter_fn_set(avg)

#undef filter_fn
#undef filter_fn_set
#undef filter_fn_1d
#undef filter_fn_2d
#undef bilinf_fn_1d
#undef bilinf_fn_2d
/* Fill the motion-compensation table dsp->mc[size idx][filter][put/avg][h][v].
 * idx1 0..4 maps to block sizes 64..4; [0][0] (full-pel) slots point at
 * plain copy/avg regardless of the filter type. */
static av_cold void vp9dsp_mc_init(VP9DSPContext *dsp)
{
#define init_fpel(idx1, idx2, sz, type) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = type##sz##_c; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = type##sz##_c; \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = type##sz##_c; \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = type##sz##_c

#define init_copy_avg(idx, sz) \
    init_fpel(idx, 0, sz, copy); \
    init_fpel(idx, 1, sz, avg)

    init_copy_avg(0, 64);
    init_copy_avg(1, 32);
    init_copy_avg(2, 16);
    init_copy_avg(3,  8);
    init_copy_avg(4,  4);

#undef init_copy_avg
#undef init_fpel

/* subpel slots: [idxh][idxv] selects h-only, v-only or hv filtering */
#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type) \
    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_c; \
    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_c; \
    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_c; \
    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][idxh][idxv] = type##_bilin_##sz##dir##_c

#define init_subpel2(idx, idxh, idxv, dir, type) \
    init_subpel1(0, idx, idxh, idxv, 64, dir, type); \
    init_subpel1(1, idx, idxh, idxv, 32, dir, type); \
    init_subpel1(2, idx, idxh, idxv, 16, dir, type); \
    init_subpel1(3, idx, idxh, idxv,  8, dir, type); \
    init_subpel1(4, idx, idxh, idxv,  4, dir, type)

#define init_subpel3(idx, type) \
    init_subpel2(idx, 1, 1, hv, type); \
    init_subpel2(idx, 0, 1, v, type); \
    init_subpel2(idx, 1, 0, h, type)

    init_subpel3(0, put);
    init_subpel3(1, avg);

#undef init_subpel1
#undef init_subpel2
#undef init_subpel3
}
/*
 * Separable scaled 8-tap MC: like do_8tap_2d_c, but the source is sampled
 * at a fractional step (dx, dy in 1/16th-pel units) starting from phase
 * (mx, my), selecting a new kernel from `filters` per output pixel/row.
 * The phase accumulators (imx/ioff, my/tmp_ptr) are deliberately updated
 * in-loop; do not reorder these statements.
 */
static av_always_inline void do_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride,
                                              const uint8_t *src, ptrdiff_t src_stride,
                                              int w, int h, int mx, int my,
                                              int dx, int dy, int avg,
                                              const int16_t (*filters)[8])
{
    /* enough intermediate rows to cover the scaled vertical reach */
    int tmp_h = (((h - 1) * dy + my) >> 4) + 8;
    uint8_t tmp[64 * 135], *tmp_ptr = tmp;

    src -= src_stride * 3;
    do {
        int x;
        int imx = mx, ioff = 0;

        /* horizontal pass: step the source offset by dx per output pixel */
        for (x = 0; x < w; x++) {
            tmp_ptr[x] = FILTER_8TAP(src, ioff, filters[imx], 1);
            imx += dx;
            ioff += imx >> 4;
            imx &= 0xf;
        }

        tmp_ptr += 64;
        src += src_stride;
    } while (--tmp_h);

    /* vertical pass: my carries the running subpel phase between rows */
    tmp_ptr = tmp + 64 * 3;
    do {
        int x;
        const int16_t *filter = filters[my];

        for (x = 0; x < w; x++)
            if (avg) {
                dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filter, 64) + 1) >> 1;
            } else {
                dst[x] = FILTER_8TAP(tmp_ptr, x, filter, 64);
            }

        my += dy;
        tmp_ptr += (my >> 4) * 64;
        my &= 0xf;
        dst += dst_stride;
    } while (--h);
}
/* put/avg drivers for the scaled 8-tap path. */
#define scaled_filter_8tap_fn(opn, opa) \
static av_noinline void opn##_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                            const uint8_t *src, ptrdiff_t src_stride, \
                                            int w, int h, int mx, int my, int dx, int dy, \
                                            const int16_t (*filters)[8]) \
{ \
    do_scaled_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
                     opa, filters); \
}

scaled_filter_8tap_fn(put, 0)
scaled_filter_8tap_fn(avg, 1)

#undef scaled_filter_8tap_fn
#undef FILTER_8TAP

/* Per-size scaled entry points, selecting the 8-tap filter family. */
#define scaled_filter_fn(sz, type, type_idx, avg) \
static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                           const uint8_t *src, ptrdiff_t src_stride, \
                                           int h, int mx, int my, int dx, int dy) \
{ \
    avg##_scaled_8tap_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy, \
                        vp9_subpel_filters[type_idx]); \
}
/*
 * Scaled bilinear MC, structured like do_scaled_8tap_c but with 2-tap
 * kernels and only one extra intermediate row. The in-loop phase
 * accumulator updates are order-sensitive.
 */
static av_always_inline void do_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride,
                                               const uint8_t *src, ptrdiff_t src_stride,
                                               int w, int h, int mx, int my,
                                               int dx, int dy, int avg)
{
    uint8_t tmp[64 * 129], *tmp_ptr = tmp;
    int tmp_h = (((h - 1) * dy + my) >> 4) + 2;

    do {
        int x;
        int imx = mx, ioff = 0;

        /* horizontal pass with per-pixel phase stepping */
        for (x = 0; x < w; x++) {
            tmp_ptr[x] = FILTER_BILIN(src, ioff, imx, 1);
            imx += dx;
            ioff += imx >> 4;
            imx &= 0xf;
        }

        tmp_ptr += 64;
        src += src_stride;
    } while (--tmp_h);

    /* vertical pass; my carries the running phase between rows */
    tmp_ptr = tmp;
    do {
        int x;

        for (x = 0; x < w; x++)
            if (avg) {
                dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
            } else {
                dst[x] = FILTER_BILIN(tmp_ptr, x, my, 64);
            }

        my += dy;
        tmp_ptr += (my >> 4) * 64;
        my &= 0xf;
        dst += dst_stride;
    } while (--h);
}
/* put/avg drivers and per-size entry points for the scaled bilinear path. */
#define scaled_bilin_fn(opn, opa) \
static av_noinline void opn##_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                             const uint8_t *src, ptrdiff_t src_stride, \
                                             int w, int h, int mx, int my, int dx, int dy) \
{ \
    do_scaled_bilin_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, opa); \
}

scaled_bilin_fn(put, 0)
scaled_bilin_fn(avg, 1)

#undef scaled_bilin_fn
#undef FILTER_BILIN

#define scaled_bilinf_fn(sz, avg) \
static void avg##_scaled_bilin_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                        const uint8_t *src, ptrdiff_t src_stride, \
                                        int h, int mx, int my, int dx, int dy) \
{ \
    avg##_scaled_bilin_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy); \
}

/* Expand regular/smooth/sharp 8-tap plus bilinear for one block size. */
#define scaled_filter_fns(sz, avg) \
scaled_filter_fn(sz, regular, FILTER_8TAP_REGULAR, avg) \
scaled_filter_fn(sz, smooth,  FILTER_8TAP_SMOOTH, avg) \
scaled_filter_fn(sz, sharp,   FILTER_8TAP_SHARP, avg) \
scaled_bilinf_fn(sz, avg)

#define scaled_filter_fn_set(avg) \
scaled_filter_fns(64, avg) \
scaled_filter_fns(32, avg) \
scaled_filter_fns(16, avg) \
scaled_filter_fns(8, avg) \
scaled_filter_fns(4, avg)

scaled_filter_fn_set(put)
scaled_filter_fn_set(avg)

#undef scaled_filter_fns
#undef scaled_filter_fn_set
#undef scaled_filter_fn
#undef scaled_bilinf_fn
/* Fill the scaled-MC table dsp->smc[size idx][filter][put/avg];
 * idx1 0..4 maps to block sizes 64..4. */
static av_cold void vp9dsp_scaled_mc_init(VP9DSPContext *dsp)
{
#define init_scaled(idx1, idx2, sz, type) \
    dsp->smc[idx1][FILTER_8TAP_SMOOTH ][idx2] = type##_scaled_smooth_##sz##_c; \
    dsp->smc[idx1][FILTER_8TAP_REGULAR][idx2] = type##_scaled_regular_##sz##_c; \
    dsp->smc[idx1][FILTER_8TAP_SHARP  ][idx2] = type##_scaled_sharp_##sz##_c; \
    dsp->smc[idx1][FILTER_BILINEAR    ][idx2] = type##_scaled_bilin_##sz##_c

#define init_scaled_put_avg(idx, sz) \
    init_scaled(idx, 0, sz, put); \
    init_scaled(idx, 1, sz, avg)

    init_scaled_put_avg(0, 64);
    init_scaled_put_avg(1, 32);
    init_scaled_put_avg(2, 16);
    init_scaled_put_avg(3,  8);
    init_scaled_put_avg(4,  4);

#undef init_scaled_put_avg
#undef init_scaled
}
/*
 * Public entry point: populate all VP9 DSP function tables, then let the
 * x86 code override the C versions where SIMD implementations exist.
 *
 * Note: the previous text fused two diff columns on the last lines
 * ("...x86(dsp); if (ARCH_X86) ...x86(dsp, bpp);" and "} }"), which is not
 * valid C; restored to the single-argument form matching this function's
 * visible signature and the C inits defined above.
 */
av_cold void ff_vp9dsp_init(VP9DSPContext *dsp)
{
    vp9dsp_intrapred_init(dsp);
    vp9dsp_itxfm_init(dsp);
    vp9dsp_loopfilter_init(dsp);
    vp9dsp_mc_init(dsp);
    vp9dsp_scaled_mc_init(dsp);

    if (ARCH_X86)
        ff_vp9dsp_init_x86(dsp);
}
    vp9_scaled_mc_func smc[5][4][2];
} VP9DSPContext;

void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp);

void ff_vp9dsp_init_8(VP9DSPContext *dsp);
void ff_vp9dsp_init_10(VP9DSPContext *dsp);
void ff_vp9dsp_init_12(VP9DSPContext *dsp);

void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp);

#endif /* AVCODEC_VP9DSP_H */
/*
* VP9 compatible video decoder
*
* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
* Copyright (C) 2013 Clément Bœsch <u pkh me>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define BIT_DEPTH 10
#define dctint int64_t
#include "vp9dsp_template.c"
/*
* VP9 compatible video decoder
*
* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
* Copyright (C) 2013 Clément Bœsch <u pkh me>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define BIT_DEPTH 12
#define dctint int64_t
#include "vp9dsp_template.c"
/*
* VP9 compatible video decoder
*
* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
* Copyright (C) 2013 Clément Bœsch <u pkh me>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define BIT_DEPTH 8
#define dctint int
#include "vp9dsp_template.c"
/*
* VP9 compatible video decoder
*
* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
* Copyright (C) 2013 Clément Bœsch <u pkh me>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/common.h"
#include "bit_depth_template.c"
#include "vp9dsp.h"
// FIXME see whether we can merge parts of this (perhaps at least 4x4 and 8x8)
// back with h264pred.[ch]
/*
 * 4x4 vertical intra prediction: the row of pixels directly above the
 * block is replicated into all four destination rows.
 */
static void vert_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    pixel4 row = AV_RN4PA(top);
    int y;

    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 4; y++) {
        AV_WN4PA(dst, row);
        dst += stride;
    }
}
/* 8x8 vertical intra prediction: replicate the 8 pixels above into all 8 rows. */
static void vert_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    pixel4 p4a = AV_RN4PA(top + 0);
    pixel4 p4b = AV_RN4PA(top + 4);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 8; y++) {
        AV_WN4PA(dst + 0, p4a);
        AV_WN4PA(dst + 4, p4b);
        dst += stride;
    }
}
/* 16x16 vertical intra prediction: replicate the 16 pixels above into all 16 rows. */
static void vert_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    pixel4 p4a = AV_RN4PA(top + 0);
    pixel4 p4b = AV_RN4PA(top + 4);
    pixel4 p4c = AV_RN4PA(top + 8);
    pixel4 p4d = AV_RN4PA(top + 12);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 16; y++) {
        AV_WN4PA(dst + 0, p4a);
        AV_WN4PA(dst + 4, p4b);
        AV_WN4PA(dst + 8, p4c);
        AV_WN4PA(dst + 12, p4d);
        dst += stride;
    }
}
/* 32x32 vertical intra prediction: replicate the 32 pixels above into all 32 rows. */
static void vert_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    pixel4 p4a = AV_RN4PA(top + 0);
    pixel4 p4b = AV_RN4PA(top + 4);
    pixel4 p4c = AV_RN4PA(top + 8);
    pixel4 p4d = AV_RN4PA(top + 12);
    pixel4 p4e = AV_RN4PA(top + 16);
    pixel4 p4f = AV_RN4PA(top + 20);
    pixel4 p4g = AV_RN4PA(top + 24);
    pixel4 p4h = AV_RN4PA(top + 28);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 32; y++) {
        AV_WN4PA(dst + 0, p4a);
        AV_WN4PA(dst + 4, p4b);
        AV_WN4PA(dst + 8, p4c);
        AV_WN4PA(dst + 12, p4d);
        AV_WN4PA(dst + 16, p4e);
        AV_WN4PA(dst + 20, p4f);
        AV_WN4PA(dst + 24, p4g);
        AV_WN4PA(dst + 28, p4h);
        dst += stride;
    }
}
/*
 * 4x4 horizontal intra prediction: each row is filled with its left
 * neighbour pixel. The left[] array is stored bottom-to-top, hence the
 * reversed index.
 */
static void hor_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                      const uint8_t *_left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    int y;

    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 4; y++)
        AV_WN4PA(dst + stride * y, PIXEL_SPLAT_X4(left[3 - y]));
}
/* 8x8 horizontal intra prediction: fill each row with its left neighbour
 * (left[] is stored bottom-to-top). */
static void hor_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                      const uint8_t *_left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 8; y++) {
        pixel4 p4 = PIXEL_SPLAT_X4(left[7 - y]);
        AV_WN4PA(dst + 0, p4);
        AV_WN4PA(dst + 4, p4);
        dst += stride;
    }
}
/* 16x16 horizontal intra prediction: fill each row with its left neighbour
 * (left[] is stored bottom-to-top). */
static void hor_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                        const uint8_t *_left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 16; y++) {
        pixel4 p4 = PIXEL_SPLAT_X4(left[15 - y]);
        AV_WN4PA(dst + 0, p4);
        AV_WN4PA(dst + 4, p4);
        AV_WN4PA(dst + 8, p4);
        AV_WN4PA(dst + 12, p4);
        dst += stride;
    }
}
/* 32x32 horizontal intra prediction: fill each row with its left neighbour
 * (left[] is stored bottom-to-top). */
static void hor_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                        const uint8_t *_left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 32; y++) {
        pixel4 p4 = PIXEL_SPLAT_X4(left[31 - y]);
        AV_WN4PA(dst + 0, p4);
        AV_WN4PA(dst + 4, p4);
        AV_WN4PA(dst + 8, p4);
        AV_WN4PA(dst + 12, p4);
        AV_WN4PA(dst + 16, p4);
        AV_WN4PA(dst + 20, p4);
        AV_WN4PA(dst + 24, p4);
        AV_WN4PA(dst + 28, p4);
        dst += stride;
    }
}
/* 4x4 TrueMotion (TM) intra prediction:
 * dst[x,y] = clip(top[x] + left[y] - topleft), with left[] stored bottom-to-top. */
static void tm_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    const pixel *top = (const pixel *) _top;
    int y, tl = top[-1]; /* top[-1] is the top-left corner pixel */
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 4; y++) {
        int l_m_tl = left[3 - y] - tl;
        dst[0] = av_clip_pixel(top[0] + l_m_tl);
        dst[1] = av_clip_pixel(top[1] + l_m_tl);
        dst[2] = av_clip_pixel(top[2] + l_m_tl);
        dst[3] = av_clip_pixel(top[3] + l_m_tl);
        dst += stride;
    }
}
/* 8x8 TrueMotion (TM) intra prediction:
 * dst[x,y] = clip(top[x] + left[y] - topleft), with left[] stored bottom-to-top. */
static void tm_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    const pixel *top = (const pixel *) _top;
    int y, tl = top[-1]; /* top[-1] is the top-left corner pixel */
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 8; y++) {
        int l_m_tl = left[7 - y] - tl;
        dst[0] = av_clip_pixel(top[0] + l_m_tl);
        dst[1] = av_clip_pixel(top[1] + l_m_tl);
        dst[2] = av_clip_pixel(top[2] + l_m_tl);
        dst[3] = av_clip_pixel(top[3] + l_m_tl);
        dst[4] = av_clip_pixel(top[4] + l_m_tl);
        dst[5] = av_clip_pixel(top[5] + l_m_tl);
        dst[6] = av_clip_pixel(top[6] + l_m_tl);
        dst[7] = av_clip_pixel(top[7] + l_m_tl);
        dst += stride;
    }
}
/* 16x16 TrueMotion (TM) intra prediction:
 * dst[x,y] = clip(top[x] + left[y] - topleft), with left[] stored bottom-to-top. */
static void tm_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    const pixel *top = (const pixel *) _top;
    int y, tl = top[-1]; /* top[-1] is the top-left corner pixel */
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 16; y++) {
        int l_m_tl = left[15 - y] - tl;
        dst[ 0] = av_clip_pixel(top[ 0] + l_m_tl);
        dst[ 1] = av_clip_pixel(top[ 1] + l_m_tl);
        dst[ 2] = av_clip_pixel(top[ 2] + l_m_tl);
        dst[ 3] = av_clip_pixel(top[ 3] + l_m_tl);
        dst[ 4] = av_clip_pixel(top[ 4] + l_m_tl);
        dst[ 5] = av_clip_pixel(top[ 5] + l_m_tl);
        dst[ 6] = av_clip_pixel(top[ 6] + l_m_tl);
        dst[ 7] = av_clip_pixel(top[ 7] + l_m_tl);
        dst[ 8] = av_clip_pixel(top[ 8] + l_m_tl);
        dst[ 9] = av_clip_pixel(top[ 9] + l_m_tl);
        dst[10] = av_clip_pixel(top[10] + l_m_tl);
        dst[11] = av_clip_pixel(top[11] + l_m_tl);
        dst[12] = av_clip_pixel(top[12] + l_m_tl);
        dst[13] = av_clip_pixel(top[13] + l_m_tl);
        dst[14] = av_clip_pixel(top[14] + l_m_tl);
        dst[15] = av_clip_pixel(top[15] + l_m_tl);
        dst += stride;
    }
}
/* 32x32 TrueMotion (TM) intra prediction:
 * dst[x,y] = clip(top[x] + left[y] - topleft), with left[] stored bottom-to-top. */
static void tm_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    const pixel *top = (const pixel *) _top;
    int y, tl = top[-1]; /* top[-1] is the top-left corner pixel */
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 32; y++) {
        int l_m_tl = left[31 - y] - tl;
        dst[ 0] = av_clip_pixel(top[ 0] + l_m_tl);
        dst[ 1] = av_clip_pixel(top[ 1] + l_m_tl);
        dst[ 2] = av_clip_pixel(top[ 2] + l_m_tl);
        dst[ 3] = av_clip_pixel(top[ 3] + l_m_tl);
        dst[ 4] = av_clip_pixel(top[ 4] + l_m_tl);
        dst[ 5] = av_clip_pixel(top[ 5] + l_m_tl);
        dst[ 6] = av_clip_pixel(top[ 6] + l_m_tl);
        dst[ 7] = av_clip_pixel(top[ 7] + l_m_tl);
        dst[ 8] = av_clip_pixel(top[ 8] + l_m_tl);
        dst[ 9] = av_clip_pixel(top[ 9] + l_m_tl);
        dst[10] = av_clip_pixel(top[10] + l_m_tl);
        dst[11] = av_clip_pixel(top[11] + l_m_tl);
        dst[12] = av_clip_pixel(top[12] + l_m_tl);
        dst[13] = av_clip_pixel(top[13] + l_m_tl);
        dst[14] = av_clip_pixel(top[14] + l_m_tl);
        dst[15] = av_clip_pixel(top[15] + l_m_tl);
        dst[16] = av_clip_pixel(top[16] + l_m_tl);
        dst[17] = av_clip_pixel(top[17] + l_m_tl);
        dst[18] = av_clip_pixel(top[18] + l_m_tl);
        dst[19] = av_clip_pixel(top[19] + l_m_tl);
        dst[20] = av_clip_pixel(top[20] + l_m_tl);
        dst[21] = av_clip_pixel(top[21] + l_m_tl);
        dst[22] = av_clip_pixel(top[22] + l_m_tl);
        dst[23] = av_clip_pixel(top[23] + l_m_tl);
        dst[24] = av_clip_pixel(top[24] + l_m_tl);
        dst[25] = av_clip_pixel(top[25] + l_m_tl);
        dst[26] = av_clip_pixel(top[26] + l_m_tl);
        dst[27] = av_clip_pixel(top[27] + l_m_tl);
        dst[28] = av_clip_pixel(top[28] + l_m_tl);
        dst[29] = av_clip_pixel(top[29] + l_m_tl);
        dst[30] = av_clip_pixel(top[30] + l_m_tl);
        dst[31] = av_clip_pixel(top[31] + l_m_tl);
        dst += stride;
    }
}
/* 4x4 DC prediction: fill the block with the rounded average of the 4 left
 * and 4 top neighbour pixels. */
static void dc_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    const pixel *top = (const pixel *) _top;
    pixel4 dc = PIXEL_SPLAT_X4((left[0] + left[1] + left[2] + left[3] +
                                top[0] + top[1] + top[2] + top[3] + 4) >> 3);
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    AV_WN4PA(dst + stride * 0, dc);
    AV_WN4PA(dst + stride * 1, dc);
    AV_WN4PA(dst + stride * 2, dc);
    AV_WN4PA(dst + stride * 3, dc);
}
/* 8x8 DC prediction: fill the block with the rounded average of the 8 left
 * and 8 top neighbour pixels. */
static void dc_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    const pixel *top = (const pixel *) _top;
    pixel4 dc = PIXEL_SPLAT_X4
        ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] +
          left[6] + left[7] + top[0] + top[1] + top[2] + top[3] +
          top[4] + top[5] + top[6] + top[7] + 8) >> 4);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 8; y++) {
        AV_WN4PA(dst + 0, dc);
        AV_WN4PA(dst + 4, dc);
        dst += stride;
    }
}
/* 16x16 DC prediction: fill the block with the rounded average of the 16 left
 * and 16 top neighbour pixels. */
static void dc_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    const pixel *top = (const pixel *) _top;
    pixel4 dc = PIXEL_SPLAT_X4
        ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] +
          left[7] + left[8] + left[9] + left[10] + left[11] + left[12] +
          left[13] + left[14] + left[15] + top[0] + top[1] + top[2] + top[3] +
          top[4] + top[5] + top[6] + top[7] + top[8] + top[9] + top[10] +
          top[11] + top[12] + top[13] + top[14] + top[15] + 16) >> 5);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 16; y++) {
        AV_WN4PA(dst + 0, dc);
        AV_WN4PA(dst + 4, dc);
        AV_WN4PA(dst + 8, dc);
        AV_WN4PA(dst + 12, dc);
        dst += stride;
    }
}
/* 32x32 DC prediction: fill the block with the rounded average of the 32 left
 * and 32 top neighbour pixels. */
static void dc_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    const pixel *top = (const pixel *) _top;
    pixel4 dc = PIXEL_SPLAT_X4
        ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] +
          left[7] + left[8] + left[9] + left[10] + left[11] + left[12] +
          left[13] + left[14] + left[15] + left[16] + left[17] + left[18] +
          left[19] + left[20] + left[21] + left[22] + left[23] + left[24] +
          left[25] + left[26] + left[27] + left[28] + left[29] + left[30] +
          left[31] + top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
          top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + top[12] +
          top[13] + top[14] + top[15] + top[16] + top[17] + top[18] + top[19] +
          top[20] + top[21] + top[22] + top[23] + top[24] + top[25] + top[26] +
          top[27] + top[28] + top[29] + top[30] + top[31] + 32) >> 6);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 32; y++) {
        AV_WN4PA(dst + 0, dc);
        AV_WN4PA(dst + 4, dc);
        AV_WN4PA(dst + 8, dc);
        AV_WN4PA(dst + 12, dc);
        AV_WN4PA(dst + 16, dc);
        AV_WN4PA(dst + 20, dc);
        AV_WN4PA(dst + 24, dc);
        AV_WN4PA(dst + 28, dc);
        dst += stride;
    }
}
/* 4x4 left-DC prediction: average of the 4 left neighbours only
 * (used when the top edge is unavailable). */
static void dc_left_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                          const uint8_t *_left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    pixel4 dc = PIXEL_SPLAT_X4((left[0] + left[1] + left[2] + left[3] + 2) >> 2);
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    AV_WN4PA(dst + stride * 0, dc);
    AV_WN4PA(dst + stride * 1, dc);
    AV_WN4PA(dst + stride * 2, dc);
    AV_WN4PA(dst + stride * 3, dc);
}
/* 8x8 left-DC prediction: average of the 8 left neighbours only. */
static void dc_left_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                          const uint8_t *_left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    pixel4 dc = PIXEL_SPLAT_X4
        ((left[0] + left[1] + left[2] + left[3] +
          left[4] + left[5] + left[6] + left[7] + 4) >> 3);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 8; y++) {
        AV_WN4PA(dst + 0, dc);
        AV_WN4PA(dst + 4, dc);
        dst += stride;
    }
}
/* 16x16 left-DC prediction: average of the 16 left neighbours only. */
static void dc_left_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                            const uint8_t *_left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    pixel4 dc = PIXEL_SPLAT_X4
        ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] +
          left[6] + left[7] + left[8] + left[9] + left[10] + left[11] +
          left[12] + left[13] + left[14] + left[15] + 8) >> 4);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 16; y++) {
        AV_WN4PA(dst + 0, dc);
        AV_WN4PA(dst + 4, dc);
        AV_WN4PA(dst + 8, dc);
        AV_WN4PA(dst + 12, dc);
        dst += stride;
    }
}
/* 32x32 left-DC prediction: average of the 32 left neighbours only. */
static void dc_left_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                            const uint8_t *_left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    pixel4 dc = PIXEL_SPLAT_X4
        ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] +
          left[6] + left[7] + left[8] + left[9] + left[10] + left[11] +
          left[12] + left[13] + left[14] + left[15] + left[16] + left[17] +
          left[18] + left[19] + left[20] + left[21] + left[22] + left[23] +
          left[24] + left[25] + left[26] + left[27] + left[28] + left[29] +
          left[30] + left[31] + 16) >> 5);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 32; y++) {
        AV_WN4PA(dst + 0, dc);
        AV_WN4PA(dst + 4, dc);
        AV_WN4PA(dst + 8, dc);
        AV_WN4PA(dst + 12, dc);
        AV_WN4PA(dst + 16, dc);
        AV_WN4PA(dst + 20, dc);
        AV_WN4PA(dst + 24, dc);
        AV_WN4PA(dst + 28, dc);
        dst += stride;
    }
}
/* 4x4 top-DC prediction: average of the 4 top neighbours only
 * (used when the left edge is unavailable). */
static void dc_top_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    pixel4 dc = PIXEL_SPLAT_X4((top[0] + top[1] + top[2] + top[3] + 2) >> 2);
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    AV_WN4PA(dst + stride * 0, dc);
    AV_WN4PA(dst + stride * 1, dc);
    AV_WN4PA(dst + stride * 2, dc);
    AV_WN4PA(dst + stride * 3, dc);
}
/* 8x8 top-DC prediction: average of the 8 top neighbours only. */
static void dc_top_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    pixel4 dc = PIXEL_SPLAT_X4
        ((top[0] + top[1] + top[2] + top[3] +
          top[4] + top[5] + top[6] + top[7] + 4) >> 3);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 8; y++) {
        AV_WN4PA(dst + 0, dc);
        AV_WN4PA(dst + 4, dc);
        dst += stride;
    }
}
/* 16x16 top-DC prediction: average of the 16 top neighbours only. */
static void dc_top_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    pixel4 dc = PIXEL_SPLAT_X4
        ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
          top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
          top[12] + top[13] + top[14] + top[15] + 8) >> 4);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 16; y++) {
        AV_WN4PA(dst + 0, dc);
        AV_WN4PA(dst + 4, dc);
        AV_WN4PA(dst + 8, dc);
        AV_WN4PA(dst + 12, dc);
        dst += stride;
    }
}
/* 32x32 top-DC prediction: average of the 32 top neighbours only. */
static void dc_top_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    pixel4 dc = PIXEL_SPLAT_X4
        ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
          top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
          top[12] + top[13] + top[14] + top[15] + top[16] + top[17] +
          top[18] + top[19] + top[20] + top[21] + top[22] + top[23] +
          top[24] + top[25] + top[26] + top[27] + top[28] + top[29] +
          top[30] + top[31] + 16) >> 5);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 32; y++) {
        AV_WN4PA(dst + 0, dc);
        AV_WN4PA(dst + 4, dc);
        AV_WN4PA(dst + 8, dc);
        AV_WN4PA(dst + 12, dc);
        AV_WN4PA(dst + 16, dc);
        AV_WN4PA(dst + 20, dc);
        AV_WN4PA(dst + 24, dc);
        AV_WN4PA(dst + 28, dc);
        dst += stride;
    }
}
/*
 * 4x4 fixed-DC prediction with neither edge available: fill with the
 * mid-range value (128 scaled to the template bit depth).
 */
static void dc_128_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 fill = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
    int y;

    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 4; y++)
        AV_WN4PA(dst + stride * y, fill);
}
/* 8x8 fixed-DC prediction: fill with the bit-depth-scaled mid-range value. */
static void dc_128_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 8; y++) {
        AV_WN4PA(dst + 0, val);
        AV_WN4PA(dst + 4, val);
        dst += stride;
    }
}
/* 16x16 fixed-DC prediction: fill with the bit-depth-scaled mid-range value. */
static void dc_128_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 16; y++) {
        AV_WN4PA(dst + 0, val);
        AV_WN4PA(dst + 4, val);
        AV_WN4PA(dst + 8, val);
        AV_WN4PA(dst + 12, val);
        dst += stride;
    }
}
/* 32x32 fixed-DC prediction: fill with the bit-depth-scaled mid-range value. */
static void dc_128_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 32; y++) {
        AV_WN4PA(dst + 0, val);
        AV_WN4PA(dst + 4, val);
        AV_WN4PA(dst + 8, val);
        AV_WN4PA(dst + 12, val);
        AV_WN4PA(dst + 16, val);
        AV_WN4PA(dst + 20, val);
        AV_WN4PA(dst + 24, val);
        AV_WN4PA(dst + 28, val);
        dst += stride;
    }
}
/* 4x4 fixed-DC prediction: fill with (mid-range - 1), bit-depth scaled.
 * Fix: the closing brace was glued onto the last statement
 * ("...val);}"), inconsistent with every sibling function; put it on
 * its own line. Behavior is unchanged. */
static void dc_127_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);

    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    AV_WN4PA(dst + stride * 0, val);
    AV_WN4PA(dst + stride * 1, val);
    AV_WN4PA(dst + stride * 2, val);
    AV_WN4PA(dst + stride * 3, val);
}
/* 8x8 fixed-DC prediction: fill with (mid-range - 1), bit-depth scaled. */
static void dc_127_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 8; y++) {
        AV_WN4PA(dst + 0, val);
        AV_WN4PA(dst + 4, val);
        dst += stride;
    }
}
/* 16x16 fixed-DC prediction: fill with (mid-range - 1), bit-depth scaled. */
static void dc_127_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 16; y++) {
        AV_WN4PA(dst + 0, val);
        AV_WN4PA(dst + 4, val);
        AV_WN4PA(dst + 8, val);
        AV_WN4PA(dst + 12, val);
        dst += stride;
    }
}
/* 32x32 fixed-DC prediction: fill with (mid-range - 1), bit-depth scaled. */
static void dc_127_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 32; y++) {
        AV_WN4PA(dst + 0, val);
        AV_WN4PA(dst + 4, val);
        AV_WN4PA(dst + 8, val);
        AV_WN4PA(dst + 12, val);
        AV_WN4PA(dst + 16, val);
        AV_WN4PA(dst + 20, val);
        AV_WN4PA(dst + 24, val);
        AV_WN4PA(dst + 28, val);
        dst += stride;
    }
}
/* 4x4 fixed-DC prediction: fill with (mid-range + 1), bit-depth scaled. */
static void dc_129_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    AV_WN4PA(dst + stride * 0, val);
    AV_WN4PA(dst + stride * 1, val);
    AV_WN4PA(dst + stride * 2, val);
    AV_WN4PA(dst + stride * 3, val);
}
/* 8x8 fixed-DC prediction: fill with (mid-range + 1), bit-depth scaled. */
static void dc_129_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 8; y++) {
        AV_WN4PA(dst + 0, val);
        AV_WN4PA(dst + 4, val);
        dst += stride;
    }
}
/* 16x16 fixed-DC prediction: fill with (mid-range + 1), bit-depth scaled. */
static void dc_129_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 16; y++) {
        AV_WN4PA(dst + 0, val);
        AV_WN4PA(dst + 4, val);
        AV_WN4PA(dst + 8, val);
        AV_WN4PA(dst + 12, val);
        dst += stride;
    }
}
/* 32x32 fixed-DC prediction: fill with (mid-range + 1), bit-depth scaled. */
static void dc_129_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
    int y;
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    for (y = 0; y < 32; y++) {
        AV_WN4PA(dst + 0, val);
        AV_WN4PA(dst + 4, val);
        AV_WN4PA(dst + 8, val);
        AV_WN4PA(dst + 12, val);
        AV_WN4PA(dst + 16, val);
        AV_WN4PA(dst + 20, val);
        AV_WN4PA(dst + 24, val);
        AV_WN4PA(dst + 28, val);
        dst += stride;
    }
}
/* Bit-depth-generic "memset": for 8 bpp a pixel is one byte, so plain memset
 * works; for higher depths store the value element-wise into uint16_t pixels. */
#if BIT_DEPTH == 8
#define memset_bpc memset
#else
static inline void memset_bpc(uint16_t *dst, int val, int len) {
    int n;
    for (n = 0; n < len; n++) {
        dst[n] = val;
    }
}
#endif
/* Convenience accessor for the directional predictors below. */
#define DST(x, y) dst[(x) + (y) * stride]

/* 4x4 diagonal down-left (D45) prediction from the 8 top neighbours,
 * using (a + 2b + c + 2) >> 2 three-tap smoothing along the diagonal. */
static void diag_downleft_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                                const uint8_t *left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        a4 = top[4], a5 = top[5], a6 = top[6], a7 = top[7];
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    DST(0,0) = (a0 + a1 * 2 + a2 + 2) >> 2;
    DST(1,0) = DST(0,1) = (a1 + a2 * 2 + a3 + 2) >> 2;
    DST(2,0) = DST(1,1) = DST(0,2) = (a2 + a3 * 2 + a4 + 2) >> 2;
    DST(3,0) = DST(2,1) = DST(1,2) = DST(0,3) = (a3 + a4 * 2 + a5 + 2) >> 2;
    DST(3,1) = DST(2,2) = DST(1,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
    DST(3,2) = DST(2,3) = (a5 + a6 * 2 + a7 + 2) >> 2;
    DST(3,3) = a7; // note: this is different from vp8 and such
}
/* Generic NxN diagonal down-left predictor (8/16/32): precompute the
 * smoothed diagonal into v[], then each row copies a shifted window of it
 * and pads the tail with the last top pixel. */
#define def_diag_downleft(size) \
static void diag_downleft_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                              const uint8_t *left, const uint8_t *_top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *top = (const pixel *) _top; \
    int i, j; \
    pixel v[size - 1]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size - 2; i++) \
        v[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
    v[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
\
    for (j = 0; j < size; j++) { \
        memcpy(dst + j*stride, v + j, (size - 1 - j) * sizeof(pixel)); \
        memset_bpc(dst + j*stride + size - 1 - j, top[size - 1], j + 1); \
    } \
}

def_diag_downleft(8)
def_diag_downleft(16)
def_diag_downleft(32)
/* 4x4 diagonal down-right (D135) prediction from left, top-left and top
 * neighbours (left[] stored bottom-to-top). */
static void diag_downright_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                                 const uint8_t *_left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    const pixel *left = (const pixel *) _left;
    int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0];
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    DST(0,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
    DST(0,2) = DST(1,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,1) = DST(1,2) = DST(2,3) = (tl + l0 * 2 + l1 + 2) >> 2;
    DST(0,0) = DST(1,1) = DST(2,2) = DST(3,3) = (l0 + tl * 2 + a0 + 2) >> 2;
    DST(1,0) = DST(2,1) = DST(3,2) = (tl + a0 * 2 + a1 + 2) >> 2;
    DST(2,0) = DST(3,1) = (a0 + a1 * 2 + a2 + 2) >> 2;
    DST(3,0) = (a1 + a2 * 2 + a3 + 2) >> 2;
}
/* Generic NxN diagonal down-right predictor (8/16/32): build the full
 * smoothed left+corner+top diagonal into v[], then each row is a shifted
 * size-wide window of it. */
#define def_diag_downright(size) \
static void diag_downright_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                               const uint8_t *_left, const uint8_t *_top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *top = (const pixel *) _top; \
    const pixel *left = (const pixel *) _left; \
    int i, j; \
    pixel v[size + size - 1]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size - 2; i++) { \
        v[i ] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
        v[size + 1 + i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
    } \
    v[size - 2] = (left[size - 2] + left[size - 1] * 2 + top[-1] + 2) >> 2; \
    v[size - 1] = (left[size - 1] + top[-1] * 2 + top[ 0] + 2) >> 2; \
    v[size ] = (top[-1] + top[0] * 2 + top[ 1] + 2) >> 2; \
\
    for (j = 0; j < size; j++) \
        memcpy(dst + j*stride, v + size - 1 - j, size * sizeof(pixel)); \
}

def_diag_downright(8)
def_diag_downright(16)
def_diag_downright(32)
/* 4x4 vertical-right (D117) prediction: mixes two-tap averages and
 * three-tap smoothed values of the left/top-left/top neighbours
 * (left[] stored bottom-to-top). */
static void vert_right_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                             const uint8_t *_left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    const pixel *left = (const pixel *) _left;
    int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        l0 = left[3], l1 = left[2], l2 = left[1];
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    DST(0,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,2) = (tl + l0 * 2 + l1 + 2) >> 2;
    DST(0,0) = DST(1,2) = (tl + a0 + 1) >> 1;
    DST(0,1) = DST(1,3) = (l0 + tl * 2 + a0 + 2) >> 2;
    DST(1,0) = DST(2,2) = (a0 + a1 + 1) >> 1;
    DST(1,1) = DST(2,3) = (tl + a0 * 2 + a1 + 2) >> 2;
    DST(2,0) = DST(3,2) = (a1 + a2 + 1) >> 1;
    DST(2,1) = DST(3,3) = (a0 + a1 * 2 + a2 + 2) >> 2;
    DST(3,0) = (a2 + a3 + 1) >> 1;
    DST(3,1) = (a1 + a2 * 2 + a3 + 2) >> 2;
}
/* Generic NxN vertical-right predictor (8/16/32): ve[] holds the two-tap
 * averages used on even rows, vo[] the three-tap smoothed values used on
 * odd rows; each row pair copies a shifted window of them. */
#define def_vert_right(size) \
static void vert_right_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                           const uint8_t *_left, const uint8_t *_top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *top = (const pixel *) _top; \
    const pixel *left = (const pixel *) _left; \
    int i, j; \
    pixel ve[size + size/2 - 1], vo[size + size/2 - 1]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size/2 - 2; i++) { \
        vo[i] = (left[i*2 + 3] + left[i*2 + 2] * 2 + left[i*2 + 1] + 2) >> 2; \
        ve[i] = (left[i*2 + 4] + left[i*2 + 3] * 2 + left[i*2 + 2] + 2) >> 2; \
    } \
    vo[size/2 - 2] = (left[size - 1] + left[size - 2] * 2 + left[size - 3] + 2) >> 2; \
    ve[size/2 - 2] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
\
    ve[size/2 - 1] = (top[-1] + top[0] + 1) >> 1; \
    vo[size/2 - 1] = (left[size - 1] + top[-1] * 2 + top[0] + 2) >> 2; \
    for (i = 0; i < size - 1; i++) { \
        ve[size/2 + i] = (top[i] + top[i + 1] + 1) >> 1; \
        vo[size/2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
    } \
\
    for (j = 0; j < size / 2; j++) { \
        memcpy(dst + j*2 *stride, ve + size/2 - 1 - j, size * sizeof(pixel)); \
        memcpy(dst + (j*2 + 1)*stride, vo + size/2 - 1 - j, size * sizeof(pixel)); \
    } \
}

def_vert_right(8)
def_vert_right(16)
def_vert_right(32)
/* 4x4 horizontal-down (D153) prediction from left, top-left and top
 * neighbours (left[] stored bottom-to-top). */
static void hor_down_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *_left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    const pixel *left = (const pixel *) _left;
    int l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0],
        tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2];
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    DST(2,0) = (tl + a0 * 2 + a1 + 2) >> 2;
    DST(3,0) = (a0 + a1 * 2 + a2 + 2) >> 2;
    DST(0,0) = DST(2,1) = (tl + l0 + 1) >> 1;
    DST(1,0) = DST(3,1) = (a0 + tl * 2 + l0 + 2) >> 2;
    DST(0,1) = DST(2,2) = (l0 + l1 + 1) >> 1;
    DST(1,1) = DST(3,2) = (tl + l0 * 2 + l1 + 2) >> 2;
    DST(0,2) = DST(2,3) = (l1 + l2 + 1) >> 1;
    DST(1,2) = DST(3,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,3) = (l2 + l3 + 1) >> 1;
    DST(1,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
}
/* Generic NxN horizontal-down predictor (8/16/32): interleave two-tap and
 * three-tap values from the left column plus smoothed top pixels into v[],
 * then each row copies a window shifted by two elements. */
#define def_hor_down(size) \
static void hor_down_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                         const uint8_t *_left, const uint8_t *_top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *top = (const pixel *) _top; \
    const pixel *left = (const pixel *) _left; \
    int i, j; \
    pixel v[size * 3 - 2]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size - 2; i++) { \
        v[i*2 ] = (left[i + 1] + left[i + 0] + 1) >> 1; \
        v[i*2 + 1] = (left[i + 2] + left[i + 1] * 2 + left[i + 0] + 2) >> 2; \
        v[size*2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
    } \
    v[size*2 - 2] = (top[-1] + left[size - 1] + 1) >> 1; \
    v[size*2 - 4] = (left[size - 1] + left[size - 2] + 1) >> 1; \
    v[size*2 - 1] = (top[0] + top[-1] * 2 + left[size - 1] + 2) >> 2; \
    v[size*2 - 3] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
\
    for (j = 0; j < size; j++) \
        memcpy(dst + j*stride, v + size*2 - 2 - j*2, size * sizeof(pixel)); \
}

def_hor_down(8)
def_hor_down(16)
def_hor_down(32)
/* 4x4 vertical-left (D63) prediction: alternating two-tap averages and
 * three-tap smoothed values of the top neighbours. */
static void vert_left_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                            const uint8_t *left, const uint8_t *_top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *top = (const pixel *) _top;
    int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
        a4 = top[4], a5 = top[5], a6 = top[6];
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    DST(0,0) = (a0 + a1 + 1) >> 1;
    DST(0,1) = (a0 + a1 * 2 + a2 + 2) >> 2;
    DST(1,0) = DST(0,2) = (a1 + a2 + 1) >> 1;
    DST(1,1) = DST(0,3) = (a1 + a2 * 2 + a3 + 2) >> 2;
    DST(2,0) = DST(1,2) = (a2 + a3 + 1) >> 1;
    DST(2,1) = DST(1,3) = (a2 + a3 * 2 + a4 + 2) >> 2;
    DST(3,0) = DST(2,2) = (a3 + a4 + 1) >> 1;
    DST(3,1) = DST(2,3) = (a3 + a4 * 2 + a5 + 2) >> 2;
    DST(3,2) = (a4 + a5 + 1) >> 1;
    DST(3,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
}
/* Generic NxN vertical-left predictor (8/16/32): ve[] (two-tap) feeds even
 * rows, vo[] (three-tap) odd rows; row tails are padded with the last top
 * pixel. */
#define def_vert_left(size) \
static void vert_left_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                          const uint8_t *left, const uint8_t *_top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *top = (const pixel *) _top; \
    int i, j; \
    pixel ve[size - 1], vo[size - 1]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size - 2; i++) { \
        ve[i] = (top[i] + top[i + 1] + 1) >> 1; \
        vo[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
    } \
    ve[size - 2] = (top[size - 2] + top[size - 1] + 1) >> 1; \
    vo[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
\
    for (j = 0; j < size / 2; j++) { \
        memcpy(dst + j*2 * stride, ve + j, (size - j - 1) * sizeof(pixel)); \
        memset_bpc(dst + j*2 * stride + size - j - 1, top[size - 1], j + 1); \
        memcpy(dst + (j*2 + 1) * stride, vo + j, (size - j - 1) * sizeof(pixel)); \
        memset_bpc(dst + (j*2 + 1) * stride + size - j - 1, top[size - 1], j + 1); \
    } \
}

def_vert_left(8)
def_vert_left(16)
def_vert_left(32)
/* 4x4 horizontal-up (D207) prediction from the left neighbours; the
 * lower-right region saturates to the last left pixel. */
static void hor_up_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *_left, const uint8_t *top)
{
    pixel *dst = (pixel *) _dst;
    const pixel *left = (const pixel *) _left;
    int l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
    stride /= sizeof(pixel); /* byte stride -> pixel stride */
    DST(0,0) = (l0 + l1 + 1) >> 1;
    DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2;
    DST(0,1) = DST(2,0) = (l1 + l2 + 1) >> 1;
    DST(1,1) = DST(3,0) = (l1 + l2 * 2 + l3 + 2) >> 2;
    DST(0,2) = DST(2,1) = (l2 + l3 + 1) >> 1;
    DST(1,2) = DST(3,1) = (l2 + l3 * 3 + 2) >> 2;
    DST(0,3) = DST(1,3) = DST(2,2) = DST(2,3) = DST(3,2) = DST(3,3) = l3;
}
/* Generic NxN horizontal-up predictor (8/16/32): interleave two-tap and
 * three-tap values of the left column into v[]; the bottom half of the
 * block pads its row tails with the last left pixel. */
#define def_hor_up(size) \
static void hor_up_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
                                       const uint8_t *_left, const uint8_t *top) \
{ \
    pixel *dst = (pixel *) _dst; \
    const pixel *left = (const pixel *) _left; \
    int i, j; \
    pixel v[size*2 - 2]; \
\
    stride /= sizeof(pixel); \
    for (i = 0; i < size - 2; i++) { \
        v[i*2 ] = (left[i] + left[i + 1] + 1) >> 1; \
        v[i*2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
    } \
    v[size*2 - 4] = (left[size - 2] + left[size - 1] + 1) >> 1; \
    v[size*2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2; \
\
    for (j = 0; j < size / 2; j++) \
        memcpy(dst + j*stride, v + j*2, size * sizeof(pixel)); \
    for (j = size / 2; j < size; j++) { \
        memcpy(dst + j*stride, v + j*2, (size*2 - 2 - j*2) * sizeof(pixel)); \
        memset_bpc(dst + j*stride + size*2 - 2 - j*2, left[size - 1], \
                   2 + j*2 - size); \
    } \
}

def_hor_up(8)
def_hor_up(16)
def_hor_up(32)

#undef DST
/* Populate the intra_pred function table: one entry per transform size
 * (4x4..32x32) and per VP9 intra prediction mode. */
static av_cold void vp9dsp_intrapred_init(VP9DSPContext *dsp)
{
#define init_intra_pred(tx, sz) \
    dsp->intra_pred[tx][VERT_PRED] = vert_##sz##_c; \
    dsp->intra_pred[tx][HOR_PRED] = hor_##sz##_c; \
    dsp->intra_pred[tx][DC_PRED] = dc_##sz##_c; \
    dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED] = diag_downleft_##sz##_c; \
    dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = diag_downright_##sz##_c; \
    dsp->intra_pred[tx][VERT_RIGHT_PRED] = vert_right_##sz##_c; \
    dsp->intra_pred[tx][HOR_DOWN_PRED] = hor_down_##sz##_c; \
    dsp->intra_pred[tx][VERT_LEFT_PRED] = vert_left_##sz##_c; \
    dsp->intra_pred[tx][HOR_UP_PRED] = hor_up_##sz##_c; \
    dsp->intra_pred[tx][TM_VP8_PRED] = tm_##sz##_c; \
    dsp->intra_pred[tx][LEFT_DC_PRED] = dc_left_##sz##_c; \
    dsp->intra_pred[tx][TOP_DC_PRED] = dc_top_##sz##_c; \
    dsp->intra_pred[tx][DC_128_PRED] = dc_128_##sz##_c; \
    dsp->intra_pred[tx][DC_127_PRED] = dc_127_##sz##_c; \
    dsp->intra_pred[tx][DC_129_PRED] = dc_129_##sz##_c

    init_intra_pred(TX_4X4, 4x4);
    init_intra_pred(TX_8X8, 8x8);
    init_intra_pred(TX_16X16, 16x16);
    init_intra_pred(TX_32X32, 32x32);

#undef init_intra_pred
}
/*
 * Generate a complete 2D inverse-transform-and-add function from two 1D
 * transforms: type_a is applied in the first pass (reading the
 * coefficient block with stride sz), type_b in the second pass over the
 * intermediate buffer. Each result is rounded down by "bits" and added
 * to the destination with pixel-range clipping.
 *
 * If has_dconly is set and eob == 1, only the DC coefficient is
 * present: block[0] is scaled twice by 11585/16384 (~1/sqrt(2), the 2D
 * DC gain) and the resulting constant is added to every pixel.
 *
 * In both paths the coefficient buffer is zeroed afterwards, so the
 * caller gets back a cleared block for the next transform.
 */
#define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \
static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *_dst, \
                                                    ptrdiff_t stride, \
                                                    int16_t *_block, int eob) \
{ \
    int i, j; \
    pixel *dst = (pixel *) _dst; \
    dctcoef *block = (dctcoef *) _block, tmp[sz * sz], out[sz]; \
\
    stride /= sizeof(pixel); \
    if (has_dconly && eob == 1) { \
        const int t = (((block[0] * 11585 + (1 << 13)) >> 14) \
                       * 11585 + (1 << 13)) >> 14; \
        block[0] = 0; \
        for (i = 0; i < sz; i++) { \
            for (j = 0; j < sz; j++) \
                dst[j * stride] = av_clip_pixel(dst[j * stride] + \
                                                (bits ? \
                                                 (t + (1 << (bits - 1))) >> bits : \
                                                 t)); \
            dst++; \
        } \
        return; \
    } \
\
    for (i = 0; i < sz; i++) \
        type_a##sz##_1d(block + i, sz, tmp + i * sz, 0); \
    memset(block, 0, sz * sz * sizeof(*block)); \
    for (i = 0; i < sz; i++) { \
        type_b##sz##_1d(tmp + i, sz, out, 1); \
        for (j = 0; j < sz; j++) \
            dst[j * stride] = av_clip_pixel(dst[j * stride] + \
                                            (bits ? \
                                             (out[j] + (1 << (bits - 1))) >> bits : \
                                             out[j])); \
        dst++; \
    } \
}
/* Instantiate all four DCT/ADST combinations for one transform size.
 * Only the pure DCT/DCT variant gets the DC-only fast path. */
#define itxfm_wrap(sz, bits) \
itxfm_wrapper(idct,  idct,  sz, bits, 1) \
itxfm_wrapper(iadst, idct,  sz, bits, 0) \
itxfm_wrapper(idct,  iadst, sz, bits, 0) \
itxfm_wrapper(iadst, iadst, sz, bits, 0)
/* Fetch input coefficient x, honouring the row/column stride. */
#define IN(x) ((dctint) in[(x) * stride])

/**
 * 4-point inverse DCT, one dimension.
 * The multipliers are cos/sin terms in Q14 fixed point (e.g. 11585 ~=
 * 16384/sqrt(2)); each butterfly result is rounded with
 * (x + (1 << 13)) >> 14.
 */
static av_always_inline void idct4_1d(const dctcoef *in, ptrdiff_t stride,
                                      dctcoef *out, int pass)
{
    dctint t0, t1, t2, t3;
    t0 = ((IN(0) + IN(2)) * 11585 + (1 << 13)) >> 14;
    t1 = ((IN(0) - IN(2)) * 11585 + (1 << 13)) >> 14;
    t2 = (IN(1) * 6270 - IN(3) * 15137 + (1 << 13)) >> 14;
    t3 = (IN(1) * 15137 + IN(3) * 6270 + (1 << 13)) >> 14;
    out[0] = t0 + t3;
    out[1] = t1 + t2;
    out[2] = t1 - t2;
    out[3] = t0 - t3;
}
/**
 * 4-point inverse ADST, one dimension.
 * The multipliers are sin terms in Q14 fixed point; outputs are rounded
 * back with (x + (1 << 13)) >> 14.
 *
 * The intermediates are declared dctint, not plain int, for consistency
 * with idct4_1d/iadst8_1d/iadst16_1d in this file.
 * NOTE(review): this also matters if dctint is a wider type than int
 * for deep bit depths (the products of Q14 constants with large
 * coefficients can exceed 32 bits) — confirm against the template
 * header that defines dctint.
 */
static av_always_inline void iadst4_1d(const dctcoef *in, ptrdiff_t stride,
                                       dctcoef *out, int pass)
{
    dctint t0, t1, t2, t3;
    t0 = 5283 * IN(0) + 15212 * IN(2) + 9929 * IN(3);
    t1 = 9929 * IN(0) - 5283 * IN(2) - 15212 * IN(3);
    t2 = 13377 * (IN(0) - IN(2) + IN(3));
    t3 = 13377 * IN(1);
    out[0] = (t0 + t3 + (1 << 13)) >> 14;
    out[1] = (t1 + t3 + (1 << 13)) >> 14;
    out[2] = (t2 + (1 << 13)) >> 14;
    out[3] = (t0 + t1 - t3 + (1 << 13)) >> 14;
}
itxfm_wrap(4, 4)
/**
 * 8-point inverse DCT, one dimension.
 * Standard butterfly decomposition: even inputs (0,4,2,6) go through a
 * 4-point IDCT, odd inputs (1,5,3,7) through the odd half, then the two
 * halves are combined. All multipliers are Q14 trig constants.
 */
static av_always_inline void idct8_1d(const dctcoef *in, ptrdiff_t stride,
                                      dctcoef *out, int pass)
{
    dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;
    t0a = ((IN(0) + IN(4)) * 11585 + (1 << 13)) >> 14;
    t1a = ((IN(0) - IN(4)) * 11585 + (1 << 13)) >> 14;
    t2a = (IN(2) * 6270 - IN(6) * 15137 + (1 << 13)) >> 14;
    t3a = (IN(2) * 15137 + IN(6) * 6270 + (1 << 13)) >> 14;
    t4a = (IN(1) * 3196 - IN(7) * 16069 + (1 << 13)) >> 14;
    t5a = (IN(5) * 13623 - IN(3) * 9102 + (1 << 13)) >> 14;
    t6a = (IN(5) * 9102 + IN(3) * 13623 + (1 << 13)) >> 14;
    t7a = (IN(1) * 16069 + IN(7) * 3196 + (1 << 13)) >> 14;
    t0 = t0a + t3a;
    t1 = t1a + t2a;
    t2 = t1a - t2a;
    t3 = t0a - t3a;
    t4 = t4a + t5a;
    t5a = t4a - t5a;
    t7 = t7a + t6a;
    t6a = t7a - t6a;
    t5 = ((t6a - t5a) * 11585 + (1 << 13)) >> 14;
    t6 = ((t6a + t5a) * 11585 + (1 << 13)) >> 14;
    /* final butterfly: combine even and odd halves */
    out[0] = t0 + t7;
    out[1] = t1 + t6;
    out[2] = t2 + t5;
    out[3] = t3 + t4;
    out[4] = t3 - t4;
    out[5] = t2 - t5;
    out[6] = t1 - t6;
    out[7] = t0 - t7;
}
/**
 * 8-point inverse ADST, one dimension.
 * Q14 fixed-point butterflies; several outputs are negated as part of
 * the ADST sign pattern.
 */
static av_always_inline void iadst8_1d(const dctcoef *in, ptrdiff_t stride,
                                       dctcoef *out, int pass)
{
    dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;
    t0a = 16305 * IN(7) + 1606 * IN(0);
    t1a = 1606 * IN(7) - 16305 * IN(0);
    t2a = 14449 * IN(5) + 7723 * IN(2);
    t3a = 7723 * IN(5) - 14449 * IN(2);
    t4a = 10394 * IN(3) + 12665 * IN(4);
    t5a = 12665 * IN(3) - 10394 * IN(4);
    t6a = 4756 * IN(1) + 15679 * IN(6);
    t7a = 15679 * IN(1) - 4756 * IN(6);
    t0 = (t0a + t4a + (1 << 13)) >> 14;
    t1 = (t1a + t5a + (1 << 13)) >> 14;
    t2 = (t2a + t6a + (1 << 13)) >> 14;
    t3 = (t3a + t7a + (1 << 13)) >> 14;
    t4 = (t0a - t4a + (1 << 13)) >> 14;
    t5 = (t1a - t5a + (1 << 13)) >> 14;
    t6 = (t2a - t6a + (1 << 13)) >> 14;
    t7 = (t3a - t7a + (1 << 13)) >> 14;
    t4a = 15137 * t4 + 6270 * t5;
    t5a = 6270 * t4 - 15137 * t5;
    t6a = 15137 * t7 - 6270 * t6;
    t7a = 6270 * t7 + 15137 * t6;
    out[0] = t0 + t2;
    out[7] = -(t1 + t3);
    t2 = t0 - t2;
    t3 = t1 - t3;
    out[1] = -((t4a + t6a + (1 << 13)) >> 14);
    out[6] = (t5a + t7a + (1 << 13)) >> 14;
    t6 = (t4a - t6a + (1 << 13)) >> 14;
    t7 = (t5a - t7a + (1 << 13)) >> 14;
    out[3] = -(((t2 + t3) * 11585 + (1 << 13)) >> 14);
    out[4] = ((t2 - t3) * 11585 + (1 << 13)) >> 14;
    out[2] = ((t6 + t7) * 11585 + (1 << 13)) >> 14;
    out[5] = -(((t6 - t7) * 11585 + (1 << 13)) >> 14);
}
itxfm_wrap(8, 5)
/**
 * 16-point inverse DCT, one dimension.
 * Same recursive butterfly structure as idct8_1d, one level deeper:
 * stage 1 rotates the inputs with Q14 trig constants, stages 2-4 are
 * add/sub butterflies with a few extra rotations, and the final stage
 * mirrors the results into out[0..15].
 */
static av_always_inline void idct16_1d(const dctcoef *in, ptrdiff_t stride,
                                       dctcoef *out, int pass)
{
    dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
    dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
    dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
    /* stage 1: input rotations */
    t0a = ((IN(0) + IN(8)) * 11585 + (1 << 13)) >> 14;
    t1a = ((IN(0) - IN(8)) * 11585 + (1 << 13)) >> 14;
    t2a = (IN(4) * 6270 - IN(12) * 15137 + (1 << 13)) >> 14;
    t3a = (IN(4) * 15137 + IN(12) * 6270 + (1 << 13)) >> 14;
    t4a = (IN(2) * 3196 - IN(14) * 16069 + (1 << 13)) >> 14;
    t7a = (IN(2) * 16069 + IN(14) * 3196 + (1 << 13)) >> 14;
    t5a = (IN(10) * 13623 - IN(6) * 9102 + (1 << 13)) >> 14;
    t6a = (IN(10) * 9102 + IN(6) * 13623 + (1 << 13)) >> 14;
    t8a = (IN(1) * 1606 - IN(15) * 16305 + (1 << 13)) >> 14;
    t15a = (IN(1) * 16305 + IN(15) * 1606 + (1 << 13)) >> 14;
    t9a = (IN(9) * 12665 - IN(7) * 10394 + (1 << 13)) >> 14;
    t14a = (IN(9) * 10394 + IN(7) * 12665 + (1 << 13)) >> 14;
    t10a = (IN(5) * 7723 - IN(11) * 14449 + (1 << 13)) >> 14;
    t13a = (IN(5) * 14449 + IN(11) * 7723 + (1 << 13)) >> 14;
    t11a = (IN(13) * 15679 - IN(3) * 4756 + (1 << 13)) >> 14;
    t12a = (IN(13) * 4756 + IN(3) * 15679 + (1 << 13)) >> 14;
    /* stage 2: butterflies */
    t0 = t0a + t3a;
    t1 = t1a + t2a;
    t2 = t1a - t2a;
    t3 = t0a - t3a;
    t4 = t4a + t5a;
    t5 = t4a - t5a;
    t6 = t7a - t6a;
    t7 = t7a + t6a;
    t8 = t8a + t9a;
    t9 = t8a - t9a;
    t10 = t11a - t10a;
    t11 = t11a + t10a;
    t12 = t12a + t13a;
    t13 = t12a - t13a;
    t14 = t15a - t14a;
    t15 = t15a + t14a;
    /* stage 3: extra rotations on the odd half */
    t5a = ((t6 - t5) * 11585 + (1 << 13)) >> 14;
    t6a = ((t6 + t5) * 11585 + (1 << 13)) >> 14;
    t9a = ( t14 * 6270 - t9 * 15137 + (1 << 13)) >> 14;
    t14a = ( t14 * 15137 + t9 * 6270 + (1 << 13)) >> 14;
    t10a = (-(t13 * 15137 + t10 * 6270) + (1 << 13)) >> 14;
    t13a = ( t13 * 6270 - t10 * 15137 + (1 << 13)) >> 14;
    t0a = t0 + t7;
    t1a = t1 + t6a;
    t2a = t2 + t5a;
    t3a = t3 + t4;
    t4 = t3 - t4;
    t5 = t2 - t5a;
    t6 = t1 - t6a;
    t7 = t0 - t7;
    t8a = t8 + t11;
    t9 = t9a + t10a;
    t10 = t9a - t10a;
    t11a = t8 - t11;
    t12a = t15 - t12;
    t13 = t14a - t13a;
    t14 = t14a + t13a;
    t15a = t15 + t12;
    t10a = ((t13 - t10) * 11585 + (1 << 13)) >> 14;
    t13a = ((t13 + t10) * 11585 + (1 << 13)) >> 14;
    t11 = ((t12a - t11a) * 11585 + (1 << 13)) >> 14;
    t12 = ((t12a + t11a) * 11585 + (1 << 13)) >> 14;
    /* final stage: mirror even/odd halves into the output */
    out[ 0] = t0a + t15a;
    out[ 1] = t1a + t14;
    out[ 2] = t2a + t13a;
    out[ 3] = t3a + t12;
    out[ 4] = t4 + t11;
    out[ 5] = t5 + t10a;
    out[ 6] = t6 + t9;
    out[ 7] = t7 + t8a;
    out[ 8] = t7 - t8a;
    out[ 9] = t6 - t9;
    out[10] = t5 - t10a;
    out[11] = t4 - t11;
    out[12] = t3a - t12;
    out[13] = t2a - t13a;
    out[14] = t1a - t14;
    out[15] = t0a - t15a;
}
/**
 * 16-point inverse ADST, one dimension.
 * Q14 fixed-point lattice with the ADST sign pattern applied on output
 * (alternating negations), analogous to iadst8_1d one level deeper.
 */
static av_always_inline void iadst16_1d(const dctcoef *in, ptrdiff_t stride,
                                        dctcoef *out, int pass)
{
    dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
    dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
    dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
    /* stage 1: input rotations */
    t0 = IN(15) * 16364 + IN(0) * 804;
    t1 = IN(15) * 804 - IN(0) * 16364;
    t2 = IN(13) * 15893 + IN(2) * 3981;
    t3 = IN(13) * 3981 - IN(2) * 15893;
    t4 = IN(11) * 14811 + IN(4) * 7005;
    t5 = IN(11) * 7005 - IN(4) * 14811;
    t6 = IN(9) * 13160 + IN(6) * 9760;
    t7 = IN(9) * 9760 - IN(6) * 13160;
    t8 = IN(7) * 11003 + IN(8) * 12140;
    t9 = IN(7) * 12140 - IN(8) * 11003;
    t10 = IN(5) * 8423 + IN(10) * 14053;
    t11 = IN(5) * 14053 - IN(10) * 8423;
    t12 = IN(3) * 5520 + IN(12) * 15426;
    t13 = IN(3) * 15426 - IN(12) * 5520;
    t14 = IN(1) * 2404 + IN(14) * 16207;
    t15 = IN(1) * 16207 - IN(14) * 2404;
    /* stage 2: butterflies with rounding */
    t0a = (t0 + t8 + (1 << 13)) >> 14;
    t1a = (t1 + t9 + (1 << 13)) >> 14;
    t2a = (t2 + t10 + (1 << 13)) >> 14;
    t3a = (t3 + t11 + (1 << 13)) >> 14;
    t4a = (t4 + t12 + (1 << 13)) >> 14;
    t5a = (t5 + t13 + (1 << 13)) >> 14;
    t6a = (t6 + t14 + (1 << 13)) >> 14;
    t7a = (t7 + t15 + (1 << 13)) >> 14;
    t8a = (t0 - t8 + (1 << 13)) >> 14;
    t9a = (t1 - t9 + (1 << 13)) >> 14;
    t10a = (t2 - t10 + (1 << 13)) >> 14;
    t11a = (t3 - t11 + (1 << 13)) >> 14;
    t12a = (t4 - t12 + (1 << 13)) >> 14;
    t13a = (t5 - t13 + (1 << 13)) >> 14;
    t14a = (t6 - t14 + (1 << 13)) >> 14;
    t15a = (t7 - t15 + (1 << 13)) >> 14;
    t8 = t8a * 16069 + t9a * 3196;
    t9 = t8a * 3196 - t9a * 16069;
    t10 = t10a * 9102 + t11a * 13623;
    t11 = t10a * 13623 - t11a * 9102;
    t12 = t13a * 16069 - t12a * 3196;
    t13 = t13a * 3196 + t12a * 16069;
    t14 = t15a * 9102 - t14a * 13623;
    t15 = t15a * 13623 + t14a * 9102;
    t0 = t0a + t4a;
    t1 = t1a + t5a;
    t2 = t2a + t6a;
    t3 = t3a + t7a;
    t4 = t0a - t4a;
    t5 = t1a - t5a;
    t6 = t2a - t6a;
    t7 = t3a - t7a;
    t8a = (t8 + t12 + (1 << 13)) >> 14;
    t9a = (t9 + t13 + (1 << 13)) >> 14;
    t10a = (t10 + t14 + (1 << 13)) >> 14;
    t11a = (t11 + t15 + (1 << 13)) >> 14;
    t12a = (t8 - t12 + (1 << 13)) >> 14;
    t13a = (t9 - t13 + (1 << 13)) >> 14;
    t14a = (t10 - t14 + (1 << 13)) >> 14;
    t15a = (t11 - t15 + (1 << 13)) >> 14;
    t4a = t4 * 15137 + t5 * 6270;
    t5a = t4 * 6270 - t5 * 15137;
    t6a = t7 * 15137 - t6 * 6270;
    t7a = t7 * 6270 + t6 * 15137;
    t12 = t12a * 15137 + t13a * 6270;
    t13 = t12a * 6270 - t13a * 15137;
    t14 = t15a * 15137 - t14a * 6270;
    t15 = t15a * 6270 + t14a * 15137;
    /* final stage: outputs, with the ADST alternating negations */
    out[ 0] = t0 + t2;
    out[15] = -(t1 + t3);
    t2a = t0 - t2;
    t3a = t1 - t3;
    out[ 3] = -((t4a + t6a + (1 << 13)) >> 14);
    out[12] = (t5a + t7a + (1 << 13)) >> 14;
    t6 = (t4a - t6a + (1 << 13)) >> 14;
    t7 = (t5a - t7a + (1 << 13)) >> 14;
    out[ 1] = -(t8a + t10a);
    out[14] = t9a + t11a;
    t10 = t8a - t10a;
    t11 = t9a - t11a;
    out[ 2] = (t12 + t14 + (1 << 13)) >> 14;
    out[13] = -((t13 + t15 + (1 << 13)) >> 14);
    t14a = (t12 - t14 + (1 << 13)) >> 14;
    t15a = (t13 - t15 + (1 << 13)) >> 14;
    out[ 7] = ((t2a + t3a) * -11585 + (1 << 13)) >> 14;
    out[ 8] = ((t2a - t3a) * 11585 + (1 << 13)) >> 14;
    out[ 4] = ((t7 + t6) * 11585 + (1 << 13)) >> 14;
    out[11] = ((t7 - t6) * 11585 + (1 << 13)) >> 14;
    out[ 6] = ((t11 + t10) * 11585 + (1 << 13)) >> 14;
    out[ 9] = ((t11 - t10) * 11585 + (1 << 13)) >> 14;
    out[ 5] = ((t14a + t15a) * -11585 + (1 << 13)) >> 14;
    out[10] = ((t14a - t15a) * 11585 + (1 << 13)) >> 14;
}
itxfm_wrap(16, 6)
/**
 * 32-point inverse DCT, one dimension.
 * One more recursion level than idct16_1d: inputs 0..31 are rotated in
 * stage 1, then folded through add/sub butterflies with intermediate
 * rotations, and the final stage mirrors the halves into out[0..31].
 * Only the DCT variant exists at this size, so it is instantiated via
 * itxfm_wrapper() directly below rather than via itxfm_wrap().
 */
static av_always_inline void idct32_1d(const dctcoef *in, ptrdiff_t stride,
                                       dctcoef *out, int pass)
{
    /* stage 1: input rotations (Q14 trig constants) */
    dctint t0a = ((IN(0) + IN(16)) * 11585 + (1 << 13)) >> 14;
    dctint t1a = ((IN(0) - IN(16)) * 11585 + (1 << 13)) >> 14;
    dctint t2a = (IN( 8) * 6270 - IN(24) * 15137 + (1 << 13)) >> 14;
    dctint t3a = (IN( 8) * 15137 + IN(24) * 6270 + (1 << 13)) >> 14;
    dctint t4a = (IN( 4) * 3196 - IN(28) * 16069 + (1 << 13)) >> 14;
    dctint t7a = (IN( 4) * 16069 + IN(28) * 3196 + (1 << 13)) >> 14;
    dctint t5a = (IN(20) * 13623 - IN(12) * 9102 + (1 << 13)) >> 14;
    dctint t6a = (IN(20) * 9102 + IN(12) * 13623 + (1 << 13)) >> 14;
    dctint t8a = (IN( 2) * 1606 - IN(30) * 16305 + (1 << 13)) >> 14;
    dctint t15a = (IN( 2) * 16305 + IN(30) * 1606 + (1 << 13)) >> 14;
    dctint t9a = (IN(18) * 12665 - IN(14) * 10394 + (1 << 13)) >> 14;
    dctint t14a = (IN(18) * 10394 + IN(14) * 12665 + (1 << 13)) >> 14;
    dctint t10a = (IN(10) * 7723 - IN(22) * 14449 + (1 << 13)) >> 14;
    dctint t13a = (IN(10) * 14449 + IN(22) * 7723 + (1 << 13)) >> 14;
    dctint t11a = (IN(26) * 15679 - IN( 6) * 4756 + (1 << 13)) >> 14;
    dctint t12a = (IN(26) * 4756 + IN( 6) * 15679 + (1 << 13)) >> 14;
    dctint t16a = (IN( 1) * 804 - IN(31) * 16364 + (1 << 13)) >> 14;
    dctint t31a = (IN( 1) * 16364 + IN(31) * 804 + (1 << 13)) >> 14;
    dctint t17a = (IN(17) * 12140 - IN(15) * 11003 + (1 << 13)) >> 14;
    dctint t30a = (IN(17) * 11003 + IN(15) * 12140 + (1 << 13)) >> 14;
    dctint t18a = (IN( 9) * 7005 - IN(23) * 14811 + (1 << 13)) >> 14;
    dctint t29a = (IN( 9) * 14811 + IN(23) * 7005 + (1 << 13)) >> 14;
    dctint t19a = (IN(25) * 15426 - IN( 7) * 5520 + (1 << 13)) >> 14;
    dctint t28a = (IN(25) * 5520 + IN( 7) * 15426 + (1 << 13)) >> 14;
    dctint t20a = (IN( 5) * 3981 - IN(27) * 15893 + (1 << 13)) >> 14;
    dctint t27a = (IN( 5) * 15893 + IN(27) * 3981 + (1 << 13)) >> 14;
    dctint t21a = (IN(21) * 14053 - IN(11) * 8423 + (1 << 13)) >> 14;
    dctint t26a = (IN(21) * 8423 + IN(11) * 14053 + (1 << 13)) >> 14;
    dctint t22a = (IN(13) * 9760 - IN(19) * 13160 + (1 << 13)) >> 14;
    dctint t25a = (IN(13) * 13160 + IN(19) * 9760 + (1 << 13)) >> 14;
    dctint t23a = (IN(29) * 16207 - IN( 3) * 2404 + (1 << 13)) >> 14;
    dctint t24a = (IN(29) * 2404 + IN( 3) * 16207 + (1 << 13)) >> 14;
    /* stage 2: butterflies */
    dctint t0 = t0a + t3a;
    dctint t1 = t1a + t2a;
    dctint t2 = t1a - t2a;
    dctint t3 = t0a - t3a;
    dctint t4 = t4a + t5a;
    dctint t5 = t4a - t5a;
    dctint t6 = t7a - t6a;
    dctint t7 = t7a + t6a;
    dctint t8 = t8a + t9a;
    dctint t9 = t8a - t9a;
    dctint t10 = t11a - t10a;
    dctint t11 = t11a + t10a;
    dctint t12 = t12a + t13a;
    dctint t13 = t12a - t13a;
    dctint t14 = t15a - t14a;
    dctint t15 = t15a + t14a;
    dctint t16 = t16a + t17a;
    dctint t17 = t16a - t17a;
    dctint t18 = t19a - t18a;
    dctint t19 = t19a + t18a;
    dctint t20 = t20a + t21a;
    dctint t21 = t20a - t21a;
    dctint t22 = t23a - t22a;
    dctint t23 = t23a + t22a;
    dctint t24 = t24a + t25a;
    dctint t25 = t24a - t25a;
    dctint t26 = t27a - t26a;
    dctint t27 = t27a + t26a;
    dctint t28 = t28a + t29a;
    dctint t29 = t28a - t29a;
    dctint t30 = t31a - t30a;
    dctint t31 = t31a + t30a;
    /* stage 3: intermediate rotations */
    t5a = ((t6 - t5) * 11585 + (1 << 13)) >> 14;
    t6a = ((t6 + t5) * 11585 + (1 << 13)) >> 14;
    t9a = ( t14 * 6270 - t9 * 15137 + (1 << 13)) >> 14;
    t14a = ( t14 * 15137 + t9 * 6270 + (1 << 13)) >> 14;
    t10a = (-(t13 * 15137 + t10 * 6270) + (1 << 13)) >> 14;
    t13a = ( t13 * 6270 - t10 * 15137 + (1 << 13)) >> 14;
    t17a = ( t30 * 3196 - t17 * 16069 + (1 << 13)) >> 14;
    t30a = ( t30 * 16069 + t17 * 3196 + (1 << 13)) >> 14;
    t18a = (-(t29 * 16069 + t18 * 3196) + (1 << 13)) >> 14;
    t29a = ( t29 * 3196 - t18 * 16069 + (1 << 13)) >> 14;
    t21a = ( t26 * 13623 - t21 * 9102 + (1 << 13)) >> 14;
    t26a = ( t26 * 9102 + t21 * 13623 + (1 << 13)) >> 14;
    t22a = (-(t25 * 9102 + t22 * 13623) + (1 << 13)) >> 14;
    t25a = ( t25 * 13623 - t22 * 9102 + (1 << 13)) >> 14;
    /* stage 4: butterflies */
    t0a = t0 + t7;
    t1a = t1 + t6a;
    t2a = t2 + t5a;
    t3a = t3 + t4;
    t4a = t3 - t4;
    t5 = t2 - t5a;
    t6 = t1 - t6a;
    t7a = t0 - t7;
    t8a = t8 + t11;
    t9 = t9a + t10a;
    t10 = t9a - t10a;
    t11a = t8 - t11;
    t12a = t15 - t12;
    t13 = t14a - t13a;
    t14 = t14a + t13a;
    t15a = t15 + t12;
    t16a = t16 + t19;
    t17 = t17a + t18a;
    t18 = t17a - t18a;
    t19a = t16 - t19;
    t20a = t23 - t20;
    t21 = t22a - t21a;
    t22 = t22a + t21a;
    t23a = t23 + t20;
    t24a = t24 + t27;
    t25 = t25a + t26a;
    t26 = t25a - t26a;
    t27a = t24 - t27;
    t28a = t31 - t28;
    t29 = t30a - t29a;
    t30 = t30a + t29a;
    t31a = t31 + t28;
    /* stage 5: more rotations */
    t10a = ((t13 - t10) * 11585 + (1 << 13)) >> 14;
    t13a = ((t13 + t10) * 11585 + (1 << 13)) >> 14;
    t11 = ((t12a - t11a) * 11585 + (1 << 13)) >> 14;
    t12 = ((t12a + t11a) * 11585 + (1 << 13)) >> 14;
    t18a = ( t29 * 6270 - t18 * 15137 + (1 << 13)) >> 14;
    t29a = ( t29 * 15137 + t18 * 6270 + (1 << 13)) >> 14;
    t19 = ( t28a * 6270 - t19a * 15137 + (1 << 13)) >> 14;
    t28 = ( t28a * 15137 + t19a * 6270 + (1 << 13)) >> 14;
    t20 = (-(t27a * 15137 + t20a * 6270) + (1 << 13)) >> 14;
    t27 = ( t27a * 6270 - t20a * 15137 + (1 << 13)) >> 14;
    t21a = (-(t26 * 15137 + t21 * 6270) + (1 << 13)) >> 14;
    t26a = ( t26 * 6270 - t21 * 15137 + (1 << 13)) >> 14;
    /* stage 6: butterflies */
    t0 = t0a + t15a;
    t1 = t1a + t14;
    t2 = t2a + t13a;
    t3 = t3a + t12;
    t4 = t4a + t11;
    t5a = t5 + t10a;
    t6a = t6 + t9;
    t7 = t7a + t8a;
    t8 = t7a - t8a;
    t9a = t6 - t9;
    t10 = t5 - t10a;
    t11a = t4a - t11;
    t12a = t3a - t12;
    t13 = t2a - t13a;
    t14a = t1a - t14;
    t15 = t0a - t15a;
    t16 = t16a + t23a;
    t17a = t17 + t22;
    t18 = t18a + t21a;
    t19a = t19 + t20;
    t20a = t19 - t20;
    t21 = t18a - t21a;
    t22a = t17 - t22;
    t23 = t16a - t23a;
    t24 = t31a - t24a;
    t25a = t30 - t25;
    t26 = t29a - t26a;
    t27a = t28 - t27;
    t28a = t28 + t27;
    t29 = t29a + t26a;
    t30a = t30 + t25;
    t31 = t31a + t24a;
    /* stage 7: last rotations on the odd half */
    t20 = ((t27a - t20a) * 11585 + (1 << 13)) >> 14;
    t27 = ((t27a + t20a) * 11585 + (1 << 13)) >> 14;
    t21a = ((t26 - t21 ) * 11585 + (1 << 13)) >> 14;
    t26a = ((t26 + t21 ) * 11585 + (1 << 13)) >> 14;
    t22 = ((t25a - t22a) * 11585 + (1 << 13)) >> 14;
    t25 = ((t25a + t22a) * 11585 + (1 << 13)) >> 14;
    t23a = ((t24 - t23 ) * 11585 + (1 << 13)) >> 14;
    t24a = ((t24 + t23 ) * 11585 + (1 << 13)) >> 14;
    /* final stage: mirror the two halves into the output */
    out[ 0] = t0 + t31;
    out[ 1] = t1 + t30a;
    out[ 2] = t2 + t29;
    out[ 3] = t3 + t28a;
    out[ 4] = t4 + t27;
    out[ 5] = t5a + t26a;
    out[ 6] = t6a + t25;
    out[ 7] = t7 + t24a;
    out[ 8] = t8 + t23a;
    out[ 9] = t9a + t22;
    out[10] = t10 + t21a;
    out[11] = t11a + t20;
    out[12] = t12a + t19a;
    out[13] = t13 + t18;
    out[14] = t14a + t17a;
    out[15] = t15 + t16;
    out[16] = t15 - t16;
    out[17] = t14a - t17a;
    out[18] = t13 - t18;
    out[19] = t12a - t19a;
    out[20] = t11a - t20;
    out[21] = t10 - t21a;
    out[22] = t9a - t22;
    out[23] = t8 - t23a;
    out[24] = t7 - t24a;
    out[25] = t6a - t25;
    out[26] = t5a - t26a;
    out[27] = t4 - t27;
    out[28] = t3 - t28a;
    out[29] = t2 - t29;
    out[30] = t1 - t30a;
    out[31] = t0 - t31;
}
itxfm_wrapper(idct, idct, 32, 6, 1)
/**
 * 4-point inverse Walsh-Hadamard transform (lossless mode).
 * Integer-exact (no rounding constants, bits == 0 in the wrapper). The
 * >> 2 pre-scale is applied only on the first pass; note the input
 * permutation (0, 3, 1, 2).
 * NOTE(review): plain int intermediates appear sufficient here since
 * WHT operates on small lossless residuals — confirm against the range
 * guarantees of the spec if dctint is widened for deep bit depths.
 */
static av_always_inline void iwht4_1d(const dctcoef *in, ptrdiff_t stride,
                                      dctcoef *out, int pass)
{
    int t0, t1, t2, t3, t4;
    if (pass == 0) {
        t0 = IN(0) >> 2;
        t1 = IN(3) >> 2;
        t2 = IN(1) >> 2;
        t3 = IN(2) >> 2;
    } else {
        t0 = IN(0);
        t1 = IN(3);
        t2 = IN(1);
        t3 = IN(2);
    }
    t0 += t2;
    t3 -= t1;
    t4 = (t0 - t3) >> 1;
    t1 = t4 - t1;
    t2 = t4 - t2;
    t0 -= t1;
    t3 += t2;
    out[0] = t0;
    out[1] = t1;
    out[2] = t2;
    out[3] = t3;
}
itxfm_wrapper(iwht, iwht, 4, 0, 0)
#undef IN
#undef itxfm_wrapper
#undef itxfm_wrap
/**
 * Fill the inverse-transform-add function table.
 * init_itxfm() wires the four DCT/ADST combinations for sizes that have
 * all of them; init_idct() points all four entries at the same function
 * for 32x32 (DCT only) and for the lossless WHT slot (index 4).
 */
static av_cold void vp9dsp_itxfm_init(VP9DSPContext *dsp)
{
#define init_itxfm(tx, sz) \
    dsp->itxfm_add[tx][DCT_DCT] = idct_idct_##sz##_add_c; \
    dsp->itxfm_add[tx][DCT_ADST] = iadst_idct_##sz##_add_c; \
    dsp->itxfm_add[tx][ADST_DCT] = idct_iadst_##sz##_add_c; \
    dsp->itxfm_add[tx][ADST_ADST] = iadst_iadst_##sz##_add_c
#define init_idct(tx, nm) \
    dsp->itxfm_add[tx][DCT_DCT] = \
    dsp->itxfm_add[tx][ADST_DCT] = \
    dsp->itxfm_add[tx][DCT_ADST] = \
    dsp->itxfm_add[tx][ADST_ADST] = nm##_add_c
    init_itxfm(TX_4X4, 4x4);
    init_itxfm(TX_8X8, 8x8);
    init_itxfm(TX_16X16, 16x16);
    init_idct(TX_32X32, idct_idct_32x32);
    init_idct(4 /* lossless */, iwht_iwht_4x4);
#undef init_itxfm
#undef init_idct
}
/**
 * VP9 deblocking loop filter, applied to 8 pixels along one block edge.
 *
 * @param dst     pointer to the first pixel on the edge
 * @param E, I, H edge/interior/high-edge-variance thresholds (8-bit
 *                scale; shifted up to the template's bit depth below)
 * @param stridea step to the next pixel along the edge
 * @param strideb step across the edge (p's at negative, q's at positive
 *                multiples); swapping stridea/strideb switches between
 *                horizontal and vertical edges
 * @param wd      filter width: 4, 8 or 16 taps (compile-time constant
 *                via the wrappers below, so the wd checks fold away)
 */
static av_always_inline void loop_filter(pixel *dst, int E, int I, int H,
                                         ptrdiff_t stridea, ptrdiff_t strideb,
                                         int wd)
{
    int i, F = 1 << (BIT_DEPTH - 8);
    E <<= (BIT_DEPTH - 8);
    I <<= (BIT_DEPTH - 8);
    H <<= (BIT_DEPTH - 8);
    for (i = 0; i < 8; i++, dst += stridea) {
        int p7, p6, p5, p4;
        int p3 = dst[strideb * -4], p2 = dst[strideb * -3];
        int p1 = dst[strideb * -2], p0 = dst[strideb * -1];
        int q0 = dst[strideb * +0], q1 = dst[strideb * +1];
        int q2 = dst[strideb * +2], q3 = dst[strideb * +3];
        int q4, q5, q6, q7;
        /* fm: "filter mask" — skip this pixel column if the edge is
         * already smooth enough or too strong to be a blocking artifact */
        int fm = FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I &&
                 FFABS(p1 - p0) <= I && FFABS(q1 - q0) <= I &&
                 FFABS(q2 - q1) <= I && FFABS(q3 - q2) <= I &&
                 FFABS(p0 - q0) * 2 + (FFABS(p1 - q1) >> 1) <= E;
        /* flat8in/flat8out are only read behind wd >= 8 / wd >= 16
         * short-circuits, so they are never used uninitialized */
        int flat8out, flat8in;
        if (!fm)
            continue;
        if (wd >= 16) {
            p7 = dst[strideb * -8];
            p6 = dst[strideb * -7];
            p5 = dst[strideb * -6];
            p4 = dst[strideb * -5];
            q4 = dst[strideb * +4];
            q5 = dst[strideb * +5];
            q6 = dst[strideb * +6];
            q7 = dst[strideb * +7];
            flat8out = FFABS(p7 - p0) <= F && FFABS(p6 - p0) <= F &&
                       FFABS(p5 - p0) <= F && FFABS(p4 - p0) <= F &&
                       FFABS(q4 - q0) <= F && FFABS(q5 - q0) <= F &&
                       FFABS(q6 - q0) <= F && FFABS(q7 - q0) <= F;
        }
        if (wd >= 8)
            flat8in = FFABS(p3 - p0) <= F && FFABS(p2 - p0) <= F &&
                      FFABS(p1 - p0) <= F && FFABS(q1 - q0) <= F &&
                      FFABS(q2 - q0) <= F && FFABS(q3 - q0) <= F;
        if (wd >= 16 && flat8out && flat8in) {
            /* 16-tap: wide low-pass across 7 pixels each side */
            dst[strideb * -7] = (p7 + p7 + p7 + p7 + p7 + p7 + p7 + p6 * 2 +
                                 p5 + p4 + p3 + p2 + p1 + p0 + q0 + 8) >> 4;
            dst[strideb * -6] = (p7 + p7 + p7 + p7 + p7 + p7 + p6 + p5 * 2 +
                                 p4 + p3 + p2 + p1 + p0 + q0 + q1 + 8) >> 4;
            dst[strideb * -5] = (p7 + p7 + p7 + p7 + p7 + p6 + p5 + p4 * 2 +
                                 p3 + p2 + p1 + p0 + q0 + q1 + q2 + 8) >> 4;
            dst[strideb * -4] = (p7 + p7 + p7 + p7 + p6 + p5 + p4 + p3 * 2 +
                                 p2 + p1 + p0 + q0 + q1 + q2 + q3 + 8) >> 4;
            dst[strideb * -3] = (p7 + p7 + p7 + p6 + p5 + p4 + p3 + p2 * 2 +
                                 p1 + p0 + q0 + q1 + q2 + q3 + q4 + 8) >> 4;
            dst[strideb * -2] = (p7 + p7 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
                                 p0 + q0 + q1 + q2 + q3 + q4 + q5 + 8) >> 4;
            dst[strideb * -1] = (p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
                                 q0 + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
            dst[strideb * +0] = (p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 +
                                 q1 + q2 + q3 + q4 + q5 + q6 + q7 + 8) >> 4;
            dst[strideb * +1] = (p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 +
                                 q2 + q3 + q4 + q5 + q6 + q7 + q7 + 8) >> 4;
            dst[strideb * +2] = (p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 +
                                 q3 + q4 + q5 + q6 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +3] = (p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 * 2 +
                                 q4 + q5 + q6 + q7 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +4] = (p2 + p1 + p0 + q0 + q1 + q2 + q3 + q4 * 2 +
                                 q5 + q6 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +5] = (p1 + p0 + q0 + q1 + q2 + q3 + q4 + q5 * 2 +
                                 q6 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
            dst[strideb * +6] = (p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 +
                                 q7 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
        } else if (wd >= 8 && flat8in) {
            /* 8-tap: low-pass across 3 pixels each side */
            dst[strideb * -3] = (p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3;
            dst[strideb * -2] = (p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3;
            dst[strideb * -1] = (p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3;
            dst[strideb * +0] = (p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3;
            dst[strideb * +1] = (p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3 + 4) >> 3;
            dst[strideb * +2] = (p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3;
        } else {
            /* 4-tap: narrow filter; hev selects the 2-pixel variant */
            int hev = FFABS(p1 - p0) > H || FFABS(q1 - q0) > H;
            if (hev) {
                int f = av_clip_intp2(p1 - q1, BIT_DEPTH - 1), f1, f2;
                f = av_clip_intp2(3 * (q0 - p0) + f, BIT_DEPTH - 1);
                f1 = FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
                f2 = FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
                dst[strideb * -1] = av_clip_pixel(p0 + f2);
                dst[strideb * +0] = av_clip_pixel(q0 - f1);
            } else {
                int f = av_clip_intp2(3 * (q0 - p0), BIT_DEPTH - 1), f1, f2;
                f1 = FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
                f2 = FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
                dst[strideb * -1] = av_clip_pixel(p0 + f2);
                dst[strideb * +0] = av_clip_pixel(q0 - f1);
                f = (f1 + 1) >> 1;
                dst[strideb * -2] = av_clip_pixel(p1 + f);
                dst[strideb * +1] = av_clip_pixel(q1 - f);
            }
        }
    }
}
/* Generate 8-pixel loop filter entry points: "h" walks down rows and
 * filters a vertical edge, "v" walks along a row and filters a
 * horizontal edge (stridea/strideb swapped accordingly). */
#define lf_8_fn(dir, wd, stridea, strideb) \
static void loop_filter_##dir##_##wd##_8_c(uint8_t *_dst, \
                                           ptrdiff_t stride, \
                                           int E, int I, int H) \
{ \
    pixel *dst = (pixel *) _dst; \
    stride /= sizeof(pixel); \
    loop_filter(dst, E, I, H, stridea, strideb, wd); \
}
#define lf_8_fns(wd) \
lf_8_fn(h, wd, stride, 1) \
lf_8_fn(v, wd, 1, stride)
lf_8_fns(4)
lf_8_fns(8)
lf_8_fns(16)
#undef lf_8_fn
#undef lf_8_fns
/* 16-pixel edge = two 8-pixel calls, offset by 8 pixels along the edge
 * (8 * stride for h, 8 pixels = 8 * sizeof(pixel) bytes for v). */
#define lf_16_fn(dir, stridea) \
static void loop_filter_##dir##_16_16_c(uint8_t *dst, \
                                        ptrdiff_t stride, \
                                        int E, int I, int H) \
{ \
    loop_filter_##dir##_16_8_c(dst, stride, E, I, H); \
    loop_filter_##dir##_16_8_c(dst + 8 * stridea, stride, E, I, H); \
}
lf_16_fn(h, stride)
lf_16_fn(v, sizeof(pixel))
#undef lf_16_fn
/* Mixed-width 16-pixel filters: the two 8-pixel halves may use
 * different filter widths, with per-half thresholds packed into the
 * low/high bytes of E, I and H. */
#define lf_mix_fn(dir, wd1, wd2, stridea) \
static void loop_filter_##dir##_##wd1##wd2##_16_c(uint8_t *dst, \
                                                  ptrdiff_t stride, \
                                                  int E, int I, int H) \
{ \
    loop_filter_##dir##_##wd1##_8_c(dst, stride, E & 0xff, I & 0xff, H & 0xff); \
    loop_filter_##dir##_##wd2##_8_c(dst + 8 * stridea, stride, E >> 8, I >> 8, H >> 8); \
}
#define lf_mix_fns(wd1, wd2) \
lf_mix_fn(h, wd1, wd2, stride) \
lf_mix_fn(v, wd1, wd2, sizeof(pixel))
lf_mix_fns(4, 4)
lf_mix_fns(4, 8)
lf_mix_fns(8, 4)
lf_mix_fns(8, 8)
#undef lf_mix_fn
#undef lf_mix_fns
/**
 * Fill the loop filter function tables with the C implementations
 * generated above. loop_filter_8 is indexed by filter width
 * (0=4, 1=8, 2=16) and direction (0=h, 1=v); loop_filter_16 by
 * direction; loop_filter_mix2 by the two half-widths and direction.
 */
static av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp)
{
    /* horizontal (column-edge) filters */
    dsp->loop_filter_8[0][0]       = loop_filter_h_4_8_c;
    dsp->loop_filter_8[1][0]       = loop_filter_h_8_8_c;
    dsp->loop_filter_8[2][0]       = loop_filter_h_16_8_c;
    dsp->loop_filter_16[0]         = loop_filter_h_16_16_c;
    dsp->loop_filter_mix2[0][0][0] = loop_filter_h_44_16_c;
    dsp->loop_filter_mix2[0][1][0] = loop_filter_h_48_16_c;
    dsp->loop_filter_mix2[1][0][0] = loop_filter_h_84_16_c;
    dsp->loop_filter_mix2[1][1][0] = loop_filter_h_88_16_c;

    /* vertical (row-edge) filters */
    dsp->loop_filter_8[0][1]       = loop_filter_v_4_8_c;
    dsp->loop_filter_8[1][1]       = loop_filter_v_8_8_c;
    dsp->loop_filter_8[2][1]       = loop_filter_v_16_8_c;
    dsp->loop_filter_16[1]         = loop_filter_v_16_16_c;
    dsp->loop_filter_mix2[0][0][1] = loop_filter_v_44_16_c;
    dsp->loop_filter_mix2[0][1][1] = loop_filter_v_48_16_c;
    dsp->loop_filter_mix2[1][0][1] = loop_filter_v_84_16_c;
    dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c;
}
/**
 * Copy a w x h block of pixels (strides are in bytes; w is in pixels).
 */
static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
                                    const uint8_t *src, ptrdiff_t src_stride,
                                    int w, int h)
{
    int y;

    for (y = 0; y < h; y++) {
        memcpy(dst + y * dst_stride, src + y * src_stride,
               w * sizeof(pixel));
    }
}
/**
 * Average a w x h source block into the destination (rounded mean of
 * dst and src). Processes 4 pixels at a time as one pixel4 word via the
 * AV_RN4P/AV_WN4PA macros, so w must be a multiple of 4.
 * NOTE(review): the "A" variants imply dst is pixel4-aligned — confirm
 * against the callers' buffer alignment.
 */
static av_always_inline void avg_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                   const uint8_t *_src, ptrdiff_t src_stride,
                                   int w, int h)
{
    pixel *dst = (pixel *) _dst;
    const pixel *src = (const pixel *) _src;
    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    do {
        int x;
        for (x = 0; x < w; x += 4)
            AV_WN4PA(&dst[x], rnd_avg_pixel4(AV_RN4PA(&dst[x]), AV_RN4P(&src[x])));
        dst += dst_stride;
        src += src_stride;
    } while (--h);
}
/* Full-pel (no subpel filtering) entry points: adapt copy_c/avg_c to
 * the common MC signature; mx/my are unused at full-pel positions. */
#define fpel_fn(type, sz) \
static void type##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                         const uint8_t *src, ptrdiff_t src_stride, \
                         int h, int mx, int my) \
{ \
    type##_c(dst, dst_stride, src, src_stride, sz, h); \
}
#define copy_avg_fn(sz) \
fpel_fn(copy, sz) \
fpel_fn(avg, sz)
copy_avg_fn(64)
copy_avg_fn(32)
copy_avg_fn(16)
copy_avg_fn(8)
copy_avg_fn(4)
#undef fpel_fn
#undef copy_avg_fn
/* 8-tap subpel interpolation filter coefficients for the three VP9
 * filter families. Indexed by [filter type][1/16-pel fractional
 * position 0..15][tap]; each row of taps sums to 128 (7-bit filters,
 * hence the >> 7 in FILTER_8TAP below). Position 0 is a pure copy. */
static const int16_t vp9_subpel_filters[3][16][8] = {
    [FILTER_8TAP_REGULAR] = {
        { 0, 0, 0, 128, 0, 0, 0, 0 },
        { 0, 1, -5, 126, 8, -3, 1, 0 },
        { -1, 3, -10, 122, 18, -6, 2, 0 },
        { -1, 4, -13, 118, 27, -9, 3, -1 },
        { -1, 4, -16, 112, 37, -11, 4, -1 },
        { -1, 5, -18, 105, 48, -14, 4, -1 },
        { -1, 5, -19, 97, 58, -16, 5, -1 },
        { -1, 6, -19, 88, 68, -18, 5, -1 },
        { -1, 6, -19, 78, 78, -19, 6, -1 },
        { -1, 5, -18, 68, 88, -19, 6, -1 },
        { -1, 5, -16, 58, 97, -19, 5, -1 },
        { -1, 4, -14, 48, 105, -18, 5, -1 },
        { -1, 4, -11, 37, 112, -16, 4, -1 },
        { -1, 3, -9, 27, 118, -13, 4, -1 },
        { 0, 2, -6, 18, 122, -10, 3, -1 },
        { 0, 1, -3, 8, 126, -5, 1, 0 },
    }, [FILTER_8TAP_SHARP] = {
        { 0, 0, 0, 128, 0, 0, 0, 0 },
        { -1, 3, -7, 127, 8, -3, 1, 0 },
        { -2, 5, -13, 125, 17, -6, 3, -1 },
        { -3, 7, -17, 121, 27, -10, 5, -2 },
        { -4, 9, -20, 115, 37, -13, 6, -2 },
        { -4, 10, -23, 108, 48, -16, 8, -3 },
        { -4, 10, -24, 100, 59, -19, 9, -3 },
        { -4, 11, -24, 90, 70, -21, 10, -4 },
        { -4, 11, -23, 80, 80, -23, 11, -4 },
        { -4, 10, -21, 70, 90, -24, 11, -4 },
        { -3, 9, -19, 59, 100, -24, 10, -4 },
        { -3, 8, -16, 48, 108, -23, 10, -4 },
        { -2, 6, -13, 37, 115, -20, 9, -4 },
        { -2, 5, -10, 27, 121, -17, 7, -3 },
        { -1, 3, -6, 17, 125, -13, 5, -2 },
        { 0, 1, -3, 8, 127, -7, 3, -1 },
    }, [FILTER_8TAP_SMOOTH] = {
        { 0, 0, 0, 128, 0, 0, 0, 0 },
        { -3, -1, 32, 64, 38, 1, -3, 0 },
        { -2, -2, 29, 63, 41, 2, -3, 0 },
        { -2, -2, 26, 63, 43, 4, -4, 0 },
        { -2, -3, 24, 62, 46, 5, -4, 0 },
        { -2, -3, 21, 60, 49, 7, -4, 0 },
        { -1, -4, 18, 59, 51, 9, -4, 0 },
        { -1, -4, 16, 57, 53, 12, -4, -1 },
        { -1, -4, 14, 55, 55, 14, -4, -1 },
        { -1, -4, 12, 53, 57, 16, -4, -1 },
        { 0, -4, 9, 51, 59, 18, -4, -1 },
        { 0, -4, 7, 49, 60, 21, -3, -2 },
        { 0, -4, 5, 46, 62, 24, -3, -2 },
        { 0, -4, 4, 43, 63, 26, -2, -2 },
        { 0, -3, 2, 41, 63, 29, -2, -2 },
        { 0, -3, 1, 38, 64, 32, -1, -3 },
    }
};
/* 8-tap convolution at position x with tap spacing `stride` (1 for
 * horizontal, the row pitch for vertical); rounds the 7-bit filter sum
 * and clips to the pixel range. Taps span src[x-3] .. src[x+4]. */
#define FILTER_8TAP(src, x, F, stride) \
    av_clip_pixel((F[0] * src[x + -3 * stride] + \
                   F[1] * src[x + -2 * stride] + \
                   F[2] * src[x + -1 * stride] + \
                   F[3] * src[x + +0 * stride] + \
                   F[4] * src[x + +1 * stride] + \
                   F[5] * src[x + +2 * stride] + \
                   F[6] * src[x + +3 * stride] + \
                   F[7] * src[x + +4 * stride] + 64) >> 7)
/**
 * One-dimensional 8-tap filtering of a w x h block.
 *
 * @param ds     tap spacing: 1 for horizontal, the source pitch (in
 *               pixels) for vertical filtering
 * @param filter the 8 filter taps
 * @param avg    if nonzero, average the filtered value into dst instead
 *               of overwriting it (compile-time constant via the
 *               wrappers below)
 */
static av_always_inline void do_8tap_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                          const uint8_t *_src, ptrdiff_t src_stride,
                                          int w, int h, ptrdiff_t ds,
                                          const int16_t *filter, int avg)
{
    pixel *dst = (pixel *) _dst;
    const pixel *src = (const pixel *) _src;
    int x, y;

    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    for (y = 0; y < h; y++) {
        for (x = 0; x < w; x++) {
            int px = FILTER_8TAP(src, x, filter, ds);

            dst[x] = avg ? (dst[x] + px + 1) >> 1 : px;
        }
        dst += dst_stride;
        src += src_stride;
    }
}
/* Instantiate put/avg x horizontal/vertical 1D 8-tap filters; the tap
 * spacing is 1 for h and the source pitch (in pixels) for v. */
#define filter_8tap_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                                const uint8_t *src, ptrdiff_t src_stride, \
                                                int w, int h, const int16_t *filter) \
{ \
    do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
}
filter_8tap_1d_fn(put, 0, v, src_stride / sizeof(pixel))
filter_8tap_1d_fn(put, 0, h, 1)
filter_8tap_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
filter_8tap_1d_fn(avg, 1, h, 1)
#undef filter_8tap_1d_fn
/**
 * Two-dimensional (separable) 8-tap filtering: horizontal pass into a
 * temporary buffer, then vertical pass into dst.
 * The temporary is 64 (max block width, fixed row pitch) x 71 rows:
 * h + 7 rows are produced because the vertical 8-tap filter needs 3
 * rows above and 4 below each output row (hence the src_stride * 3
 * rewind before the first pass and the 64 * 3 offset before the
 * second).
 */
static av_always_inline void do_8tap_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                          const uint8_t *_src, ptrdiff_t src_stride,
                                          int w, int h, const int16_t *filterx,
                                          const int16_t *filtery, int avg)
{
    int tmp_h = h + 7;
    pixel tmp[64 * 71], *tmp_ptr = tmp;
    pixel *dst = (pixel *) _dst;
    const pixel *src = (const pixel *) _src;
    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    src -= src_stride * 3;
    do {
        int x;
        for (x = 0; x < w; x++)
            tmp_ptr[x] = FILTER_8TAP(src, x, filterx, 1);
        tmp_ptr += 64;
        src += src_stride;
    } while (--tmp_h);
    tmp_ptr = tmp + 64 * 3;
    do {
        int x;
        for (x = 0; x < w; x++)
            if (avg) {
                dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filtery, 64) + 1) >> 1;
            } else {
                dst[x] = FILTER_8TAP(tmp_ptr, x, filtery, 64);
            }
        tmp_ptr += 64;
        dst += dst_stride;
    } while (--h);
}
/* Instantiate put/avg 2D (hv) 8-tap filters. */
#define filter_8tap_2d_fn(opn, opa) \
static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                           const uint8_t *src, ptrdiff_t src_stride, \
                                           int w, int h, const int16_t *filterx, \
                                           const int16_t *filtery) \
{ \
    do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \
}
filter_8tap_2d_fn(put, 0)
filter_8tap_2d_fn(avg, 1)
#undef filter_8tap_2d_fn
/* Per-size MC entry points: select the filter taps from
 * vp9_subpel_filters by filter family and the mx/my subpel position
 * (dir_m picks mx for h, my for v). */
#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                              const uint8_t *src, ptrdiff_t src_stride, \
                                              int h, int mx, int my) \
{ \
    avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \
                            vp9_subpel_filters[type_idx][dir_m]); \
}
#define filter_fn_2d(sz, type, type_idx, avg) \
static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                           const uint8_t *src, ptrdiff_t src_stride, \
                                           int h, int mx, int my) \
{ \
    avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
                       vp9_subpel_filters[type_idx][mx], \
                       vp9_subpel_filters[type_idx][my]); \
}
/* Bilinear interpolation between src[x] and src[x + stride]: mxy is the
 * 1/16-pel fractional position (0..15), rounded with (+8) >> 4. */
#define FILTER_BILIN(src, x, mxy, stride) \
    (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
/**
 * One-dimensional bilinear filtering of a w x h block.
 *
 * @param ds  sample spacing: 1 for horizontal, the source pitch (in
 *            pixels) for vertical filtering
 * @param mxy 1/16-pel fractional position
 * @param avg if nonzero, average the result into dst instead of
 *            overwriting it (compile-time constant via the wrappers)
 */
static av_always_inline void do_bilin_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                           const uint8_t *_src, ptrdiff_t src_stride,
                                           int w, int h, ptrdiff_t ds, int mxy, int avg)
{
    pixel *dst = (pixel *) _dst;
    const pixel *src = (const pixel *) _src;
    int x, y;

    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    for (y = 0; y < h; y++) {
        for (x = 0; x < w; x++) {
            int px = FILTER_BILIN(src, x, mxy, ds);

            dst[x] = avg ? (dst[x] + px + 1) >> 1 : px;
        }
        dst += dst_stride;
        src += src_stride;
    }
}
/* Instantiate put/avg x horizontal/vertical 1D bilinear filters. */
#define bilin_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                                 const uint8_t *src, ptrdiff_t src_stride, \
                                                 int w, int h, int mxy) \
{ \
    do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
}
bilin_1d_fn(put, 0, v, src_stride / sizeof(pixel))
bilin_1d_fn(put, 0, h, 1)
bilin_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
bilin_1d_fn(avg, 1, h, 1)
#undef bilin_1d_fn
/**
 * Two-dimensional (separable) bilinear filtering: horizontal pass into
 * a temporary buffer, vertical pass into dst.
 * The temporary is 64 (max block width, fixed row pitch) x 65 rows:
 * only h + 1 rows are needed since the vertical bilinear filter looks
 * one row ahead (no rewind/offset as in do_8tap_2d_c).
 */
static av_always_inline void do_bilin_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
                                           const uint8_t *_src, ptrdiff_t src_stride,
                                           int w, int h, int mx, int my, int avg)
{
    pixel tmp[64 * 65], *tmp_ptr = tmp;
    int tmp_h = h + 1;
    pixel *dst = (pixel *) _dst;
    const pixel *src = (const pixel *) _src;
    dst_stride /= sizeof(pixel);
    src_stride /= sizeof(pixel);
    do {
        int x;
        for (x = 0; x < w; x++)
            tmp_ptr[x] = FILTER_BILIN(src, x, mx, 1);
        tmp_ptr += 64;
        src += src_stride;
    } while (--tmp_h);
    tmp_ptr = tmp;
    do {
        int x;
        for (x = 0; x < w; x++)
            if (avg) {
                dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
            } else {
                dst[x] = FILTER_BILIN(tmp_ptr, x, my, 64);
            }
        tmp_ptr += 64;
        dst += dst_stride;
    } while (--h);
}
/* Instantiate put/avg 2D (hv) bilinear filters. */
#define bilin_2d_fn(opn, opa) \
static av_noinline void opn##_bilin_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                            const uint8_t *src, ptrdiff_t src_stride, \
                                            int w, int h, int mx, int my) \
{ \
    do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \
}
bilin_2d_fn(put, 0)
bilin_2d_fn(avg, 1)
#undef bilin_2d_fn
#define bilinf_fn_1d(sz, dir, dir_m, avg) \
static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
avg##_bilin_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, dir_m); \
}
#define bilinf_fn_2d(sz, avg) \
static void avg##_bilin_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
avg##_bilin_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, mx, my); \
}
/* Instantiate, for one block size, the full set of subpel functions:
 * h/v/hv variants of the three 8-tap filters plus bilinear.
 * (filter_fn_1d/filter_fn_2d are defined earlier in this file.) */
#define filter_fn(sz, avg) \
filter_fn_1d(sz, h, mx, regular, FILTER_8TAP_REGULAR, avg) \
filter_fn_1d(sz, v, my, regular, FILTER_8TAP_REGULAR, avg) \
filter_fn_2d(sz, regular, FILTER_8TAP_REGULAR, avg) \
filter_fn_1d(sz, h, mx, smooth, FILTER_8TAP_SMOOTH, avg) \
filter_fn_1d(sz, v, my, smooth, FILTER_8TAP_SMOOTH, avg) \
filter_fn_2d(sz, smooth, FILTER_8TAP_SMOOTH, avg) \
filter_fn_1d(sz, h, mx, sharp, FILTER_8TAP_SHARP, avg) \
filter_fn_1d(sz, v, my, sharp, FILTER_8TAP_SHARP, avg) \
filter_fn_2d(sz, sharp, FILTER_8TAP_SHARP, avg) \
bilinf_fn_1d(sz, h, mx, avg) \
bilinf_fn_1d(sz, v, my, avg) \
bilinf_fn_2d(sz, avg)
/* All block sizes used by VP9 motion compensation: 64 down to 4. */
#define filter_fn_set(avg) \
filter_fn(64, avg) \
filter_fn(32, avg) \
filter_fn(16, avg) \
filter_fn(8, avg) \
filter_fn(4, avg)
filter_fn_set(put)
filter_fn_set(avg)
#undef filter_fn
#undef filter_fn_set
#undef filter_fn_1d
#undef filter_fn_2d
#undef bilinf_fn_1d
#undef bilinf_fn_2d
/* Fill the dsp->mc function-pointer table with the C implementations.
 * Indexing: mc[size][filter][put/avg][have_h_subpel][have_v_subpel]. */
static av_cold void vp9dsp_mc_init(VP9DSPContext *dsp)
{
/* Full-pel (no subpel offset) entries: the filter type is irrelevant,
 * so all four filter slots point at the same copy/avg function. */
#define init_fpel(idx1, idx2, sz, type) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = type##sz##_c; \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = type##sz##_c; \
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = type##sz##_c; \
dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = type##sz##_c
#define init_copy_avg(idx, sz) \
init_fpel(idx, 0, sz, copy); \
init_fpel(idx, 1, sz, avg)
init_copy_avg(0, 64);
init_copy_avg(1, 32);
init_copy_avg(2, 16);
init_copy_avg(3, 8);
init_copy_avg(4, 4);
#undef init_copy_avg
#undef init_fpel
/* Subpel entries: one function per size/filter/op for each of h, v, hv. */
#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_c; \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_c; \
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_c; \
dsp->mc[idx1][FILTER_BILINEAR ][idx2][idxh][idxv] = type##_bilin_##sz##dir##_c
#define init_subpel2(idx, idxh, idxv, dir, type) \
init_subpel1(0, idx, idxh, idxv, 64, dir, type); \
init_subpel1(1, idx, idxh, idxv, 32, dir, type); \
init_subpel1(2, idx, idxh, idxv, 16, dir, type); \
init_subpel1(3, idx, idxh, idxv, 8, dir, type); \
init_subpel1(4, idx, idxh, idxv, 4, dir, type)
#define init_subpel3(idx, type) \
init_subpel2(idx, 1, 1, hv, type); \
init_subpel2(idx, 0, 1, v, type); \
init_subpel2(idx, 1, 0, h, type)
init_subpel3(0, put);
init_subpel3(1, avg);
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3
}
/* 8-tap subpel interpolation with scaling (reference and current frame
 * differ in size). Two passes: horizontal filtering into a temporary
 * buffer while stepping the source position by dx per output column,
 * then vertical filtering into dst while stepping by dy per output row.
 * mx/my are the initial 1/16-pel fractions, dx/dy the per-sample steps
 * in 1/16-pel units; 'filters' selects the 8-tap coefficient set. */
static av_always_inline void do_scaled_8tap_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, int mx, int my,
int dx, int dy, int avg,
const int16_t (*filters)[8])
{
/* rows needed: position of the last output row plus the 8-tap footprint */
int tmp_h = (((h - 1) * dy + my) >> 4) + 8;
pixel tmp[64 * 135], *tmp_ptr = tmp;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
src -= src_stride * 3; /* back up 3 rows for the vertical filter taps */
do {
int x;
int imx = mx, ioff = 0;
/* horizontal pass: advance the source column by dx sixteenths per
 * output pixel, carrying whole-pixel overflow into ioff */
for (x = 0; x < w; x++) {
tmp_ptr[x] = FILTER_8TAP(src, ioff, filters[imx], 1);
imx += dx;
ioff += imx >> 4;
imx &= 0xf;
}
tmp_ptr += 64;
src += src_stride;
} while (--tmp_h);
tmp_ptr = tmp + 64 * 3; /* skip the 3 lead-in rows */
do {
int x;
const int16_t *filter = filters[my];
for (x = 0; x < w; x++)
if (avg) {
dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filter, 64) + 1) >> 1;
} else {
dst[x] = FILTER_8TAP(tmp_ptr, x, filter, 64);
}
/* vertical pass: same fraction/overflow stepping, one row at a time */
my += dy;
tmp_ptr += (my >> 4) * 64;
my &= 0xf;
dst += dst_stride;
} while (--h);
}
/* Generate put/avg wrappers around the scaled 8-tap kernel. */
#define scaled_filter_8tap_fn(opn, opa) \
static av_noinline void opn##_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int w, int h, int mx, int my, int dx, int dy, \
const int16_t (*filters)[8]) \
{ \
do_scaled_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
opa, filters); \
}
scaled_filter_8tap_fn(put, 0)
scaled_filter_8tap_fn(avg, 1)
#undef scaled_filter_8tap_fn
#undef FILTER_8TAP
/* Per-size, per-filter-type scaled entry points with the smc prototype;
 * 'type_idx' picks the coefficient table from vp9_subpel_filters. */
#define scaled_filter_fn(sz, type, type_idx, avg) \
static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my, int dx, int dy) \
{ \
avg##_scaled_8tap_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy, \
vp9_subpel_filters[type_idx]); \
}
/* Bilinear interpolation with scaling; same two-pass structure and
 * dx/dy 1/16-pel stepping as do_scaled_8tap_c, but with the 2-tap
 * bilinear filter (footprint of 2 rows instead of 8). */
static av_always_inline void do_scaled_bilin_c(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *_src, ptrdiff_t src_stride,
int w, int h, int mx, int my,
int dx, int dy, int avg)
{
pixel tmp[64 * 129], *tmp_ptr = tmp;
/* rows needed: last output row position plus the 2-tap footprint */
int tmp_h = (((h - 1) * dy + my) >> 4) + 2;
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
dst_stride /= sizeof(pixel);
src_stride /= sizeof(pixel);
do {
int x;
int imx = mx, ioff = 0;
/* horizontal pass with per-column dx stepping */
for (x = 0; x < w; x++) {
tmp_ptr[x] = FILTER_BILIN(src, ioff, imx, 1);
imx += dx;
ioff += imx >> 4;
imx &= 0xf;
}
tmp_ptr += 64;
src += src_stride;
} while (--tmp_h);
tmp_ptr = tmp;
do {
int x;
for (x = 0; x < w; x++)
if (avg) {
dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
} else {
dst[x] = FILTER_BILIN(tmp_ptr, x, my, 64);
}
/* vertical pass with per-row dy stepping */
my += dy;
tmp_ptr += (my >> 4) * 64;
my &= 0xf;
dst += dst_stride;
} while (--h);
}
/* Generate put/avg wrappers around the scaled bilinear kernel. */
#define scaled_bilin_fn(opn, opa) \
static av_noinline void opn##_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int w, int h, int mx, int my, int dx, int dy) \
{ \
do_scaled_bilin_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, opa); \
}
scaled_bilin_fn(put, 0)
scaled_bilin_fn(avg, 1)
#undef scaled_bilin_fn
#undef FILTER_BILIN
/* Per-size scaled bilinear entry points with the smc prototype. */
#define scaled_bilinf_fn(sz, avg) \
static void avg##_scaled_bilin_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my, int dx, int dy) \
{ \
avg##_scaled_bilin_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy); \
}
/* Instantiate all scaled filter types plus bilinear for one block size. */
#define scaled_filter_fns(sz, avg) \
scaled_filter_fn(sz, regular, FILTER_8TAP_REGULAR, avg) \
scaled_filter_fn(sz, smooth, FILTER_8TAP_SMOOTH, avg) \
scaled_filter_fn(sz, sharp, FILTER_8TAP_SHARP, avg) \
scaled_bilinf_fn(sz, avg)
/* All block sizes used by VP9 motion compensation: 64 down to 4. */
#define scaled_filter_fn_set(avg) \
scaled_filter_fns(64, avg) \
scaled_filter_fns(32, avg) \
scaled_filter_fns(16, avg) \
scaled_filter_fns(8, avg) \
scaled_filter_fns(4, avg)
scaled_filter_fn_set(put)
scaled_filter_fn_set(avg)
#undef scaled_filter_fns
#undef scaled_filter_fn_set
#undef scaled_filter_fn
#undef scaled_bilinf_fn
/* Fill the dsp->smc function-pointer table with the C implementations.
 * Indexing: smc[size][filter][put/avg]. */
static av_cold void vp9dsp_scaled_mc_init(VP9DSPContext *dsp)
{
#define init_scaled(idx1, idx2, sz, type) \
dsp->smc[idx1][FILTER_8TAP_SMOOTH ][idx2] = type##_scaled_smooth_##sz##_c; \
dsp->smc[idx1][FILTER_8TAP_REGULAR][idx2] = type##_scaled_regular_##sz##_c; \
dsp->smc[idx1][FILTER_8TAP_SHARP ][idx2] = type##_scaled_sharp_##sz##_c; \
dsp->smc[idx1][FILTER_BILINEAR ][idx2] = type##_scaled_bilin_##sz##_c
#define init_scaled_put_avg(idx, sz) \
init_scaled(idx, 0, sz, put); \
init_scaled(idx, 1, sz, avg)
init_scaled_put_avg(0, 64);
init_scaled_put_avg(1, 32);
init_scaled_put_avg(2, 16);
init_scaled_put_avg(3, 8);
init_scaled_put_avg(4, 4);
#undef init_scaled_put_avg
#undef init_scaled
}
/* Bit-depth-specific DSP context initializer; FUNC() expands the name per
 * template instantiation (e.g. ff_vp9dsp_init_8/_10/_12). Installs the C
 * implementations for all DSP function groups. */
av_cold void FUNC(ff_vp9dsp_init)(VP9DSPContext *dsp)
{
vp9dsp_intrapred_init(dsp);
vp9dsp_itxfm_init(dsp);
vp9dsp_loopfilter_init(dsp);
vp9dsp_mc_init(dsp);
vp9dsp_scaled_mc_init(dsp);
}
...@@ -307,8 +307,10 @@ ipred_func(32, tm, avx2); ...@@ -307,8 +307,10 @@ ipred_func(32, tm, avx2);
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp)
{ {
if (bpp != 8) return;
#if HAVE_YASM #if HAVE_YASM
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
......
...@@ -605,6 +605,54 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { ...@@ -605,6 +605,54 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
}, },
.flags = AV_PIX_FMT_FLAG_PLANAR, .flags = AV_PIX_FMT_FLAG_PLANAR,
}, },
[AV_PIX_FMT_YUV440P10LE] = {
.name = "yuv440p10le",
.nb_components = 3,
.log2_chroma_w = 0,
.log2_chroma_h = 1,
.comp = {
{ 0, 1, 1, 0, 9 }, /* Y */
{ 1, 1, 1, 0, 9 }, /* U */
{ 2, 1, 1, 0, 9 }, /* V */
},
.flags = AV_PIX_FMT_FLAG_PLANAR,
},
[AV_PIX_FMT_YUV440P10BE] = {
.name = "yuv440p10be",
.nb_components = 3,
.log2_chroma_w = 0,
.log2_chroma_h = 1,
.comp = {
{ 0, 1, 1, 0, 9 }, /* Y */
{ 1, 1, 1, 0, 9 }, /* U */
{ 2, 1, 1, 0, 9 }, /* V */
},
.flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
},
[AV_PIX_FMT_YUV440P12LE] = {
.name = "yuv440p12le",
.nb_components = 3,
.log2_chroma_w = 0,
.log2_chroma_h = 1,
.comp = {
{ 0, 1, 1, 0, 11 }, /* Y */
{ 1, 1, 1, 0, 11 }, /* U */
{ 2, 1, 1, 0, 11 }, /* V */
},
.flags = AV_PIX_FMT_FLAG_PLANAR,
},
[AV_PIX_FMT_YUV440P12BE] = {
.name = "yuv440p12be",
.nb_components = 3,
.log2_chroma_w = 0,
.log2_chroma_h = 1,
.comp = {
{ 0, 1, 1, 0, 11 }, /* Y */
{ 1, 1, 1, 0, 11 }, /* U */
{ 2, 1, 1, 0, 11 }, /* V */
},
.flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
},
[AV_PIX_FMT_YUVA420P] = { [AV_PIX_FMT_YUVA420P] = {
.name = "yuva420p", .name = "yuva420p",
.nb_components = 4, .nb_components = 4,
......
...@@ -307,6 +307,10 @@ enum AVPixelFormat { ...@@ -307,6 +307,10 @@ enum AVPixelFormat {
#if !FF_API_XVMC #if !FF_API_XVMC
AV_PIX_FMT_XVMC,///< XVideo Motion Acceleration via common packet passing AV_PIX_FMT_XVMC,///< XVideo Motion Acceleration via common packet passing
#endif /* !FF_API_XVMC */ #endif /* !FF_API_XVMC */
AV_PIX_FMT_YUV440P10LE, ///< planar YUV 4:4:0,20bpp, (1 Cr & Cb sample per 1x2 Y samples), little-endian
AV_PIX_FMT_YUV440P10BE, ///< planar YUV 4:4:0,20bpp, (1 Cr & Cb sample per 1x2 Y samples), big-endian
AV_PIX_FMT_YUV440P12LE, ///< planar YUV 4:4:0,24bpp, (1 Cr & Cb sample per 1x2 Y samples), little-endian
AV_PIX_FMT_YUV440P12BE, ///< planar YUV 4:4:0,24bpp, (1 Cr & Cb sample per 1x2 Y samples), big-endian
AV_PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions AV_PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
...@@ -362,9 +366,11 @@ enum AVPixelFormat { ...@@ -362,9 +366,11 @@ enum AVPixelFormat {
#define AV_PIX_FMT_YUV444P9 AV_PIX_FMT_NE(YUV444P9BE , YUV444P9LE) #define AV_PIX_FMT_YUV444P9 AV_PIX_FMT_NE(YUV444P9BE , YUV444P9LE)
#define AV_PIX_FMT_YUV420P10 AV_PIX_FMT_NE(YUV420P10BE, YUV420P10LE) #define AV_PIX_FMT_YUV420P10 AV_PIX_FMT_NE(YUV420P10BE, YUV420P10LE)
#define AV_PIX_FMT_YUV422P10 AV_PIX_FMT_NE(YUV422P10BE, YUV422P10LE) #define AV_PIX_FMT_YUV422P10 AV_PIX_FMT_NE(YUV422P10BE, YUV422P10LE)
#define AV_PIX_FMT_YUV440P10 AV_PIX_FMT_NE(YUV440P10BE, YUV440P10LE)
#define AV_PIX_FMT_YUV444P10 AV_PIX_FMT_NE(YUV444P10BE, YUV444P10LE) #define AV_PIX_FMT_YUV444P10 AV_PIX_FMT_NE(YUV444P10BE, YUV444P10LE)
#define AV_PIX_FMT_YUV420P12 AV_PIX_FMT_NE(YUV420P12BE, YUV420P12LE) #define AV_PIX_FMT_YUV420P12 AV_PIX_FMT_NE(YUV420P12BE, YUV420P12LE)
#define AV_PIX_FMT_YUV422P12 AV_PIX_FMT_NE(YUV422P12BE, YUV422P12LE) #define AV_PIX_FMT_YUV422P12 AV_PIX_FMT_NE(YUV422P12BE, YUV422P12LE)
#define AV_PIX_FMT_YUV440P12 AV_PIX_FMT_NE(YUV440P12BE, YUV440P12LE)
#define AV_PIX_FMT_YUV444P12 AV_PIX_FMT_NE(YUV444P12BE, YUV444P12LE) #define AV_PIX_FMT_YUV444P12 AV_PIX_FMT_NE(YUV444P12BE, YUV444P12LE)
#define AV_PIX_FMT_YUV420P14 AV_PIX_FMT_NE(YUV420P14BE, YUV420P14LE) #define AV_PIX_FMT_YUV420P14 AV_PIX_FMT_NE(YUV420P14BE, YUV420P14LE)
#define AV_PIX_FMT_YUV422P14 AV_PIX_FMT_NE(YUV422P14BE, YUV422P14LE) #define AV_PIX_FMT_YUV422P14 AV_PIX_FMT_NE(YUV422P14BE, YUV422P14LE)
......
...@@ -931,9 +931,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) ...@@ -931,9 +931,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_YUV422P9LE: case AV_PIX_FMT_YUV422P9LE:
case AV_PIX_FMT_YUV420P9LE: case AV_PIX_FMT_YUV420P9LE:
case AV_PIX_FMT_YUV422P10LE: case AV_PIX_FMT_YUV422P10LE:
case AV_PIX_FMT_YUV440P10LE:
case AV_PIX_FMT_YUV444P10LE: case AV_PIX_FMT_YUV444P10LE:
case AV_PIX_FMT_YUV420P10LE: case AV_PIX_FMT_YUV420P10LE:
case AV_PIX_FMT_YUV422P12LE: case AV_PIX_FMT_YUV422P12LE:
case AV_PIX_FMT_YUV440P12LE:
case AV_PIX_FMT_YUV444P12LE: case AV_PIX_FMT_YUV444P12LE:
case AV_PIX_FMT_YUV420P12LE: case AV_PIX_FMT_YUV420P12LE:
case AV_PIX_FMT_YUV422P14LE: case AV_PIX_FMT_YUV422P14LE:
...@@ -958,9 +960,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) ...@@ -958,9 +960,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_YUV444P9BE: case AV_PIX_FMT_YUV444P9BE:
case AV_PIX_FMT_YUV422P9BE: case AV_PIX_FMT_YUV422P9BE:
case AV_PIX_FMT_YUV420P9BE: case AV_PIX_FMT_YUV420P9BE:
case AV_PIX_FMT_YUV440P10BE:
case AV_PIX_FMT_YUV444P10BE: case AV_PIX_FMT_YUV444P10BE:
case AV_PIX_FMT_YUV422P10BE: case AV_PIX_FMT_YUV422P10BE:
case AV_PIX_FMT_YUV420P10BE: case AV_PIX_FMT_YUV420P10BE:
case AV_PIX_FMT_YUV440P12BE:
case AV_PIX_FMT_YUV444P12BE: case AV_PIX_FMT_YUV444P12BE:
case AV_PIX_FMT_YUV422P12BE: case AV_PIX_FMT_YUV422P12BE:
case AV_PIX_FMT_YUV420P12BE: case AV_PIX_FMT_YUV420P12BE:
...@@ -1197,9 +1201,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) ...@@ -1197,9 +1201,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_YUV422P9LE: case AV_PIX_FMT_YUV422P9LE:
case AV_PIX_FMT_YUV420P9LE: case AV_PIX_FMT_YUV420P9LE:
case AV_PIX_FMT_YUV444P10LE: case AV_PIX_FMT_YUV444P10LE:
case AV_PIX_FMT_YUV440P10LE:
case AV_PIX_FMT_YUV422P10LE: case AV_PIX_FMT_YUV422P10LE:
case AV_PIX_FMT_YUV420P10LE: case AV_PIX_FMT_YUV420P10LE:
case AV_PIX_FMT_YUV444P12LE: case AV_PIX_FMT_YUV444P12LE:
case AV_PIX_FMT_YUV440P12LE:
case AV_PIX_FMT_YUV422P12LE: case AV_PIX_FMT_YUV422P12LE:
case AV_PIX_FMT_YUV420P12LE: case AV_PIX_FMT_YUV420P12LE:
case AV_PIX_FMT_YUV444P14LE: case AV_PIX_FMT_YUV444P14LE:
...@@ -1229,9 +1235,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) ...@@ -1229,9 +1235,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_YUV422P9BE: case AV_PIX_FMT_YUV422P9BE:
case AV_PIX_FMT_YUV420P9BE: case AV_PIX_FMT_YUV420P9BE:
case AV_PIX_FMT_YUV444P10BE: case AV_PIX_FMT_YUV444P10BE:
case AV_PIX_FMT_YUV440P10BE:
case AV_PIX_FMT_YUV422P10BE: case AV_PIX_FMT_YUV422P10BE:
case AV_PIX_FMT_YUV420P10BE: case AV_PIX_FMT_YUV420P10BE:
case AV_PIX_FMT_YUV444P12BE: case AV_PIX_FMT_YUV444P12BE:
case AV_PIX_FMT_YUV440P12BE:
case AV_PIX_FMT_YUV422P12BE: case AV_PIX_FMT_YUV422P12BE:
case AV_PIX_FMT_YUV420P12BE: case AV_PIX_FMT_YUV420P12BE:
case AV_PIX_FMT_YUV444P14BE: case AV_PIX_FMT_YUV444P14BE:
......
...@@ -1694,6 +1694,8 @@ void ff_get_unscaled_swscale(SwsContext *c) ...@@ -1694,6 +1694,8 @@ void ff_get_unscaled_swscale(SwsContext *c)
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P12) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P12) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P14) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P14) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P16) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P16) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV440P10) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV440P12) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P9) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P9) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P10) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P10) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P12) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P12) ||
......
...@@ -119,6 +119,10 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = { ...@@ -119,6 +119,10 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
[AV_PIX_FMT_GRAY16LE] = { 1, 1 }, [AV_PIX_FMT_GRAY16LE] = { 1, 1 },
[AV_PIX_FMT_YUV440P] = { 1, 1 }, [AV_PIX_FMT_YUV440P] = { 1, 1 },
[AV_PIX_FMT_YUVJ440P] = { 1, 1 }, [AV_PIX_FMT_YUVJ440P] = { 1, 1 },
[AV_PIX_FMT_YUV440P10LE] = { 1, 1 },
[AV_PIX_FMT_YUV440P10BE] = { 1, 1 },
[AV_PIX_FMT_YUV440P12LE] = { 1, 1 },
[AV_PIX_FMT_YUV440P12BE] = { 1, 1 },
[AV_PIX_FMT_YUVA420P] = { 1, 1 }, [AV_PIX_FMT_YUVA420P] = { 1, 1 },
[AV_PIX_FMT_YUVA422P] = { 1, 1 }, [AV_PIX_FMT_YUVA422P] = { 1, 1 },
[AV_PIX_FMT_YUVA444P] = { 1, 1 }, [AV_PIX_FMT_YUVA444P] = { 1, 1 },
......
...@@ -87,7 +87,7 @@ endef ...@@ -87,7 +87,7 @@ endef
define FATE_VP9_PROFILE_SUITE define FATE_VP9_PROFILE_SUITE
FATE_VP9-$(CONFIG_MATROSKA_DEMUXER) += fate-vp9p$(2)-$(1) FATE_VP9-$(CONFIG_MATROSKA_DEMUXER) += fate-vp9p$(2)-$(1)
fate-vp9p$(2)-$(1): CMD = framemd5 -i $(TARGET_SAMPLES)/vp9-test-vectors/vp9$(2)-2-$(1).webm fate-vp9p$(2)-$(1): CMD = framemd5 -i $(TARGET_SAMPLES)/vp9-test-vectors/vp9$(2)-2-$(1).webm $(3)
fate-vp9p$(2)-$(1): REF = $(SRC_PATH)/tests/ref/fate/vp9p$(2)-$(1) fate-vp9p$(2)-$(1): REF = $(SRC_PATH)/tests/ref/fate/vp9p$(2)-$(1)
endef endef
...@@ -98,6 +98,8 @@ VP9_Q = 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 \ ...@@ -98,6 +98,8 @@ VP9_Q = 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 \
VP9_SHARP = 1 2 3 4 5 6 7 VP9_SHARP = 1 2 3 4 5 6 7
VP9_SIZE_A = 08 10 16 18 32 34 64 66 VP9_SIZE_A = 08 10 16 18 32 34 64 66
VP9_SIZE_B = 196 198 200 202 208 210 224 226 VP9_SIZE_B = 196 198 200 202 208 210 224 226
VP9_CHROMA_SUBSAMPLE = 422 440 444
VP9_HIGH_BITDEPTH = 10 12
define FATE_VP9_FULL define FATE_VP9_FULL
$(foreach Q,$(VP9_Q),$(eval $(call FATE_VP9_SUITE,00-quantizer-$(Q)))) $(foreach Q,$(VP9_Q),$(eval $(call FATE_VP9_SUITE,00-quantizer-$(Q))))
...@@ -105,9 +107,9 @@ $(foreach SHARP,$(VP9_SHARP),$(eval $(call FATE_VP9_SUITE,01-sharpness-$(SHARP)) ...@@ -105,9 +107,9 @@ $(foreach SHARP,$(VP9_SHARP),$(eval $(call FATE_VP9_SUITE,01-sharpness-$(SHARP))
$(foreach W,$(VP9_SIZE_A),$(eval $(foreach H,$(VP9_SIZE_A),$(eval $(call FATE_VP9_SUITE,02-size-$(W)x$(H)))))) $(foreach W,$(VP9_SIZE_A),$(eval $(foreach H,$(VP9_SIZE_A),$(eval $(call FATE_VP9_SUITE,02-size-$(W)x$(H))))))
$(foreach W,$(VP9_SIZE_B),$(eval $(foreach H,$(VP9_SIZE_B),$(eval $(call FATE_VP9_SUITE,03-size-$(W)x$(H)))))) $(foreach W,$(VP9_SIZE_B),$(eval $(foreach H,$(VP9_SIZE_B),$(eval $(call FATE_VP9_SUITE,03-size-$(W)x$(H))))))
$(eval $(call FATE_VP9_SUITE,03-deltaq)) $(eval $(call FATE_VP9_SUITE,03-deltaq))
$(eval $(call FATE_VP9_PROFILE_SUITE,04-yuv444,1)) $(foreach SS,$(VP9_CHROMA_SUBSAMPLE),$(eval $(call FATE_VP9_PROFILE_SUITE,04-yuv$(SS),1,)))
$(eval $(call FATE_VP9_PROFILE_SUITE,04-yuv440,1)) $(foreach BD,$(VP9_HIGH_BITDEPTH),$(eval $(call FATE_VP9_PROFILE_SUITE,20-$(BD)bit-yuv420,2,-pix_fmt yuv420p$(BD)le)))
$(eval $(call FATE_VP9_PROFILE_SUITE,04-yuv422,1)) $(foreach BD,$(VP9_HIGH_BITDEPTH),$(eval $(foreach SS,$(VP9_CHROMA_SUBSAMPLE),$(eval $(call FATE_VP9_PROFILE_SUITE,20-$(BD)bit-yuv$(SS),3,-pix_fmt yuv$(SS)p$(BD)le)))))
$(eval $(call FATE_VP9_SUITE,06-bilinear)) $(eval $(call FATE_VP9_SUITE,06-bilinear))
$(eval $(call FATE_VP9_SUITE,09-lf_deltas)) $(eval $(call FATE_VP9_SUITE,09-lf_deltas))
$(eval $(call FATE_VP9_SUITE,10-show-existing-frame)) $(eval $(call FATE_VP9_SUITE,10-show-existing-frame))
......
pixdesc-yuv440p10be 8bc503265081c9dc9890f95aa7a94000
pixdesc-yuv440p10le 84740bc888a73215a8487c6d2cabab37
pixdesc-yuv440p12be 0da181b349062381436c002ee72be5f9
pixdesc-yuv440p12le 682997272e77063a2e260a2c483b3306
...@@ -78,6 +78,10 @@ yuv422p16le 5d8e26fbbc8f25576065106c8820d56a ...@@ -78,6 +78,10 @@ yuv422p16le 5d8e26fbbc8f25576065106c8820d56a
yuv422p9be e3b364350da35d2ffd431665ed9d3592 yuv422p9be e3b364350da35d2ffd431665ed9d3592
yuv422p9le 0f21976b77781cada504f690d7dc6c15 yuv422p9le 0f21976b77781cada504f690d7dc6c15
yuv440p e6754959dfd9631f2dc1682fa71aa4e0 yuv440p e6754959dfd9631f2dc1682fa71aa4e0
yuv440p10be 8bc503265081c9dc9890f95aa7a94000
yuv440p10le 84740bc888a73215a8487c6d2cabab37
yuv440p12be 0da181b349062381436c002ee72be5f9
yuv440p12le 682997272e77063a2e260a2c483b3306
yuv444p db3ad634aea336663a2f5c2475b95410 yuv444p db3ad634aea336663a2f5c2475b95410
yuv444p10be e341efd871b34308831113f4b2ff29f9 yuv444p10be e341efd871b34308831113f4b2ff29f9
yuv444p10le f9666d63623e0d7dceda1a17202579cf yuv444p10le f9666d63623e0d7dceda1a17202579cf
......
...@@ -75,6 +75,10 @@ yuv422p16le 5420bd643996c2fc10709ca58a0323e9 ...@@ -75,6 +75,10 @@ yuv422p16le 5420bd643996c2fc10709ca58a0323e9
yuv422p9be 0cf0d2f092b01b86cb8883e5b9ee70fd yuv422p9be 0cf0d2f092b01b86cb8883e5b9ee70fd
yuv422p9le caa7eabbc6cc12b91314d5a03857c3d9 yuv422p9le caa7eabbc6cc12b91314d5a03857c3d9
yuv440p 54ee388cdaf8b1fe88b78348913b18b0 yuv440p 54ee388cdaf8b1fe88b78348913b18b0
yuv440p10be 74cc406aade7a42bd2e61065a26c37d6
yuv440p10le 23a988639a018805f4af77621530e629
yuv440p12be cbc98bace0a5a83bbe178ea086a8700c
yuv440p12le a1eafa7c4ec86933263bf435c7c25bad
yuv444p 434403ed452af56f5bc6e96863eaca97 yuv444p 434403ed452af56f5bc6e96863eaca97
yuv444p10be d7ef21572858c0071b5c74281ed8cb1b yuv444p10be d7ef21572858c0071b5c74281ed8cb1b
yuv444p10le 3c293e9a0aacd45fbcf9b6568cddcb4f yuv444p10le 3c293e9a0aacd45fbcf9b6568cddcb4f
......
...@@ -78,6 +78,10 @@ yuv422p16le b35565fe10f9a15b71d3706877118d8e ...@@ -78,6 +78,10 @@ yuv422p16le b35565fe10f9a15b71d3706877118d8e
yuv422p9be 41605ec9fb967cc3da4ac88d8c4ca6ba yuv422p9be 41605ec9fb967cc3da4ac88d8c4ca6ba
yuv422p9le 10bf2ae59bb45f5d632afe2fa0788f17 yuv422p9le 10bf2ae59bb45f5d632afe2fa0788f17
yuv440p 4558340790744ccad447adcd3c8f041c yuv440p 4558340790744ccad447adcd3c8f041c
yuv440p10be f4a35891ee216870553cb786aa019d11
yuv440p10le bd25aebbd7b69932fe102dee4ae062bb
yuv440p12be 710a7e4fa709901b8be00179c5387977
yuv440p12le c071b25efc3e826cfd20eb7ad09bc5f4
yuv444p 018c531ab9b4427f6b3ae863306f7bdf yuv444p 018c531ab9b4427f6b3ae863306f7bdf
yuv444p10be d45e1d588191d1e038e88ca3406bfe14 yuv444p10be d45e1d588191d1e038e88ca3406bfe14
yuv444p10le 991363b633a740ae2fc48d8cc8bde0b3 yuv444p10le 991363b633a740ae2fc48d8cc8bde0b3
......
...@@ -75,6 +75,10 @@ yuv422p16le 02975f7af27c35e68684235d185a4828 ...@@ -75,6 +75,10 @@ yuv422p16le 02975f7af27c35e68684235d185a4828
yuv422p9be d4fdec159cb4c106a81e407fe1b441be yuv422p9be d4fdec159cb4c106a81e407fe1b441be
yuv422p9le 7a19175e5e38a028029cf6c14f5485d8 yuv422p9le 7a19175e5e38a028029cf6c14f5485d8
yuv440p 18b37e71484029063d41c5716cd0f785 yuv440p 18b37e71484029063d41c5716cd0f785
yuv440p10be a2548200f66493eb10d23413fa893103
yuv440p10le 7a61ef006bbec1a98ba21caa89052e50
yuv440p12be 941298529e9c859b197508252c38c0b4
yuv440p12le 405aaae3b31ec699d130cef399a57f97
yuv444p e666141ffbbc8ed5ecfd65bf27b07c72 yuv444p e666141ffbbc8ed5ecfd65bf27b07c72
yuv444p10be 39437baaba84ff78b152769202566acd yuv444p10be 39437baaba84ff78b152769202566acd
yuv444p10le a8c9900e3593919c029373cc7781e64d yuv444p10le a8c9900e3593919c029373cc7781e64d
......
...@@ -77,6 +77,10 @@ yuv422p16le a1dbef3752c0c578e0285980a5859864 ...@@ -77,6 +77,10 @@ yuv422p16le a1dbef3752c0c578e0285980a5859864
yuv422p9be f3e1b7196d3e17ad5d31d9d200a2457e yuv422p9be f3e1b7196d3e17ad5d31d9d200a2457e
yuv422p9le a21cb4b38bbdee136f70c15abecd5f8b yuv422p9le a21cb4b38bbdee136f70c15abecd5f8b
yuv440p f0cfcb6a845fc7f33ab54f04e5f61099 yuv440p f0cfcb6a845fc7f33ab54f04e5f61099
yuv440p10be b7e2bfb6fbe4168837c93896e21c3163
yuv440p10le 70d77fa0c45ae283bd85c61633eab146
yuv440p12be 38662f60e8500ab35398952e468739e9
yuv440p12le 32aa0bf8408d0118bc002b89ee6adc42
yuv444p 8bf605570d90a638980e1c11526d99e7 yuv444p 8bf605570d90a638980e1c11526d99e7
yuv444p10be 755f929aff54eda092127fca283cf333 yuv444p10be 755f929aff54eda092127fca283cf333
yuv444p10le fe216173a6b4eff5c029d468861b3da6 yuv444p10le fe216173a6b4eff5c029d468861b3da6
......
...@@ -78,6 +78,10 @@ yuv422p16le 5d8e26fbbc8f25576065106c8820d56a ...@@ -78,6 +78,10 @@ yuv422p16le 5d8e26fbbc8f25576065106c8820d56a
yuv422p9be e3b364350da35d2ffd431665ed9d3592 yuv422p9be e3b364350da35d2ffd431665ed9d3592
yuv422p9le 0f21976b77781cada504f690d7dc6c15 yuv422p9le 0f21976b77781cada504f690d7dc6c15
yuv440p e6754959dfd9631f2dc1682fa71aa4e0 yuv440p e6754959dfd9631f2dc1682fa71aa4e0
yuv440p10be 8bc503265081c9dc9890f95aa7a94000
yuv440p10le 84740bc888a73215a8487c6d2cabab37
yuv440p12be 0da181b349062381436c002ee72be5f9
yuv440p12le 682997272e77063a2e260a2c483b3306
yuv444p db3ad634aea336663a2f5c2475b95410 yuv444p db3ad634aea336663a2f5c2475b95410
yuv444p10be e341efd871b34308831113f4b2ff29f9 yuv444p10be e341efd871b34308831113f4b2ff29f9
yuv444p10le f9666d63623e0d7dceda1a17202579cf yuv444p10le f9666d63623e0d7dceda1a17202579cf
......
...@@ -78,6 +78,10 @@ yuv422p16le 7200f2405f7b979bc29f5446653d1fbe ...@@ -78,6 +78,10 @@ yuv422p16le 7200f2405f7b979bc29f5446653d1fbe
yuv422p9be 4c44d041f51b499fe419c51be0831c12 yuv422p9be 4c44d041f51b499fe419c51be0831c12
yuv422p9le b48e78a8a6a3f88269b4f5f810b75603 yuv422p9le b48e78a8a6a3f88269b4f5f810b75603
yuv440p a9dd3fab4320c3c9b0eb01d2bf75acb9 yuv440p a9dd3fab4320c3c9b0eb01d2bf75acb9
yuv440p10be e79d6857dd66adf03fd90b7c4ba1a4f8
yuv440p10le 9cf6444970548a93ec8ceb0e3e4244bc
yuv440p12be 66eb24b71083413d2ece3ce9f9527b90
yuv440p12le d6b60a9453e2ec0c8e487fc82d8b476a
yuv444p 77387910c01eacca94793a9be37c1aa1 yuv444p 77387910c01eacca94793a9be37c1aa1
yuv444p10be b4e8cef69cb2ad2c24e795325a1d883c yuv444p10be b4e8cef69cb2ad2c24e795325a1d883c
yuv444p10le 83855dd296a1859c085193c1edbb35e2 yuv444p10le 83855dd296a1859c085193c1edbb35e2
......
...@@ -23,6 +23,10 @@ yuv422p16le fb8e5aff474eca0b9b4bba0ab5765158 ...@@ -23,6 +23,10 @@ yuv422p16le fb8e5aff474eca0b9b4bba0ab5765158
yuv422p9be 2d27809e85655e5ce9db5f194d617069 yuv422p9be 2d27809e85655e5ce9db5f194d617069
yuv422p9le 13f42a98d4c6c57883d6817329ec3ea7 yuv422p9le 13f42a98d4c6c57883d6817329ec3ea7
yuv440p 44c7ad06592e46b42143756210798d7d yuv440p 44c7ad06592e46b42143756210798d7d
yuv440p10be 9b623abf03ad22bec852250170bfb42d
yuv440p10le cfb3e95f24943a4ead5c3b59eed9f2ce
yuv440p12be caf621757fdc1942cb3d90fc16858eef
yuv440p12le 99a54b4e9a286ce34a686de534377253
yuv444p 49587b263a4c8da66ddc267f746b7041 yuv444p 49587b263a4c8da66ddc267f746b7041
yuv444p10be 6589db276e865800614153e43bad3da8 yuv444p10be 6589db276e865800614153e43bad3da8
yuv444p10le 8e17f00cbc47188e007b59e568bf13aa yuv444p10le 8e17f00cbc47188e007b59e568bf13aa
......
...@@ -78,6 +78,10 @@ yuv422p16le d3d1ff9c78b6d1dda8e5529aa881e254 ...@@ -78,6 +78,10 @@ yuv422p16le d3d1ff9c78b6d1dda8e5529aa881e254
yuv422p9be b910bf8de09521e2935749fa68fbcf8e yuv422p9be b910bf8de09521e2935749fa68fbcf8e
yuv422p9le 98d13d3db47da9be9f666ce773f2eed9 yuv422p9le 98d13d3db47da9be9f666ce773f2eed9
yuv440p aba0a5eda16ce1050175bb1151900a4c yuv440p aba0a5eda16ce1050175bb1151900a4c
yuv440p10be 0ed6a258237270dcf17f848037bb4175
yuv440p10le 91c69939e302885d3851b74d831aa094
yuv440p12be ffdd1de6e681e99b9362230d75f4d7de
yuv440p12le 55a95432d92f57acaae1dfaff5475d2f
yuv444p 81a72d37cdfba65c7a2dfccaf21cd0d8 yuv444p 81a72d37cdfba65c7a2dfccaf21cd0d8
yuv444p10be a10191d92e5a08983bec906d8fbb505a yuv444p10be a10191d92e5a08983bec906d8fbb505a
yuv444p10le 6367be8bb534416105e450a54ae2efbf yuv444p10le 6367be8bb534416105e450a54ae2efbf
......
#format: frame checksums
#version: 1
#hash: MD5
#tb 0: 1/50
#stream#, dts, pts, duration, size, hash
0, 0, 0, 1, 43200, c3964ed9065de7f839b8d878444c6140
0, 1, 1, 1, 43200, 87595f7f53d6e84af9708dba72422cc4
0, 2, 2, 1, 43200, 5cadbce099363a71040919e6f1cec496
0, 3, 3, 1, 43200, 0e3cab2b26b936de245a94b4128a368f
0, 4, 4, 1, 43200, 07bde452ca50655717a85cd9fdb3f7ce
0, 5, 5, 1, 43200, 00bee090fe849fee5fd4eb169c62c897
0, 6, 6, 1, 43200, 4564a423df89d7e9dea1226873ce9a51
0, 7, 7, 1, 43200, 7580af6956360112191380a677f5e625
0, 8, 8, 1, 43200, c9d05c5aadf8a372acfc2c93094d003e
0, 9, 9, 1, 43200, 6c08ea732cda06cf9a12f2e1a089d401
#format: frame checksums
#version: 1
#hash: MD5
#tb 0: 1/50
#stream#, dts, pts, duration, size, hash
0, 0, 0, 1, 43200, 8403bd5a381737e1c2d737047f6a5a0b
0, 1, 1, 1, 43200, 3c0bf7eecc3131e3598f6810d6b70539
0, 2, 2, 1, 43200, ff020bf894bb88d74426f02a75081695
0, 3, 3, 1, 43200, 097d81cb29f1caaf4446f3a3de4842d9
0, 4, 4, 1, 43200, e923a7e7e0349846ba27bd2e6ebdf4df
0, 5, 5, 1, 43200, 28c6016e6687c7eecbe4057a4dbfe372
0, 6, 6, 1, 43200, 15ae05537ea7152b535d112871b5ef84
0, 7, 7, 1, 43200, cb50d043a10a0e9b52eed0e8b3aabc7b
0, 8, 8, 1, 43200, f97dfbce56e36a42538ef000ce0e937e
0, 9, 9, 1, 43200, aae42063df910ed31c09eba5f73a195c
#format: frame checksums
#version: 1
#hash: MD5
#tb 0: 1/50
#stream#, dts, pts, duration, size, hash
0, 0, 0, 1, 57600, 18fed3eff623f0308561e5db0bc28691
0, 1, 1, 1, 57600, 3f22b2269cd53738e955c4b27ff6abce
0, 2, 2, 1, 57600, d1a44bae5cf45c68c5ca86f9e1ffd6a0
0, 3, 3, 1, 57600, 6592a5b6f1a8a18930bf27195a836efc
0, 4, 4, 1, 57600, f1dc8ba72916630aa8ff07f214bd1baf
0, 5, 5, 1, 57600, 195db6438c313e870ecfe9db3e3cbe83
0, 6, 6, 1, 57600, ad1cca6689026c31c2350594b669b7e7
0, 7, 7, 1, 57600, 6cb437a604a714ea746b429cdd68c47f
0, 8, 8, 1, 57600, cda524f9de8774cc161224c3b81033f5
0, 9, 9, 1, 57600, a2ae05ae998b4158a32b96e01ed02241
#format: frame checksums
#version: 1
#hash: MD5
#tb 0: 1/50
#stream#, dts, pts, duration, size, hash
0, 0, 0, 1, 57600, 504a347654ef41e568c484ab017f5387
0, 1, 1, 1, 57600, 6110f151a39144da5616227d44b81c21
0, 2, 2, 1, 57600, a856b5e0675c59fb9329d087580c6c5b
0, 3, 3, 1, 57600, d951b271bf38e7c9b9d215f7a87a079c
0, 4, 4, 1, 57600, a9e6feb4142a111c74d46cdbac08f2e6
0, 5, 5, 1, 57600, 4802b599651e7aa35cd52d0dfbfaa4d3
0, 6, 6, 1, 57600, 1244f3b9da3341057566915e708c7afb
0, 7, 7, 1, 57600, 212a02d7ee8d56b6f2a4fd7ee6eb3c93
0, 8, 8, 1, 57600, 1872091be977628ff5435f9186f64a29
0, 9, 9, 1, 57600, d5255599c33859ee96474a20929dc4bb
0, 10, 10, 1, 57600, 16d0056501cd1bfc630f6f99672504a0
0, 11, 11, 1, 57600, 7dcca62c4dc5d508f38376533648bce6
0, 12, 12, 1, 57600, 1e1617813af32cbf7be3df11d275339e
0, 13, 13, 1, 57600, 88b036fde717f72485b24e346f55f9dc
0, 14, 14, 1, 57600, e689287b89b116ec71670ee479e15717
0, 15, 15, 1, 57600, 211aa3727dcde2d9dfe15d6cebda69e4
0, 16, 16, 1, 57600, 3037677f680cbdcac242da94717da3ff
0, 17, 17, 1, 57600, 0138ea9fd279a939df385ea0c97700ca
0, 18, 18, 1, 57600, 9da47cb99085c352f9e9a07639a9b3cb
0, 19, 19, 1, 57600, d369f4c840ccb61ce3aaffc77b5f230e
#format: frame checksums
#version: 1
#hash: MD5
#tb 0: 1/50
#stream#, dts, pts, duration, size, hash
0, 0, 0, 1, 86400, 06a4c4550dc31f75f842a4e65fc32398
0, 1, 1, 1, 86400, f7feda15d5c2babc1f6438c72d4900e4
0, 2, 2, 1, 86400, 5aa9924c0a17c2209b72ba7ec9af6fdd
0, 3, 3, 1, 86400, 01597451aab5498ba89a7f632cd7d0cf
0, 4, 4, 1, 86400, 9a42ae17c2744c305895acd911066575
0, 5, 5, 1, 86400, 17c1d9cda113a3ce2fc7387e2adaa89b
0, 6, 6, 1, 86400, 6c6e303d282d8e043329eb6b29600cb4
0, 7, 7, 1, 86400, 97b7d47711222a91dd7ac121df01f922
0, 8, 8, 1, 86400, 69f305bc0a4a92422b5ebe05a3ac8b38
0, 9, 9, 1, 86400, a559c738c428c2f895e94a31957eec6b
#format: frame checksums
#version: 1
#hash: MD5
#tb 0: 1/50
#stream#, dts, pts, duration, size, hash
0, 0, 0, 1, 57600, 442a6d932a163907303f1e08056b33b3
0, 1, 1, 1, 57600, 80b21954eb522f0140e2226a03062839
0, 2, 2, 1, 57600, 991251ca3f55879e3ed90d5816bf5056
0, 3, 3, 1, 57600, 33f1a1aa1a48fda4ef7dccf302c4e7bf
0, 4, 4, 1, 57600, e9faf81b0664be622040910f29e3d522
0, 5, 5, 1, 57600, fa9aba755df9f52aa551d942bba5e26f
0, 6, 6, 1, 57600, ef58938b38ced2fecb8afdb9a6d8024b
0, 7, 7, 1, 57600, ce43338326c024eb187abaeaa64a44b1
0, 8, 8, 1, 57600, d487dab8c8b9f690f13569f3d1875a6d
0, 9, 9, 1, 57600, b4ab8672067ffce5df2daa9a2c2496d2
#format: frame checksums
#version: 1
#hash: MD5
#tb 0: 1/50
#stream#, dts, pts, duration, size, hash
0, 0, 0, 1, 57600, 9c03fb4aa813522d9a44b3b309b85d19
0, 1, 1, 1, 57600, 73081beafa17bc23a170172e311c0f54
0, 2, 2, 1, 57600, 36a1eb7aec96b81d54119e26a5056f48
0, 3, 3, 1, 57600, 228ca739c96ceb5c63f593790e487a49
0, 4, 4, 1, 57600, 289e08c01574d23562ce8162b0ec553b
0, 5, 5, 1, 57600, 76ca62fa4b80bef534e3cb4e37d9f178
0, 6, 6, 1, 57600, 0d9d5c09c4a453b8b7e04d2904924e15
0, 7, 7, 1, 57600, b659b38305c3afbbc84f5aab4c373592
0, 8, 8, 1, 57600, 3f5c87c678c2dc6ffc45ec875920e620
0, 9, 9, 1, 57600, d758cb908aee57e05af8dd7132ce0973
0, 10, 10, 1, 57600, 5539bdf8f2cfc09580bd9371c3655989
0, 11, 11, 1, 57600, 0446cf34d7f1a081e7eef6050cbcb7d8
0, 12, 12, 1, 57600, 2a29f74bd76f8c2187a0f61ff6935744
0, 13, 13, 1, 57600, fb4b0e71884069537e3e3e62fbe83877
0, 14, 14, 1, 57600, 57e19855e60825333fbbd9fdbad54ca0
0, 15, 15, 1, 57600, 2f111ed5be32c4cbff83efd5530893db
0, 16, 16, 1, 57600, 87afd9a47e6ee7a71cee99157dd89815
0, 17, 17, 1, 57600, 72992f2a91f9d91dca5c638d6a2f748c
0, 18, 18, 1, 57600, ad92cad72adfe2ad43c3be5e3b74439f
0, 19, 19, 1, 57600, d70f736810724ab968307daf4da1120c
#format: frame checksums
#version: 1
#hash: MD5
#tb 0: 1/50
#stream#, dts, pts, duration, size, hash
0, 0, 0, 1, 86400, d9500a2c9e0b6b44935de019c3583194
0, 1, 1, 1, 86400, 58edf3e69b332a2b62ca78a8dc99ea68
0, 2, 2, 1, 86400, ca8894e8496bf0eec64655972323feb2
0, 3, 3, 1, 86400, 3473bbb77d10631525a6e7240f6f0c68
0, 4, 4, 1, 86400, 1bc71b016a90d6a79882bd6b8af0cb23
0, 5, 5, 1, 86400, ebfbb540587c20586d9ff33adee41f24
0, 6, 6, 1, 86400, e8bd27ebb53f979232abbb9d18b15085
0, 7, 7, 1, 86400, 820f65d50b3c48ec600924c0bba90c40
0, 8, 8, 1, 86400, d8d8588550bc9820eb3289c5684dd7a9
0, 9, 9, 1, 86400, d8bd25842191e47ac1b342655e7015e6
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment