Commit dcc9009e authored by Michael Niedermayer

Merge remote-tracking branch 'rbultje/vp9-simd'

* rbultje/vp9-simd:
  vp9: make mv bounds 32bit.
  vp9: reset contextual caches on frame size change with mt enabled.
  vp9/x86: idct_32x32_add_ssse3 sub-8x8-idct.
  vp9/x86: idct_32x32_add_ssse3 sub-16x16-idct.
  vp9/x86: idct_32x32_add_ssse3.
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 16ae337b 024fac5c
......@@ -232,7 +232,7 @@ typedef struct VP9Context {
// block reconstruction intermediates
int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
VP56mv min_mv, max_mv;
struct { int x, y; } min_mv, max_mv;
DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
} VP9Context;
......@@ -3450,6 +3450,13 @@ static void adapt_probs(VP9Context *s)
}
}
/*
 * Release the size-dependent buffers owned by the decoder context:
 * above_partition_ctx, b_base and block_base.
 *
 * Shared by vp9_decode_free() and by the frame-size-change path in
 * vp9_decode_update_thread_context(), so both release the same set.
 * NOTE(review): av_freep() is expected to reset each pointer to NULL after
 * freeing (see libavutil); the size-change check tests
 * s->above_partition_ctx before calling this -- confirm.
 */
static void free_buffers(VP9Context *s)
{
    av_freep(&s->above_partition_ctx);  /* also tested by the size-change check */
    av_freep(&s->b_base);
    av_freep(&s->block_base);
}
static av_cold int vp9_decode_free(AVCodecContext *ctx)
{
VP9Context *s = ctx->priv_data;
......@@ -3468,11 +3475,9 @@ static av_cold int vp9_decode_free(AVCodecContext *ctx)
ff_thread_release_buffer(ctx, &s->next_refs[i]);
av_frame_free(&s->next_refs[i].f);
}
av_freep(&s->above_partition_ctx);
free_buffers(s);
av_freep(&s->c_b);
s->c_b_size = 0;
av_freep(&s->b_base);
av_freep(&s->block_base);
return 0;
}
......@@ -3762,7 +3767,10 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
int i, res;
VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
// FIXME scalability, size, etc.
// detect size changes in other threads
if (s->above_partition_ctx && (s->cols != ssrc->cols || s->rows != ssrc->rows)) {
free_buffers(s);
}
for (i = 0; i < 2; i++) {
if (s->frames[i].tf.f->data[0])
......
......@@ -159,6 +159,7 @@ filters_8tap_1d_fn3(avg)
/*
 * Prototypes for the SSSE3 idct_idct "add" routines for 4x4, 8x8, 16x16 and
 * 32x32 blocks. All four share the same signature (dst, stride, block, eob).
 * The definitions are not visible here -- presumably implemented in assembly,
 * given the enclosing HAVE_YASM guard; confirm against the .asm sources.
 */
void ff_vp9_idct_idct_4x4_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_8x8_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_16x16_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_32x32_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
#endif /* HAVE_YASM */
......@@ -217,6 +218,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
if (ARCH_X86_64) {
dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3;
dsp->itxfm_add[TX_32X32][ADST_ADST] =
dsp->itxfm_add[TX_32X32][ADST_DCT] =
dsp->itxfm_add[TX_32X32][DCT_ADST] =
dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
}
}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment