Commit dcc9009e authored by Michael Niedermayer

Merge remote-tracking branch 'rbultje/vp9-simd'

* rbultje/vp9-simd:
  vp9: make mv bounds 32bit.
  vp9: reset contextual caches on frame size change with mt enabled.
  vp9/x86: idct_32x32_add_ssse3 sub-8x8-idct.
  vp9/x86: idct_32x32_add_ssse3 sub-16x16-idct.
  vp9/x86: idct_32x32_add_ssse3.
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 16ae337b 024fac5c
...@@ -232,7 +232,7 @@ typedef struct VP9Context { ...@@ -232,7 +232,7 @@ typedef struct VP9Context {
// block reconstruction intermediates // block reconstruction intermediates
int16_t *block_base, *block, *uvblock_base[2], *uvblock[2]; int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2]; uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
VP56mv min_mv, max_mv; struct { int x, y; } min_mv, max_mv;
DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64]; DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32]; DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
} VP9Context; } VP9Context;
...@@ -3450,6 +3450,13 @@ static void adapt_probs(VP9Context *s) ...@@ -3450,6 +3450,13 @@ static void adapt_probs(VP9Context *s)
} }
} }
/*
 * Free the buffers referenced by s->above_partition_ctx, s->b_base and
 * s->block_base.
 *
 * Shared by vp9_decode_free() (decoder teardown) and by
 * vp9_decode_update_thread_context(), which calls it when the cols/rows of
 * the source thread's context differ from this context's.
 *
 * NOTE(review): the caller tests s->above_partition_ctx for non-NULL before
 * calling this, so av_freep() is presumably expected to reset each pointer
 * to NULL after freeing -- confirm against libavutil.
 */
static void free_buffers(VP9Context *s)
{
av_freep(&s->above_partition_ctx);
av_freep(&s->b_base);
av_freep(&s->block_base);
}
static av_cold int vp9_decode_free(AVCodecContext *ctx) static av_cold int vp9_decode_free(AVCodecContext *ctx)
{ {
VP9Context *s = ctx->priv_data; VP9Context *s = ctx->priv_data;
...@@ -3468,11 +3475,9 @@ static av_cold int vp9_decode_free(AVCodecContext *ctx) ...@@ -3468,11 +3475,9 @@ static av_cold int vp9_decode_free(AVCodecContext *ctx)
ff_thread_release_buffer(ctx, &s->next_refs[i]); ff_thread_release_buffer(ctx, &s->next_refs[i]);
av_frame_free(&s->next_refs[i].f); av_frame_free(&s->next_refs[i].f);
} }
av_freep(&s->above_partition_ctx); free_buffers(s);
av_freep(&s->c_b); av_freep(&s->c_b);
s->c_b_size = 0; s->c_b_size = 0;
av_freep(&s->b_base);
av_freep(&s->block_base);
return 0; return 0;
} }
...@@ -3762,7 +3767,10 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo ...@@ -3762,7 +3767,10 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
int i, res; int i, res;
VP9Context *s = dst->priv_data, *ssrc = src->priv_data; VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
// FIXME scalability, size, etc. // detect size changes in other threads
if (s->above_partition_ctx && (s->cols != ssrc->cols || s->rows != ssrc->rows)) {
free_buffers(s);
}
for (i = 0; i < 2; i++) { for (i = 0; i < 2; i++) {
if (s->frames[i].tf.f->data[0]) if (s->frames[i].tf.f->data[0])
......
...@@ -159,6 +159,7 @@ filters_8tap_1d_fn3(avg) ...@@ -159,6 +159,7 @@ filters_8tap_1d_fn3(avg)
void ff_vp9_idct_idct_4x4_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); void ff_vp9_idct_idct_4x4_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_8x8_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); void ff_vp9_idct_idct_8x8_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_16x16_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); void ff_vp9_idct_idct_16x16_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_32x32_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
...@@ -217,6 +218,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) ...@@ -217,6 +218,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
if (ARCH_X86_64) { if (ARCH_X86_64) {
dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3; dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3; dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3;
dsp->itxfm_add[TX_32X32][ADST_ADST] =
dsp->itxfm_add[TX_32X32][ADST_DCT] =
dsp->itxfm_add[TX_32X32][DCT_ADST] =
dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
} }
} }
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment