Commit 675bb1f4 authored by Lynne

diracdec: rewrite golomb reader

This version is able to output multiple coefficients at a time and
removes actual Golomb code parsing altogether.
It's also able to partially recover the last coefficient in case
the packet is incomplete.

Total decoder performance gain for 8bit 420 1080p lossless: 40%.
Total decoder performance gain for 10bit 420 1080p lossless: 40%.

clang was able to vectorize the loop much better than
my handwritten assembly, but gcc was very naive and didn't.

Lookup table is a rewritten version of vc2hqdecode.
parent d778be6e
This diff is collapsed.
/* /*
* Copyright (C) 2016 Open Broadcast Systems Ltd.
* Author 2016 Rostislav Pehlivanov <rpehlivanov@obe.tv>
*
* This file is part of FFmpeg. * This file is part of FFmpeg.
* *
* FFmpeg is free software; you can redistribute it and/or * FFmpeg is free software; you can redistribute it and/or
...@@ -24,28 +21,9 @@ ...@@ -24,28 +21,9 @@
#include "libavutil/avutil.h" #include "libavutil/avutil.h"
/* Can be 32 bits wide for some performance gain on some machines, but it will int ff_dirac_golomb_read_16bit(const uint8_t *buf, int bytes,
* incorrectly decode very long coefficients (usually only 1 or 2 per frame) */ uint8_t *_dst, int coeffs);
typedef uint64_t residual; int ff_dirac_golomb_read_32bit(const uint8_t *buf, int bytes,
uint8_t *_dst, int coeffs);
#define LUT_BITS 8
/* Exactly 64 bytes.
 *
 * Record type handed to the ff_dirac_golomb_read_*() functions as lut_ctx.
 *
 * Size check: residual is a uint64_t and LUT_BITS is 8, so the members add up
 * to 2*8 + 8*4 + 3*4 + 2*1 = 62 bytes; the 64-byte figure therefore relies on
 * two bytes of tail padding (assumes uint64_t is 8-byte aligned -- TODO
 * confirm for every target ABI).
 *
 * NOTE(review): the per-member notes below are inferred from the member names
 * only; confirm them against the reader implementation. */
typedef struct DiracGolombLUT {
/* Presumably the partially decoded bits at the start and at the end of the
 * input covered by this record. */
residual preamble, leftover;
/* Room for up to LUT_BITS values; presumably the fully decoded coefficients. */
int32_t ready[LUT_BITS];
/* Presumably the bit counts of preamble/leftover and the number of ready[]
 * slots in use. */
int32_t preamble_bits, leftover_bits, ready_num;
/* Presumably sign-handling state (need_s: a sign bit is still pending;
 * sign: the sign to apply) -- TODO confirm. */
int8_t need_s, sign;
} DiracGolombLUT;
/* Set up the Golomb reader context; takes the address of the caller's
 * DiracGolombLUT pointer (dirac_decode_init() passes &s->reader_ctx).
 * NOTE(review): presumably allocates the table and stores it in *lut_ctx, and
 * presumably returns 0 or a negative error code -- neither is visible here.
 * The visible call site ignores the return value. */
av_cold int ff_dirac_golomb_reader_init(DiracGolombLUT **lut_ctx);
/* Read Golomb-coded coefficients from buf (bytes long) into dst; coeffs is the
 * number of coefficients the caller asks for. Used by decode_hq_slice() when
 * s->pshift is non-zero.
 * The caller compares the return value with the requested count and treats a
 * smaller value as "fewer coefficients than requested were produced".
 * NOTE(review): presumably each output element is 32 bits wide -- confirm. */
int ff_dirac_golomb_read_32bit(DiracGolombLUT *lut_ctx, const uint8_t *buf,
int bytes, uint8_t *dst, int coeffs);
/* Same calling convention as the 32bit variant; used by decode_hq_slice()
 * when s->pshift is zero.
 * NOTE(review): presumably each output element is 16 bits wide -- confirm. */
int ff_dirac_golomb_read_16bit(DiracGolombLUT *lut_ctx, const uint8_t *buf,
int bytes, uint8_t *_dst, int coeffs);
/* Tear down the reader context; called from dirac_decode_end() with
 * &s->reader_ctx.
 * NOTE(review): presumably frees the table and resets *lut_ctx -- confirm. */
av_cold void ff_dirac_golomb_reader_end(DiracGolombLUT **lut_ctx);
#endif /* AVCODEC_DIRAC_VLC_H */ #endif /* AVCODEC_DIRAC_VLC_H */
...@@ -136,7 +136,6 @@ typedef struct DiracContext { ...@@ -136,7 +136,6 @@ typedef struct DiracContext {
MpegvideoEncDSPContext mpvencdsp; MpegvideoEncDSPContext mpvencdsp;
VideoDSPContext vdsp; VideoDSPContext vdsp;
DiracDSPContext diracdsp; DiracDSPContext diracdsp;
DiracGolombLUT *reader_ctx;
DiracVersionInfo version; DiracVersionInfo version;
GetBitContext gb; GetBitContext gb;
AVDiracSeqHeader seq; AVDiracSeqHeader seq;
...@@ -395,7 +394,6 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx) ...@@ -395,7 +394,6 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx)
s->threads_num_buf = -1; s->threads_num_buf = -1;
s->thread_buf_size = -1; s->thread_buf_size = -1;
ff_dirac_golomb_reader_init(&s->reader_ctx);
ff_diracdsp_init(&s->diracdsp); ff_diracdsp_init(&s->diracdsp);
ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
ff_videodsp_init(&s->vdsp, 8); ff_videodsp_init(&s->vdsp, 8);
...@@ -428,8 +426,6 @@ static av_cold int dirac_decode_end(AVCodecContext *avctx) ...@@ -428,8 +426,6 @@ static av_cold int dirac_decode_end(AVCodecContext *avctx)
DiracContext *s = avctx->priv_data; DiracContext *s = avctx->priv_data;
int i; int i;
ff_dirac_golomb_reader_end(&s->reader_ctx);
dirac_decode_flush(avctx); dirac_decode_flush(avctx);
for (i = 0; i < MAX_FRAMES; i++) for (i = 0; i < MAX_FRAMES; i++)
av_frame_free(&s->all_frames[i].avframe); av_frame_free(&s->all_frames[i].avframe);
...@@ -881,11 +877,11 @@ static int decode_hq_slice(DiracContext *s, DiracSlice *slice, uint8_t *tmp_buf) ...@@ -881,11 +877,11 @@ static int decode_hq_slice(DiracContext *s, DiracSlice *slice, uint8_t *tmp_buf)
coef_num = subband_coeffs(s, slice->slice_x, slice->slice_y, i, coeffs_num); coef_num = subband_coeffs(s, slice->slice_x, slice->slice_y, i, coeffs_num);
if (s->pshift) if (s->pshift)
coef_par = ff_dirac_golomb_read_32bit(s->reader_ctx, addr, coef_par = ff_dirac_golomb_read_32bit(addr, length,
length, tmp_buf, coef_num); tmp_buf, coef_num);
else else
coef_par = ff_dirac_golomb_read_16bit(s->reader_ctx, addr, coef_par = ff_dirac_golomb_read_16bit(addr, length,
length, tmp_buf, coef_num); tmp_buf, coef_num);
if (coef_num > coef_par) { if (coef_num > coef_par) {
const int start_b = coef_par * (1 << (s->pshift + 1)); const int start_b = coef_par * (1 << (s->pshift + 1));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment