Commit 0c8aba38 authored by Mickaël Raulet, committed by Michael Niedermayer

hevc: add wavefront parallel processing

cherry picked from commit b971f2c8fdc60f8bab605a6e8060492eb548a53a
cherry picked from commit e57b0a2c915ce6b8a9d57b8292f6581f0680842e
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent e146c326
This diff is collapsed.
...@@ -39,6 +39,9 @@ ...@@ -39,6 +39,9 @@
#define MAX_DPB_SIZE 16 // A.4.1 #define MAX_DPB_SIZE 16 // A.4.1
#define MAX_REFS 16 #define MAX_REFS 16
#define MAX_NB_THREADS 16
#define SHIFT_CTB_WPP 2
/** /**
* 7.4.2.1 * 7.4.2.1
*/ */
...@@ -563,6 +566,9 @@ typedef struct SliceHeader { ...@@ -563,6 +566,9 @@ typedef struct SliceHeader {
uint8_t slice_loop_filter_across_slices_enabled_flag; uint8_t slice_loop_filter_across_slices_enabled_flag;
int *entry_point_offset;
int * offset;
int * size;
int num_entry_point_offsets; int num_entry_point_offsets;
uint8_t luma_log2_weight_denom; uint8_t luma_log2_weight_denom;
...@@ -705,14 +711,6 @@ typedef struct HEVCFrame { ...@@ -705,14 +711,6 @@ typedef struct HEVCFrame {
AVBufferRef *rpl_buf; AVBufferRef *rpl_buf;
} HEVCFrame; } HEVCFrame;
typedef struct FilterData {
int x;
int y;
int size;
int slice_or_tiles_left_boundary;
int slice_or_tiles_up_boundary;
} FilterData;
typedef struct HEVCNAL { typedef struct HEVCNAL {
uint8_t *rbsp_buffer; uint8_t *rbsp_buffer;
int rbsp_buffer_size; int rbsp_buffer_size;
...@@ -745,20 +743,24 @@ typedef struct HEVCLocalContext { ...@@ -745,20 +743,24 @@ typedef struct HEVCLocalContext {
PredictionUnit pu; PredictionUnit pu;
NeighbourAvailable na; NeighbourAvailable na;
DECLARE_ALIGNED(16, int16_t, mc_buffer[(MAX_PB_SIZE + 7) * MAX_PB_SIZE]); DECLARE_ALIGNED(16, int16_t, mc_buffer[(MAX_PB_SIZE + 7) * MAX_PB_SIZE]);
FilterData *save_boundary_strengths;
int nb_saved;
} HEVCLocalContext; } HEVCLocalContext;
typedef struct HEVCContext { typedef struct HEVCContext {
const AVClass *c; // needed by private avoptions const AVClass *c; // needed by private avoptions
AVCodecContext *avctx; AVCodecContext *avctx;
HEVCLocalContext HEVClc; struct HEVCContext *sList[MAX_NB_THREADS];
HEVCLocalContext *HEVClcList[MAX_NB_THREADS];
HEVCLocalContext *HEVClc;
uint8_t threads_type;
uint8_t threads_number;
int width; int width;
int height; int height;
uint8_t cabac_state[HEVC_CONTEXTS]; uint8_t *cabac_state;
AVFrame *frame; AVFrame *frame;
AVFrame *sao_frame; AVFrame *sao_frame;
...@@ -826,6 +828,18 @@ typedef struct HEVCContext { ...@@ -826,6 +828,18 @@ typedef struct HEVCContext {
uint16_t seq_decode; uint16_t seq_decode;
uint16_t seq_output; uint16_t seq_output;
int enable_parallel_tiles;
int wpp_err;
int skipped_bytes;
int *skipped_bytes_pos;
int skipped_bytes_pos_size;
int *skipped_bytes_nal;
int **skipped_bytes_pos_nal;
int *skipped_bytes_pos_size_nal;
uint8_t *data;
HEVCNAL *nals; HEVCNAL *nals;
int nb_nals; int nb_nals;
int nals_allocated; int nals_allocated;
......
This diff is collapsed.
...@@ -73,7 +73,7 @@ static int chroma_tc(HEVCContext *s, int qp_y, int c_idx, int tc_offset) ...@@ -73,7 +73,7 @@ static int chroma_tc(HEVCContext *s, int qp_y, int c_idx, int tc_offset)
static int get_qPy_pred(HEVCContext *s, int xC, int yC, int xBase, int yBase, int log2_cb_size) static int get_qPy_pred(HEVCContext *s, int xC, int yC, int xBase, int yBase, int log2_cb_size)
{ {
HEVCLocalContext *lc = &s->HEVClc; HEVCLocalContext *lc = s->HEVClc;
int ctb_size_mask = (1 << s->sps->log2_ctb_size) - 1; int ctb_size_mask = (1 << s->sps->log2_ctb_size) - 1;
int MinCuQpDeltaSizeMask = (1 << (s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1; int MinCuQpDeltaSizeMask = (1 << (s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
int xQgBase = xBase - ( xBase & MinCuQpDeltaSizeMask ); int xQgBase = xBase - ( xBase & MinCuQpDeltaSizeMask );
...@@ -153,11 +153,11 @@ void ff_hevc_set_qPy(HEVCContext *s, int xC, int yC, int xBase, int yBase, int l ...@@ -153,11 +153,11 @@ void ff_hevc_set_qPy(HEVCContext *s, int xC, int yC, int xBase, int yBase, int l
{ {
int qp_y = get_qPy_pred(s, xC, yC, xBase, yBase, log2_cb_size); int qp_y = get_qPy_pred(s, xC, yC, xBase, yBase, log2_cb_size);
if (s->HEVClc.tu.cu_qp_delta != 0) { if (s->HEVClc->tu.cu_qp_delta != 0) {
int off = s->sps->qp_bd_offset; int off = s->sps->qp_bd_offset;
s->HEVClc.qp_y = ((qp_y + s->HEVClc.tu.cu_qp_delta + 52 + 2 * off) % (52 + off)) - off; s->HEVClc->qp_y = ((qp_y + s->HEVClc->tu.cu_qp_delta + 52 + 2 * off) % (52 + off)) - off;
} else } else
s->HEVClc.qp_y = qp_y; s->HEVClc->qp_y = qp_y;
} }
static int get_qPy(HEVCContext *s, int xC, int yC) static int get_qPy(HEVCContext *s, int xC, int yC)
......
...@@ -40,7 +40,7 @@ static const uint8_t l0_l1_cand_idx[12][2] = { ...@@ -40,7 +40,7 @@ static const uint8_t l0_l1_cand_idx[12][2] = {
void ff_hevc_set_neighbour_available(HEVCContext *s, int x0, int y0, int nPbW, int nPbH) void ff_hevc_set_neighbour_available(HEVCContext *s, int x0, int y0, int nPbW, int nPbH)
{ {
HEVCLocalContext *lc = &s->HEVClc; HEVCLocalContext *lc = s->HEVClc;
int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1); int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1);
int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1); int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1);
...@@ -98,7 +98,7 @@ static int check_prediction_block_available(HEVCContext *s, int log2_cb_size, ...@@ -98,7 +98,7 @@ static int check_prediction_block_available(HEVCContext *s, int log2_cb_size,
int x0, int y0, int nPbW, int nPbH, int x0, int y0, int nPbW, int nPbH,
int xA1, int yA1, int partIdx) int xA1, int yA1, int partIdx)
{ {
HEVCLocalContext *lc = &s->HEVClc; HEVCLocalContext *lc = s->HEVClc;
if (lc->cu.x < xA1 && lc->cu.y < yA1 && if (lc->cu.x < xA1 && lc->cu.y < yA1 &&
(lc->cu.x + (1 << log2_cb_size)) > xA1 && (lc->cu.x + (1 << log2_cb_size)) > xA1 &&
...@@ -272,7 +272,8 @@ static int temporal_luma_motion_vector(HEVCContext *s, int x0, int y0, ...@@ -272,7 +272,8 @@ static int temporal_luma_motion_vector(HEVCContext *s, int x0, int y0,
xPRb = x0 + nPbW; xPRb = x0 + nPbW;
yPRb = y0 + nPbH; yPRb = y0 + nPbH;
ff_thread_await_progress(&ref->tf, INT_MAX, 0); if (s->threads_type == FF_THREAD_FRAME )
ff_thread_await_progress(&ref->tf, INT_MAX, 0);
if (tab_mvf && if (tab_mvf &&
y0 >> s->sps->log2_ctb_size == yPRb >> s->sps->log2_ctb_size && y0 >> s->sps->log2_ctb_size == yPRb >> s->sps->log2_ctb_size &&
yPRb < s->sps->height && yPRb < s->sps->height &&
...@@ -322,7 +323,7 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0, ...@@ -322,7 +323,7 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
int singleMCLFlag, int part_idx, int singleMCLFlag, int part_idx,
struct MvField mergecandlist[]) struct MvField mergecandlist[])
{ {
HEVCLocalContext *lc = &s->HEVClc; HEVCLocalContext *lc = s->HEVClc;
RefPicList *refPicList = s->ref->refPicList; RefPicList *refPicList = s->ref->refPicList;
MvField *tab_mvf = s->ref->tab_mvf; MvField *tab_mvf = s->ref->tab_mvf;
...@@ -707,7 +708,7 @@ void ff_hevc_luma_mv_merge_mode(HEVCContext *s, int x0, int y0, int nPbW, ...@@ -707,7 +708,7 @@ void ff_hevc_luma_mv_merge_mode(HEVCContext *s, int x0, int y0, int nPbW,
struct MvField mergecand_list[MRG_MAX_NUM_CANDS] = { { { { 0 } } } }; struct MvField mergecand_list[MRG_MAX_NUM_CANDS] = { { { { 0 } } } };
int nPbW2 = nPbW; int nPbW2 = nPbW;
int nPbH2 = nPbH; int nPbH2 = nPbH;
HEVCLocalContext *lc = &s->HEVClc; HEVCLocalContext *lc = s->HEVClc;
if (s->pps->log2_parallel_merge_level > 2 && nCS == 8) { if (s->pps->log2_parallel_merge_level > 2 && nCS == 8) {
singleMCLFlag = 1; singleMCLFlag = 1;
...@@ -794,7 +795,7 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW, ...@@ -794,7 +795,7 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
int merge_idx, MvField *mv, int merge_idx, MvField *mv,
int mvp_lx_flag, int LX) int mvp_lx_flag, int LX)
{ {
HEVCLocalContext *lc = &s->HEVClc; HEVCLocalContext *lc = s->HEVClc;
MvField *tab_mvf = s->ref->tab_mvf; MvField *tab_mvf = s->ref->tab_mvf;
int isScaledFlag_L0 = 0; int isScaledFlag_L0 = 0;
int availableFlagLXA0 = 0; int availableFlagLXA0 = 0;
......
...@@ -72,7 +72,7 @@ static const AVRational vui_sar[] = { ...@@ -72,7 +72,7 @@ static const AVRational vui_sar[] = {
int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps, int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
const HEVCSPS *sps, int is_slice_header) const HEVCSPS *sps, int is_slice_header)
{ {
HEVCLocalContext *lc = &s->HEVClc; HEVCLocalContext *lc = s->HEVClc;
uint8_t rps_predict = 0; uint8_t rps_predict = 0;
int delta_poc; int delta_poc;
int k0 = 0; int k0 = 0;
...@@ -253,7 +253,7 @@ static void decode_hrd(HEVCContext *s) ...@@ -253,7 +253,7 @@ static void decode_hrd(HEVCContext *s)
int ff_hevc_decode_nal_vps(HEVCContext *s) int ff_hevc_decode_nal_vps(HEVCContext *s)
{ {
int i,j; int i,j;
GetBitContext *gb = &s->HEVClc.gb; GetBitContext *gb = &s->HEVClc->gb;
int vps_id = 0; int vps_id = 0;
VPS *vps; VPS *vps;
...@@ -289,7 +289,7 @@ int ff_hevc_decode_nal_vps(HEVCContext *s) ...@@ -289,7 +289,7 @@ int ff_hevc_decode_nal_vps(HEVCContext *s)
goto err; goto err;
} }
if (decode_profile_tier_level(&s->HEVClc, &vps->ptl, vps->vps_max_sub_layers) < 0) { if (decode_profile_tier_level(s->HEVClc, &vps->ptl, vps->vps_max_sub_layers) < 0) {
av_log(s->avctx, AV_LOG_ERROR, "Error decoding profile tier level.\n"); av_log(s->avctx, AV_LOG_ERROR, "Error decoding profile tier level.\n");
goto err; goto err;
} }
...@@ -347,7 +347,7 @@ err: ...@@ -347,7 +347,7 @@ err:
static void decode_vui(HEVCContext *s, HEVCSPS *sps) static void decode_vui(HEVCContext *s, HEVCSPS *sps)
{ {
VUI *vui = &sps->vui; VUI *vui = &sps->vui;
GetBitContext *gb = &s->HEVClc.gb; GetBitContext *gb = &s->HEVClc->gb;
int sar_present; int sar_present;
av_log(s->avctx, AV_LOG_DEBUG, "Decoding VUI\n"); av_log(s->avctx, AV_LOG_DEBUG, "Decoding VUI\n");
...@@ -469,7 +469,7 @@ static void set_default_scaling_list_data(ScalingList *sl) ...@@ -469,7 +469,7 @@ static void set_default_scaling_list_data(ScalingList *sl)
static int scaling_list_data(HEVCContext *s, ScalingList *sl) static int scaling_list_data(HEVCContext *s, ScalingList *sl)
{ {
GetBitContext *gb = &s->HEVClc.gb; GetBitContext *gb = &s->HEVClc->gb;
uint8_t scaling_list_pred_mode_flag[4][6]; uint8_t scaling_list_pred_mode_flag[4][6];
int32_t scaling_list_dc_coef[2][6]; int32_t scaling_list_dc_coef[2][6];
...@@ -525,7 +525,7 @@ static int scaling_list_data(HEVCContext *s, ScalingList *sl) ...@@ -525,7 +525,7 @@ static int scaling_list_data(HEVCContext *s, ScalingList *sl)
int ff_hevc_decode_nal_sps(HEVCContext *s) int ff_hevc_decode_nal_sps(HEVCContext *s)
{ {
const AVPixFmtDescriptor *desc; const AVPixFmtDescriptor *desc;
GetBitContext *gb = &s->HEVClc.gb; GetBitContext *gb = &s->HEVClc->gb;
int ret = 0; int ret = 0;
int sps_id = 0; int sps_id = 0;
int log2_diff_max_min_transform_block_size; int log2_diff_max_min_transform_block_size;
...@@ -559,7 +559,7 @@ int ff_hevc_decode_nal_sps(HEVCContext *s) ...@@ -559,7 +559,7 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
} }
skip_bits1(gb); // temporal_id_nesting_flag skip_bits1(gb); // temporal_id_nesting_flag
if (decode_profile_tier_level(&s->HEVClc, &sps->ptl, sps->max_sub_layers) < 0) { if (decode_profile_tier_level(s->HEVClc, &sps->ptl, sps->max_sub_layers) < 0) {
av_log(s->avctx, AV_LOG_ERROR, "error decoding profile tier level\n"); av_log(s->avctx, AV_LOG_ERROR, "error decoding profile tier level\n");
ret = AVERROR_INVALIDDATA; ret = AVERROR_INVALIDDATA;
goto err; goto err;
...@@ -888,7 +888,7 @@ static void hevc_pps_free(void *opaque, uint8_t *data) ...@@ -888,7 +888,7 @@ static void hevc_pps_free(void *opaque, uint8_t *data)
int ff_hevc_decode_nal_pps(HEVCContext *s) int ff_hevc_decode_nal_pps(HEVCContext *s)
{ {
GetBitContext *gb = &s->HEVClc.gb; GetBitContext *gb = &s->HEVClc->gb;
HEVCSPS *sps = NULL; HEVCSPS *sps = NULL;
int pic_area_in_ctbs, pic_area_in_min_cbs, pic_area_in_min_tbs; int pic_area_in_ctbs, pic_area_in_min_cbs, pic_area_in_min_tbs;
int log2_diff_ctb_min_tb_size; int log2_diff_ctb_min_tb_size;
......
...@@ -351,7 +351,8 @@ static HEVCFrame *generate_missing_ref(HEVCContext *s, int poc) ...@@ -351,7 +351,8 @@ static HEVCFrame *generate_missing_ref(HEVCContext *s, int poc)
frame->sequence = s->seq_decode; frame->sequence = s->seq_decode;
frame->flags = 0; frame->flags = 0;
ff_thread_report_progress(&frame->tf, INT_MAX, 0); if (s->threads_type == FF_THREAD_FRAME)
ff_thread_report_progress(&frame->tf, INT_MAX, 0);
return frame; return frame;
} }
......
...@@ -31,7 +31,7 @@ static void decode_nal_sei_decoded_picture_hash(HEVCContext *s, int payload_size ...@@ -31,7 +31,7 @@ static void decode_nal_sei_decoded_picture_hash(HEVCContext *s, int payload_size
uint8_t hash_type; uint8_t hash_type;
//uint16_t picture_crc; //uint16_t picture_crc;
//uint32_t picture_checksum; //uint32_t picture_checksum;
GetBitContext *gb = &s->HEVClc.gb; GetBitContext *gb = &s->HEVClc->gb;
hash_type = get_bits(gb, 8); hash_type = get_bits(gb, 8);
...@@ -79,7 +79,7 @@ static void decode_nal_sei_frame_packing_arrangement(HEVCLocalContext *lc) ...@@ -79,7 +79,7 @@ static void decode_nal_sei_frame_packing_arrangement(HEVCLocalContext *lc)
static int decode_nal_sei_message(HEVCContext *s) static int decode_nal_sei_message(HEVCContext *s)
{ {
GetBitContext *gb = &s->HEVClc.gb; GetBitContext *gb = &s->HEVClc->gb;
int payload_type = 0; int payload_type = 0;
int payload_size = 0; int payload_size = 0;
...@@ -99,7 +99,7 @@ static int decode_nal_sei_message(HEVCContext *s) ...@@ -99,7 +99,7 @@ static int decode_nal_sei_message(HEVCContext *s)
if (payload_type == 256 /*&& s->decode_checksum_sei*/) if (payload_type == 256 /*&& s->decode_checksum_sei*/)
decode_nal_sei_decoded_picture_hash(s, payload_size); decode_nal_sei_decoded_picture_hash(s, payload_size);
else if (payload_type == 45) else if (payload_type == 45)
decode_nal_sei_frame_packing_arrangement(&s->HEVClc); decode_nal_sei_frame_packing_arrangement(s->HEVClc);
else { else {
av_log(s->avctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", payload_type); av_log(s->avctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", payload_type);
skip_bits(gb, 8*payload_size); skip_bits(gb, 8*payload_size);
...@@ -124,6 +124,6 @@ int ff_hevc_decode_nal_sei(HEVCContext *s) ...@@ -124,6 +124,6 @@ int ff_hevc_decode_nal_sei(HEVCContext *s)
{ {
do { do {
decode_nal_sei_message(s); decode_nal_sei_message(s);
} while (more_rbsp_data(&s->HEVClc.gb)); } while (more_rbsp_data(&s->HEVClc->gb));
return 0; return 0;
} }
...@@ -65,7 +65,7 @@ static void FUNC(intra_pred)(HEVCContext *s, int x0, int y0, int log2_size, int ...@@ -65,7 +65,7 @@ static void FUNC(intra_pred)(HEVCContext *s, int x0, int y0, int log2_size, int
for (i = (start); i < (start) + (length); i++) \ for (i = (start); i < (start) + (length); i++) \
if (!IS_INTRA(-1, i)) \ if (!IS_INTRA(-1, i)) \
ptr[i] = ptr[i - 1] ptr[i] = ptr[i - 1]
HEVCLocalContext *lc = &s->HEVClc; HEVCLocalContext *lc = s->HEVClc;
int i; int i;
int hshift = s->sps->hshift[c_idx]; int hshift = s->sps->hshift[c_idx];
int vshift = s->sps->vshift[c_idx]; int vshift = s->sps->vshift[c_idx];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment