Commit ba70563d authored by gcocherel, committed by Michael Niedermayer

hevc/pps: optimized size of min_tb_addr_zs

reduce computation too
(cherry picked from commit 39c4d45c7788081c45c7fae51b7c5d0bcbaece9d)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent f7f1f4c7
...@@ -463,6 +463,7 @@ typedef struct HEVCSPS { ...@@ -463,6 +463,7 @@ typedef struct HEVCSPS {
int min_tb_height; int min_tb_height;
int min_pu_width; int min_pu_width;
int min_pu_height; int min_pu_height;
int tb_mask;
int hshift[3]; int hshift[3];
int vshift[3]; int vshift[3];
...@@ -532,6 +533,7 @@ typedef struct HEVCPPS { ...@@ -532,6 +533,7 @@ typedef struct HEVCPPS {
int *tile_id; ///< TileId int *tile_id; ///< TileId
int *tile_pos_rs; ///< TilePosRS int *tile_pos_rs; ///< TilePosRS
int *min_tb_addr_zs; ///< MinTbAddrZS int *min_tb_addr_zs; ///< MinTbAddrZS
int *min_tb_addr_zs_tab;///< MinTbAddrZS
} HEVCPPS; } HEVCPPS;
typedef struct SliceHeader { typedef struct SliceHeader {
......
...@@ -65,20 +65,27 @@ static int z_scan_block_avail(HEVCContext *s, int xCurr, int yCurr, ...@@ -65,20 +65,27 @@ static int z_scan_block_avail(HEVCContext *s, int xCurr, int yCurr,
int xN, int yN) int xN, int yN)
{ {
#define MIN_TB_ADDR_ZS(x, y) \ #define MIN_TB_ADDR_ZS(x, y) \
s->pps->min_tb_addr_zs[(y) * s->sps->min_tb_width + (x)] s->pps->min_tb_addr_zs[(y) * (s->sps->tb_mask+2) + (x)]
int Curr = MIN_TB_ADDR_ZS(xCurr >> s->sps->log2_min_tb_size,
yCurr >> s->sps->log2_min_tb_size); int xCurr_ctb = xCurr >> s->sps->log2_ctb_size;
int N; int yCurr_ctb = yCurr >> s->sps->log2_ctb_size;
int xN_ctb = xN >> s->sps->log2_ctb_size;
int yN_ctb = yN >> s->sps->log2_ctb_size;
if (xN < 0 || yN < 0 || if (xN < 0 || yN < 0 ||
xN >= s->sps->width || xN >= s->sps->width ||
yN >= s->sps->height) yN >= s->sps->height)
return 0; return 0;
N = MIN_TB_ADDR_ZS(xN >> s->sps->log2_min_tb_size, if( yN_ctb < yCurr_ctb || xN_ctb < xCurr_ctb )
yN >> s->sps->log2_min_tb_size); return 1;
else {
int Curr = MIN_TB_ADDR_ZS((xCurr >> s->sps->log2_min_tb_size) & s->sps->tb_mask,
(yCurr >> s->sps->log2_min_tb_size) & s->sps->tb_mask);
int N = MIN_TB_ADDR_ZS((xN >> s->sps->log2_min_tb_size) & s->sps->tb_mask,
(yN >> s->sps->log2_min_tb_size) & s->sps->tb_mask);
return N <= Curr; return N <= Curr;
}
} }
static int same_prediction_block(HEVCLocalContext *lc, int log2_cb_size, static int same_prediction_block(HEVCLocalContext *lc, int log2_cb_size,
......
...@@ -907,6 +907,7 @@ int ff_hevc_decode_nal_sps(HEVCContext *s) ...@@ -907,6 +907,7 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
sps->min_tb_height = sps->height >> sps->log2_min_tb_size; sps->min_tb_height = sps->height >> sps->log2_min_tb_size;
sps->min_pu_width = sps->width >> sps->log2_min_pu_size; sps->min_pu_width = sps->width >> sps->log2_min_pu_size;
sps->min_pu_height = sps->height >> sps->log2_min_pu_size; sps->min_pu_height = sps->height >> sps->log2_min_pu_size;
sps->tb_mask = (1 << (sps->log2_ctb_size - sps->log2_min_tb_size)) - 1;
sps->qp_bd_offset = 6 * (sps->bit_depth - 8); sps->qp_bd_offset = 6 * (sps->bit_depth - 8);
...@@ -981,7 +982,7 @@ static void hevc_pps_free(void *opaque, uint8_t *data) ...@@ -981,7 +982,7 @@ static void hevc_pps_free(void *opaque, uint8_t *data)
av_freep(&pps->ctb_addr_ts_to_rs); av_freep(&pps->ctb_addr_ts_to_rs);
av_freep(&pps->tile_pos_rs); av_freep(&pps->tile_pos_rs);
av_freep(&pps->tile_id); av_freep(&pps->tile_id);
av_freep(&pps->min_tb_addr_zs); av_freep(&pps->min_tb_addr_zs_tab);
av_freep(&pps); av_freep(&pps);
} }
...@@ -990,7 +991,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s) ...@@ -990,7 +991,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
{ {
GetBitContext *gb = &s->HEVClc->gb; GetBitContext *gb = &s->HEVClc->gb;
HEVCSPS *sps = NULL; HEVCSPS *sps = NULL;
int pic_area_in_ctbs, pic_area_in_min_cbs, pic_area_in_min_tbs; int pic_area_in_ctbs;
int log2_diff_ctb_min_tb_size; int log2_diff_ctb_min_tb_size;
int i, j, x, y, ctb_addr_rs, tile_id; int i, j, x, y, ctb_addr_rs, tile_id;
int ret = 0; int ret = 0;
...@@ -1229,15 +1230,13 @@ int ff_hevc_decode_nal_pps(HEVCContext *s) ...@@ -1229,15 +1230,13 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
* 6.5 * 6.5
*/ */
pic_area_in_ctbs = sps->ctb_width * sps->ctb_height; pic_area_in_ctbs = sps->ctb_width * sps->ctb_height;
pic_area_in_min_cbs = sps->min_cb_width * sps->min_cb_height;
pic_area_in_min_tbs = sps->min_tb_width * sps->min_tb_height;
pps->ctb_addr_rs_to_ts = av_malloc_array(pic_area_in_ctbs, sizeof(*pps->ctb_addr_rs_to_ts)); pps->ctb_addr_rs_to_ts = av_malloc_array(pic_area_in_ctbs, sizeof(*pps->ctb_addr_rs_to_ts));
pps->ctb_addr_ts_to_rs = av_malloc_array(pic_area_in_ctbs, sizeof(*pps->ctb_addr_ts_to_rs)); pps->ctb_addr_ts_to_rs = av_malloc_array(pic_area_in_ctbs, sizeof(*pps->ctb_addr_ts_to_rs));
pps->tile_id = av_malloc_array(pic_area_in_ctbs, sizeof(*pps->tile_id)); pps->tile_id = av_malloc_array(pic_area_in_ctbs, sizeof(*pps->tile_id));
pps->min_tb_addr_zs = av_malloc_array(pic_area_in_min_tbs, sizeof(*pps->min_tb_addr_zs)); pps->min_tb_addr_zs_tab = av_malloc_array((sps->tb_mask+2) * (sps->tb_mask+2), sizeof(*pps->min_tb_addr_zs_tab));
if (!pps->ctb_addr_rs_to_ts || !pps->ctb_addr_ts_to_rs || if (!pps->ctb_addr_rs_to_ts || !pps->ctb_addr_ts_to_rs ||
!pps->tile_id || !pps->min_tb_addr_zs) { !pps->tile_id || !pps->min_tb_addr_zs_tab) {
ret = AVERROR(ENOMEM); ret = AVERROR(ENOMEM);
goto err; goto err;
} }
...@@ -1292,8 +1291,13 @@ int ff_hevc_decode_nal_pps(HEVCContext *s) ...@@ -1292,8 +1291,13 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
pps->tile_pos_rs[j * pps->num_tile_columns + i] = pps->row_bd[j] * sps->ctb_width + pps->col_bd[i]; pps->tile_pos_rs[j * pps->num_tile_columns + i] = pps->row_bd[j] * sps->ctb_width + pps->col_bd[i];
log2_diff_ctb_min_tb_size = sps->log2_ctb_size - sps->log2_min_tb_size; log2_diff_ctb_min_tb_size = sps->log2_ctb_size - sps->log2_min_tb_size;
for (y = 0; y < sps->min_tb_height; y++) { pps->min_tb_addr_zs = &pps->min_tb_addr_zs_tab[1*(sps->tb_mask+2)+1];
for (x = 0; x < sps->min_tb_width; x++) { for (y = 0; y < sps->tb_mask+2; y++) {
pps->min_tb_addr_zs_tab[y*(sps->tb_mask+2)] = -1;
pps->min_tb_addr_zs_tab[y] = -1;
}
for (y = 0; y < sps->tb_mask+1; y++) {
for (x = 0; x < sps->tb_mask+1; x++) {
int tb_x = x >> log2_diff_ctb_min_tb_size; int tb_x = x >> log2_diff_ctb_min_tb_size;
int tb_y = y >> log2_diff_ctb_min_tb_size; int tb_y = y >> log2_diff_ctb_min_tb_size;
int ctb_addr_rs = sps->ctb_width * tb_y + tb_x; int ctb_addr_rs = sps->ctb_width * tb_y + tb_x;
...@@ -1303,7 +1307,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s) ...@@ -1303,7 +1307,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
int m = 1 << i; int m = 1 << i;
val += (m & x ? m * m : 0) + (m & y ? 2 * m * m : 0); val += (m & x ? m * m : 0) + (m & y ? 2 * m * m : 0);
} }
pps->min_tb_addr_zs[y * sps->min_tb_width + x] = val; pps->min_tb_addr_zs[y * (sps->tb_mask+2) + x] = val;
} }
} }
......
...@@ -39,8 +39,7 @@ static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0, ...@@ -39,8 +39,7 @@ static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0,
#define IS_INTRA(x, y) \ #define IS_INTRA(x, y) \
(MVF_PU(x, y).pred_flag == PF_INTRA) (MVF_PU(x, y).pred_flag == PF_INTRA)
#define MIN_TB_ADDR_ZS(x, y) \ #define MIN_TB_ADDR_ZS(x, y) \
s->pps->min_tb_addr_zs[(y) * s->sps->min_tb_width + (x)] s->pps->min_tb_addr_zs[(y) * (s->sps->tb_mask+2) + (x)]
#define EXTEND(ptr, val, len) \ #define EXTEND(ptr, val, len) \
do { \ do { \
pixel4 pix = PIXEL_SPLAT_X4(val); \ pixel4 pix = PIXEL_SPLAT_X4(val); \
...@@ -82,8 +81,9 @@ do { \ ...@@ -82,8 +81,9 @@ do { \
int size_in_tbs_v = size_in_luma_v >> s->sps->log2_min_tb_size; int size_in_tbs_v = size_in_luma_v >> s->sps->log2_min_tb_size;
int x = x0 >> hshift; int x = x0 >> hshift;
int y = y0 >> vshift; int y = y0 >> vshift;
int x_tb = x0 >> s->sps->log2_min_tb_size; int x_tb = (x0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
int y_tb = y0 >> s->sps->log2_min_tb_size; int y_tb = (y0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb); int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb);
ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel); ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel);
...@@ -103,12 +103,11 @@ do { \ ...@@ -103,12 +103,11 @@ do { \
pixel *top = top_array + 1; pixel *top = top_array + 1;
pixel *filtered_left = filtered_left_array + 1; pixel *filtered_left = filtered_left_array + 1;
pixel *filtered_top = filtered_top_array + 1; pixel *filtered_top = filtered_top_array + 1;
int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS( x_tb - 1, (y_tb + size_in_tbs_v) & s->sps->tb_mask);
int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb - 1, y_tb + size_in_tbs_v);
int cand_left = lc->na.cand_left; int cand_left = lc->na.cand_left;
int cand_up_left = lc->na.cand_up_left; int cand_up_left = lc->na.cand_up_left;
int cand_up = lc->na.cand_up; int cand_up = lc->na.cand_up;
int cand_up_right = lc->na.cand_up_right && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb + size_in_tbs_h, y_tb - 1); int cand_up_right = lc->na.cand_up_right && cur_tb_addr > MIN_TB_ADDR_ZS((x_tb + size_in_tbs_h) & s->sps->tb_mask, y_tb - 1);
int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->sps->height) - int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->sps->height) -
(y0 + size_in_luma_v)) >> vshift; (y0 + size_in_luma_v)) >> vshift;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment