Commit d249e682 authored by Mickaël Raulet, committed by Michael Niedermayer

hevc/sao: optimize SAO implementation

- adding one extra pixel all around the frame
- do not copy when SAO is not applied

5% improvement

cherry picked from commit 10fc29fc19a12c4d8168fbe1a954b76386db12d0
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 2897447d
...@@ -276,6 +276,24 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb) ...@@ -276,6 +276,24 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
return 0; return 0;
} }
/*
 * Obtain a reference-counted buffer for `frame` that is two units wider and
 * two units taller than the codec context's picture, then move every plane
 * pointer forward by one line plus one byte so that a border surrounds the
 * area addressed through frame->data[].
 *
 * On success the frame's width/height are reset to the codec context's
 * dimensions and 0 is returned. On failure the negative value returned by
 * ff_get_buffer() is passed through; in that case width/height are left at
 * the enlarged values.
 *
 * NOTE(review): the horizontal shift is a single byte, which presumably
 * equals one sample only for 8-bit content -- confirm for higher bit depths.
 */
static int get_buffer_sao(HEVCContext *s, AVFrame *frame)
{
    int plane;
    int err;

    /* Ask for a buffer with room for the extra border on every side. */
    frame->width  = s->avctx->width  + 2;
    frame->height = s->avctx->height + 2;

    err = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF);
    if (err < 0)
        return err;

    /* Skip the first line and the first byte of each populated plane. */
    plane = 0;
    while (frame->data[plane]) {
        frame->data[plane] += frame->linesize[plane] + 1;
        plane++;
    }

    /* Expose the nominal picture size to the rest of the decoder. */
    frame->width  = s->avctx->width;
    frame->height = s->avctx->height;

    return 0;
}
static int set_sps(HEVCContext *s, const HEVCSPS *sps) static int set_sps(HEVCContext *s, const HEVCSPS *sps)
{ {
int ret; int ret;
...@@ -317,10 +335,8 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps) ...@@ -317,10 +335,8 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps)
if (sps->sao_enabled) { if (sps->sao_enabled) {
av_frame_unref(s->tmp_frame); av_frame_unref(s->tmp_frame);
ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF); ret = get_buffer_sao(s, s->tmp_frame);
if (ret < 0) s->sao_frame = s->tmp_frame;
goto fail;
s->frame = s->tmp_frame;
} }
s->sps = sps; s->sps = sps;
...@@ -2582,8 +2598,7 @@ static int hevc_frame_start(HEVCContext *s) ...@@ -2582,8 +2598,7 @@ static int hevc_frame_start(HEVCContext *s)
if (s->pps->tiles_enabled_flag) if (s->pps->tiles_enabled_flag)
lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size; lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame, ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
s->poc);
if (ret < 0) if (ret < 0)
goto fail; goto fail;
......
...@@ -260,6 +260,7 @@ enum SAOType { ...@@ -260,6 +260,7 @@ enum SAOType {
SAO_NOT_APPLIED = 0, SAO_NOT_APPLIED = 0,
SAO_BAND, SAO_BAND,
SAO_EDGE, SAO_EDGE,
SAO_APPLIED
}; };
enum SAOEOClass { enum SAOEOClass {
......
...@@ -142,14 +142,14 @@ static int get_qPy(HEVCContext *s, int xC, int yC) ...@@ -142,14 +142,14 @@ static int get_qPy(HEVCContext *s, int xC, int yC)
} }
static void copy_CTB(uint8_t *dst, uint8_t *src, static void copy_CTB(uint8_t *dst, uint8_t *src,
int width, int height, int stride) int width, int height, int stride_dst, int stride_src)
{ {
int i; int i;
for (i = 0; i < height; i++) { for (i = 0; i < height; i++) {
memcpy(dst, src, width); memcpy(dst, src, width);
dst += stride; dst += stride_dst;
src += stride; src += stride_src;
} }
} }
...@@ -174,7 +174,7 @@ static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int he ...@@ -174,7 +174,7 @@ static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int he
uint8_t *src = &s->frame->data[c_idx][ ((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)]; uint8_t *src = &s->frame->data[c_idx][ ((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)]; uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
for (n = 0; n < (min_pu_size >> vshift); n++) { for (n = 0; n < (min_pu_size >> vshift); n++) {
memcpy(dst, src, len); memcpy(src, dst, len);
src += stride; src += stride;
dst += stride; dst += stride;
} }
...@@ -247,29 +247,58 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y) ...@@ -247,29 +247,58 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
for (c_idx = 0; c_idx < 3; c_idx++) { for (c_idx = 0; c_idx < 3; c_idx++) {
int x0 = x >> s->sps->hshift[c_idx]; int x0 = x >> s->sps->hshift[c_idx];
int y0 = y >> s->sps->vshift[c_idx]; int y0 = y >> s->sps->vshift[c_idx];
int stride = s->frame->linesize[c_idx]; int stride_src = s->frame->linesize[c_idx];
int stride_dst = s->sao_frame->linesize[c_idx];
int ctb_size_h = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx]; int ctb_size_h = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx];
int ctb_size_v = (1 << (s->sps->log2_ctb_size)) >> s->sps->vshift[c_idx]; int ctb_size_v = (1 << (s->sps->log2_ctb_size)) >> s->sps->vshift[c_idx];
int width = FFMIN(ctb_size_h, int width = FFMIN(ctb_size_h, (s->sps->width >> s->sps->hshift[c_idx]) - x0);
(s->sps->width >> s->sps->hshift[c_idx]) - x0); int height = FFMIN(ctb_size_v, (s->sps->height >> s->sps->vshift[c_idx]) - y0);
int height = FFMIN(ctb_size_v, uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->sps->pixel_shift)];
(s->sps->height >> s->sps->vshift[c_idx]) - y0); uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride_dst + (x0 << s->sps->pixel_shift)];
uint8_t *src = &s->frame->data[c_idx][y0 * stride + (x0 << s->sps->pixel_shift)];
uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride + (x0 << s->sps->pixel_shift)];
switch (sao->type_idx[c_idx]) { switch (sao->type_idx[c_idx]) {
case SAO_BAND: case SAO_BAND:
s->hevcdsp.sao_band_filter(dst, src, copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride_dst, stride_src);
stride, s->hevcdsp.sao_band_filter(src, dst,
stride_src, stride_dst,
sao, sao,
edges, width, edges, width,
height, c_idx); height, c_idx);
restore_tqb_pixels(s, x, y, width, height, c_idx); restore_tqb_pixels(s, x, y, width, height, c_idx);
sao->type_idx[c_idx] = SAO_APPLIED;
break; break;
case SAO_EDGE: case SAO_EDGE:
s->hevcdsp.sao_edge_filter[restore](dst, src, {
stride, uint8_t left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED);
if (!edges[1]) {
uint8_t top_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
uint8_t top_right = !edges[2] && (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
if (CTB(s->sao, x_ctb , y_ctb-1).type_idx[c_idx] == 0)
memcpy( dst - stride_dst - (top_left << s->sps->pixel_shift),
src - stride_src - (top_left << s->sps->pixel_shift),
(top_left + width + top_right) << s->sps->pixel_shift);
else {
if (top_left)
memcpy( dst - stride_dst - (1 << s->sps->pixel_shift),
src - stride_src - (1 << s->sps->pixel_shift),
1 << s->sps->pixel_shift);
if(top_right)
memcpy( dst - stride_dst + (width << s->sps->pixel_shift),
src - stride_src + (width << s->sps->pixel_shift),
1 << s->sps->pixel_shift);
}
}
if (!edges[3]) { // bottom and bottom right
uint8_t bottom_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] != SAO_APPLIED);
memcpy( dst + height * stride_dst - (bottom_left << s->sps->pixel_shift),
src + height * stride_src - (bottom_left << s->sps->pixel_shift),
(width + 1 + bottom_left) << s->sps->pixel_shift);
}
copy_CTB(dst - (left_pixels << s->sps->pixel_shift),
src - (left_pixels << s->sps->pixel_shift),
(width + 1 + left_pixels) << s->sps->pixel_shift, height, stride_dst, stride_src);
s->hevcdsp.sao_edge_filter[restore](src, dst,
stride_src, stride_dst,
sao, sao,
edges, width, edges, width,
height, c_idx, height, c_idx,
...@@ -277,10 +306,9 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y) ...@@ -277,10 +306,9 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
horiz_edge, horiz_edge,
diag_edge); diag_edge);
restore_tqb_pixels(s, x, y, width, height, c_idx); restore_tqb_pixels(s, x, y, width, height, c_idx);
sao->type_idx[c_idx] = SAO_APPLIED;
break; break;
default : }
copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride);
break;
} }
} }
} }
......
...@@ -58,11 +58,11 @@ typedef struct HEVCDSPContext { ...@@ -58,11 +58,11 @@ typedef struct HEVCDSPContext {
void (*transform_dc_add[4])(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); void (*transform_dc_add[4])(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
void (*sao_band_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride, void (*sao_band_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
struct SAOParams *sao, int *borders, struct SAOParams *sao, int *borders,
int width, int height, int c_idx); int width, int height, int c_idx);
void (*sao_edge_filter[2])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride, void (*sao_edge_filter[2])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
struct SAOParams *sao, int *borders, int _width, struct SAOParams *sao, int *borders, int _width,
int _height, int c_idx, uint8_t *vert_edge, int _height, int c_idx, uint8_t *vert_edge,
uint8_t *horiz_edge, uint8_t *diag_edge); uint8_t *horiz_edge, uint8_t *diag_edge);
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment