Commit 9227bd8a authored by Paul B Mahol, committed by Luca Barbato

utvideodec: Reuse the huffyuv add_left

~10% faster when simd is available.
Signed-off-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
parent 4f33d9d4
...@@ -2129,7 +2129,7 @@ truespeech_decoder_select="bswapdsp" ...@@ -2129,7 +2129,7 @@ truespeech_decoder_select="bswapdsp"
tscc_decoder_deps="zlib" tscc_decoder_deps="zlib"
txd_decoder_select="texturedsp" txd_decoder_select="texturedsp"
twinvq_decoder_select="mdct lsp sinewin" twinvq_decoder_select="mdct lsp sinewin"
utvideo_decoder_select="bswapdsp" utvideo_decoder_select="bswapdsp huffyuvdsp"
utvideo_encoder_select="bswapdsp huffman huffyuvencdsp" utvideo_encoder_select="bswapdsp huffman huffyuvencdsp"
vble_decoder_select="huffyuvdsp" vble_decoder_select="huffyuvdsp"
vc1_decoder_select="blockdsp error_resilience h263_decoder h264qpel intrax8 mpeg_er mpegvideo vc1dsp" vc1_decoder_select="blockdsp error_resilience h263_decoder h264qpel intrax8 mpeg_er mpegvideo vc1dsp"
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "libavutil/common.h" #include "libavutil/common.h"
#include "avcodec.h" #include "avcodec.h"
#include "bswapdsp.h" #include "bswapdsp.h"
#include "huffyuvdsp.h"
#include "huffyuvencdsp.h" #include "huffyuvencdsp.h"
enum { enum {
...@@ -68,6 +69,7 @@ typedef struct UtvideoContext { ...@@ -68,6 +69,7 @@ typedef struct UtvideoContext {
const AVClass *class; const AVClass *class;
AVCodecContext *avctx; AVCodecContext *avctx;
BswapDSPContext bdsp; BswapDSPContext bdsp;
HuffYUVDSPContext hdspdec;
HuffYUVEncDSPContext hdsp; HuffYUVEncDSPContext hdsp;
uint32_t frame_info_size, flags, frame_info; uint32_t frame_info_size, flags, frame_info;
......
...@@ -373,8 +373,110 @@ static void restore_rgb_planes10(AVFrame *frame, int width, int height) ...@@ -373,8 +373,110 @@ static void restore_rgb_planes10(AVFrame *frame, int width, int height)
} }
} }
/* Restore a planar plane from median prediction, progressive version.
 *
 * Each slice is decoded independently. Within a slice:
 *   - the first line uses left-neighbour prediction, seeded with 0x80,
 *     done by the HuffYUV DSP helper (SIMD where available);
 *   - the second line predicts its first sample from the top neighbour
 *     and the remaining samples with the median predictor;
 *   - all following lines use the median predictor via the DSP helper.
 *
 * rmode masks slice boundaries so chroma slices start on even lines.
 */
static void restore_median_planar(UtvideoContext *c, uint8_t *src,
                                  ptrdiff_t stride, int width, int height,
                                  int slices, int rmode)
{
    int i, j, slice;
    int A, B, C;          /* left, top and top-left neighbours */
    uint8_t *bsrc;
    int slice_start, slice_height;
    const int cmask = ~rmode;

    for (slice = 0; slice < slices; slice++) {
        slice_start  = ((slice * height) / slices) & cmask;
        slice_height = ((((slice + 1) * height) / slices) & cmask) -
                       slice_start;
        if (!slice_height)
            continue;

        bsrc = src + slice_start * stride;

        // first line - left neighbour prediction
        bsrc[0] += 0x80;
        c->hdspdec.add_hfyu_left_pred(bsrc, bsrc, width, 0);
        bsrc += stride;
        if (slice_height <= 1)
            continue;

        // second line - first element has top prediction, the rest uses median
        C        = bsrc[-stride];
        bsrc[0] += C;
        A        = bsrc[0];
        for (i = 1; i < width; i++) {
            B        = bsrc[i - stride];
            bsrc[i] += mid_pred(A, B, (uint8_t)(A + B - C));
            C        = B;
            A        = bsrc[i];
        }
        bsrc += stride;

        // the rest of lines use continuous median prediction
        for (j = 2; j < slice_height; j++) {
            c->hdspdec.add_hfyu_median_pred(bsrc, bsrc - stride,
                                            bsrc, width, &A, &B);
            bsrc += stride;
        }
    }
}
/* UtVideo interlaced mode treats every two lines as a single one,
* so restoring function should take care of possible padding between
* two parts of the same "line".
*/
/* Restore a planar plane from median prediction, interlaced version.
 *
 * UtVideo interlaced mode treats every two lines as a single one, so this
 * restoring function has to step over the possible padding between the two
 * fields that make up each "line".
 */
static void restore_median_planar_il(UtvideoContext *c, uint8_t *src,
                                     ptrdiff_t stride, int width, int height,
                                     int slices, int rmode)
{
    int x, line, sl;
    int left, top, topleft;        /* A, B, C neighbours of the predictor */
    uint8_t *row;
    int start, pair_count;
    const int cmask   = ~(rmode ? 3 : 1);
    const int stride2 = stride * 2;

    for (sl = 0; sl < slices; sl++) {
        start      = ((sl * height) / slices) & cmask;
        pair_count = ((((sl + 1) * height) / slices) & cmask) - start;
        pair_count >>= 1;          /* two fields per logical line */
        if (!pair_count)
            continue;

        row = src + start * stride;

        /* First line pair: left prediction, seeded with 0x80 on the top
         * field and continued with its last value on the bottom field. */
        row[0] += 0x80;
        left = c->hdspdec.add_hfyu_left_pred(row, row, width, 0);
        c->hdspdec.add_hfyu_left_pred(row + stride, row + stride, width, left);
        row += stride2;
        if (pair_count <= 1)
            continue;

        /* Second pair: top prediction for the first sample, then median for
         * the rest of the top field; the bottom field uses the DSP median
         * helper against the top field. */
        topleft = row[-stride2];
        row[0] += topleft;
        left    = row[0];
        for (x = 1; x < width; x++) {
            top     = row[x - stride2];
            row[x] += mid_pred(left, top, (uint8_t)(left + top - topleft));
            topleft = top;
            left    = row[x];
        }
        c->hdspdec.add_hfyu_median_pred(row + stride, row - stride,
                                        row + stride, width, &left, &top);
        row += stride2;

        /* Remaining pairs: continuous median prediction on both fields. */
        for (line = 2; line < pair_count; line++) {
            c->hdspdec.add_hfyu_median_pred(row, row - stride2,
                                            row, width, &left, &top);
            c->hdspdec.add_hfyu_median_pred(row + stride, row - stride,
                                            row + stride, width, &left, &top);
            row += stride2;
        }
    }
}
static void restore_median_packed(uint8_t *src, int step, ptrdiff_t stride,
int width, int height,
int slices, int rmode)
{ {
int i, j, slice; int i, j, slice;
int A, B, C; int A, B, C;
...@@ -429,8 +531,9 @@ static void restore_median(uint8_t *src, int step, ptrdiff_t stride, ...@@ -429,8 +531,9 @@ static void restore_median(uint8_t *src, int step, ptrdiff_t stride,
* so restoring function should take care of possible padding between * so restoring function should take care of possible padding between
* two parts of the same "line". * two parts of the same "line".
*/ */
static void restore_median_il(uint8_t *src, int step, ptrdiff_t stride, static void restore_median_packed_il(uint8_t *src, int step, ptrdiff_t stride,
int width, int height, int slices, int rmode) int width, int height,
int slices, int rmode)
{ {
int i, j, slice; int i, j, slice;
int A, B, C; int A, B, C;
...@@ -613,14 +716,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, ...@@ -613,14 +716,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
return ret; return ret;
if (c->frame_pred == PRED_MEDIAN) { if (c->frame_pred == PRED_MEDIAN) {
if (!c->interlaced) { if (!c->interlaced) {
restore_median(frame.f->data[0] + ff_ut_rgb_order[i], restore_median_packed(frame.f->data[0] + ff_ut_rgb_order[i],
c->planes, frame.f->linesize[0], avctx->width, c->planes, frame.f->linesize[0], avctx->width,
avctx->height, c->slices, 0); avctx->height, c->slices, 0);
} else { } else {
restore_median_il(frame.f->data[0] + ff_ut_rgb_order[i], restore_median_packed_il(frame.f->data[0] + ff_ut_rgb_order[i],
c->planes, frame.f->linesize[0], c->planes, frame.f->linesize[0],
avctx->width, avctx->height, c->slices, avctx->width, avctx->height, c->slices,
0); 0);
} }
} }
} }
...@@ -649,14 +752,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, ...@@ -649,14 +752,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
return ret; return ret;
if (c->frame_pred == PRED_MEDIAN) { if (c->frame_pred == PRED_MEDIAN) {
if (!c->interlaced) { if (!c->interlaced) {
restore_median(frame.f->data[i], 1, frame.f->linesize[i], restore_median_planar(c, frame.f->data[i], frame.f->linesize[i],
avctx->width >> !!i, avctx->height >> !!i, avctx->width >> !!i, avctx->height >> !!i,
c->slices, !i); c->slices, !i);
} else { } else {
restore_median_il(frame.f->data[i], 1, frame.f->linesize[i], restore_median_planar_il(c, frame.f->data[i], frame.f->linesize[i],
avctx->width >> !!i, avctx->width >> !!i,
avctx->height >> !!i, avctx->height >> !!i,
c->slices, !i); c->slices, !i);
} }
} }
} }
...@@ -670,13 +773,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, ...@@ -670,13 +773,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
return ret; return ret;
if (c->frame_pred == PRED_MEDIAN) { if (c->frame_pred == PRED_MEDIAN) {
if (!c->interlaced) { if (!c->interlaced) {
restore_median(frame.f->data[i], 1, frame.f->linesize[i], restore_median_planar(c, frame.f->data[i], frame.f->linesize[i],
avctx->width >> !!i, avctx->height, avctx->width >> !!i, avctx->height,
c->slices, 0); c->slices, 0);
} else { } else {
restore_median_il(frame.f->data[i], 1, frame.f->linesize[i], restore_median_planar_il(c, frame.f->data[i], frame.f->linesize[i],
avctx->width >> !!i, avctx->height, avctx->width >> !!i, avctx->height,
c->slices, 0); c->slices, 0);
} }
} }
} }
...@@ -690,13 +793,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, ...@@ -690,13 +793,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
return ret; return ret;
if (c->frame_pred == PRED_MEDIAN) { if (c->frame_pred == PRED_MEDIAN) {
if (!c->interlaced) { if (!c->interlaced) {
restore_median(frame.f->data[i], 1, frame.f->linesize[i], restore_median_planar(c, frame.f->data[i], frame.f->linesize[i],
avctx->width, avctx->height, avctx->width, avctx->height,
c->slices, 0); c->slices, 0);
} else { } else {
restore_median_il(frame.f->data[i], 1, frame.f->linesize[i], restore_median_planar_il(c, frame.f->data[i], frame.f->linesize[i],
avctx->width, avctx->height, avctx->width, avctx->height,
c->slices, 0); c->slices, 0);
} }
} }
} }
...@@ -729,6 +832,7 @@ static av_cold int decode_init(AVCodecContext *avctx) ...@@ -729,6 +832,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
c->avctx = avctx; c->avctx = avctx;
ff_bswapdsp_init(&c->bdsp); ff_bswapdsp_init(&c->bdsp);
ff_huffyuvdsp_init(&c->hdspdec);
if (avctx->extradata_size >= 16) { if (avctx->extradata_size >= 16) {
av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d.%d.%d\n", av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d.%d.%d\n",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment