Commit b2ffecbd authored by Paul B Mahol

avcodec/lagarith: switch to planar rgb

Speed goes from 363 fps to 428 fps for 640x480 video.
parent fb9d4f5c
...@@ -53,9 +53,6 @@ typedef struct LagarithContext { ...@@ -53,9 +53,6 @@ typedef struct LagarithContext {
LLVidDSPContext llviddsp; LLVidDSPContext llviddsp;
int zeros; /**< number of consecutive zero bytes encountered */ int zeros; /**< number of consecutive zero bytes encountered */
int zeros_rem; /**< number of zero bytes remaining to output */ int zeros_rem; /**< number of zero bytes remaining to output */
uint8_t *rgb_planes;
int rgb_planes_allocated;
int rgb_stride;
} LagarithContext; } LagarithContext;
/** /**
...@@ -544,7 +541,7 @@ static int lag_decode_frame(AVCodecContext *avctx, ...@@ -544,7 +541,7 @@ static int lag_decode_frame(AVCodecContext *avctx,
uint8_t frametype; uint8_t frametype;
uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9; uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9;
uint32_t offs[4]; uint32_t offs[4];
uint8_t *srcs[4], *dst; uint8_t *srcs[4];
int i, j, planes = 3; int i, j, planes = 3;
int ret; int ret;
...@@ -557,70 +554,60 @@ static int lag_decode_frame(AVCodecContext *avctx, ...@@ -557,70 +554,60 @@ static int lag_decode_frame(AVCodecContext *avctx,
switch (frametype) { switch (frametype) {
case FRAME_SOLID_RGBA: case FRAME_SOLID_RGBA:
avctx->pix_fmt = AV_PIX_FMT_RGB32; avctx->pix_fmt = AV_PIX_FMT_GBRAP;
case FRAME_SOLID_GRAY: case FRAME_SOLID_GRAY:
if (frametype == FRAME_SOLID_GRAY) if (frametype == FRAME_SOLID_GRAY)
if (avctx->bits_per_coded_sample == 24) { if (avctx->bits_per_coded_sample == 24) {
avctx->pix_fmt = AV_PIX_FMT_RGB24; avctx->pix_fmt = AV_PIX_FMT_GBRP;
} else { } else {
avctx->pix_fmt = AV_PIX_FMT_0RGB32; avctx->pix_fmt = AV_PIX_FMT_GBRAP;
planes = 4; planes = 4;
} }
if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0) if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
return ret; return ret;
dst = p->data[0];
if (frametype == FRAME_SOLID_RGBA) { if (frametype == FRAME_SOLID_RGBA) {
int qwidth = avctx->width>>2; for (i = 0; i < avctx->height; i++) {
uint64_t c = ((uint64_t)offset_gu << 32) | offset_gu; memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width);
for (j = 0; j < avctx->height; j++) { memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width);
for (i = 0; i < qwidth; i++) { memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width);
AV_WN64(dst + i * 16 , c); memset(p->data[3] + i * p->linesize[3], buf[4], avctx->width);
AV_WN64(dst + i * 16 + 8, c);
} }
for (i = 4*qwidth; i < avctx->width; i++)
AV_WN32(dst + i * 4, offset_gu);
dst += p->linesize[0];
}
} else { } else {
for (j = 0; j < avctx->height; j++) { for (i = 0; i < avctx->height; i++) {
memset(dst, buf[1], avctx->width * planes); for (j = 0; j < planes; j++)
dst += p->linesize[0]; memset(p->data[j] + i * p->linesize[j], buf[1], avctx->width);
} }
} }
break; break;
case FRAME_SOLID_COLOR: case FRAME_SOLID_COLOR:
if (avctx->bits_per_coded_sample == 24) { if (avctx->bits_per_coded_sample == 24) {
avctx->pix_fmt = AV_PIX_FMT_RGB24; avctx->pix_fmt = AV_PIX_FMT_GBRP;
} else { } else {
avctx->pix_fmt = AV_PIX_FMT_RGB32; avctx->pix_fmt = AV_PIX_FMT_GBRAP;
offset_gu |= 0xFFU << 24;
} }
if ((ret = ff_thread_get_buffer(avctx, &frame,0)) < 0) if ((ret = ff_thread_get_buffer(avctx, &frame,0)) < 0)
return ret; return ret;
dst = p->data[0]; for (i = 0; i < avctx->height; i++) {
for (j = 0; j < avctx->height; j++) { memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width);
for (i = 0; i < avctx->width; i++) memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width);
if (avctx->bits_per_coded_sample == 24) { memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width);
AV_WB24(dst + i * 3, offset_gu); if (avctx->pix_fmt == AV_PIX_FMT_GBRAP)
} else { memset(p->data[3] + i * p->linesize[3], 0xFFu, avctx->width);
AV_WN32(dst + i * 4, offset_gu);
}
dst += p->linesize[0];
} }
break; break;
case FRAME_ARITH_RGBA: case FRAME_ARITH_RGBA:
avctx->pix_fmt = AV_PIX_FMT_RGB32; avctx->pix_fmt = AV_PIX_FMT_GBRAP;
planes = 4; planes = 4;
offset_ry += 4; offset_ry += 4;
offs[3] = AV_RL32(buf + 9); offs[3] = AV_RL32(buf + 9);
case FRAME_ARITH_RGB24: case FRAME_ARITH_RGB24:
case FRAME_U_RGB24: case FRAME_U_RGB24:
if (frametype == FRAME_ARITH_RGB24 || frametype == FRAME_U_RGB24) if (frametype == FRAME_ARITH_RGB24 || frametype == FRAME_U_RGB24)
avctx->pix_fmt = AV_PIX_FMT_RGB24; avctx->pix_fmt = AV_PIX_FMT_GBRP;
if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0) if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
return ret; return ret;
...@@ -629,15 +616,8 @@ static int lag_decode_frame(AVCodecContext *avctx, ...@@ -629,15 +616,8 @@ static int lag_decode_frame(AVCodecContext *avctx,
offs[1] = offset_gu; offs[1] = offset_gu;
offs[2] = offset_ry; offs[2] = offset_ry;
l->rgb_stride = FFALIGN(avctx->width, 16);
av_fast_malloc(&l->rgb_planes, &l->rgb_planes_allocated,
l->rgb_stride * avctx->height * planes + 1);
if (!l->rgb_planes) {
av_log(avctx, AV_LOG_ERROR, "cannot allocate temporary buffer\n");
return AVERROR(ENOMEM);
}
for (i = 0; i < planes; i++) for (i = 0; i < planes; i++)
srcs[i] = l->rgb_planes + (i + 1) * l->rgb_stride * avctx->height - l->rgb_stride; srcs[i] = p->data[i] + (avctx->height - 1) * p->linesize[i];
for (i = 0; i < planes; i++) for (i = 0; i < planes; i++)
if (buf_size <= offs[i]) { if (buf_size <= offs[i]) {
av_log(avctx, AV_LOG_ERROR, av_log(avctx, AV_LOG_ERROR,
...@@ -648,32 +628,16 @@ static int lag_decode_frame(AVCodecContext *avctx, ...@@ -648,32 +628,16 @@ static int lag_decode_frame(AVCodecContext *avctx,
for (i = 0; i < planes; i++) for (i = 0; i < planes; i++)
lag_decode_arith_plane(l, srcs[i], lag_decode_arith_plane(l, srcs[i],
avctx->width, avctx->height, avctx->width, avctx->height,
-l->rgb_stride, buf + offs[i], -p->linesize[i], buf + offs[i],
buf_size - offs[i]); buf_size - offs[i]);
dst = p->data[0]; for (i = 0; i < avctx->height; i++) {
for (i = 0; i < planes; i++) l->llviddsp.add_bytes(p->data[0] + i * p->linesize[0], p->data[1] + i * p->linesize[1], avctx->width);
srcs[i] = l->rgb_planes + i * l->rgb_stride * avctx->height; l->llviddsp.add_bytes(p->data[2] + i * p->linesize[2], p->data[1] + i * p->linesize[1], avctx->width);
for (j = 0; j < avctx->height; j++) {
for (i = 0; i < avctx->width; i++) {
uint8_t r, g, b, a;
r = srcs[0][i];
g = srcs[1][i];
b = srcs[2][i];
r += g;
b += g;
if (frametype == FRAME_ARITH_RGBA) {
a = srcs[3][i];
AV_WN32(dst + i * 4, MKBETAG(a, r, g, b));
} else {
dst[i * 3 + 0] = r;
dst[i * 3 + 1] = g;
dst[i * 3 + 2] = b;
}
}
dst += p->linesize[0];
for (i = 0; i < planes; i++)
srcs[i] += l->rgb_stride;
} }
FFSWAP(uint8_t*, p->data[0], p->data[1]);
FFSWAP(int, p->linesize[0], p->linesize[1]);
FFSWAP(uint8_t*, p->data[2], p->data[1]);
FFSWAP(int, p->linesize[2], p->linesize[1]);
break; break;
case FRAME_ARITH_YUY2: case FRAME_ARITH_YUY2:
avctx->pix_fmt = AV_PIX_FMT_YUV422P; avctx->pix_fmt = AV_PIX_FMT_YUV422P;
...@@ -757,15 +721,6 @@ static av_cold int lag_decode_init_thread_copy(AVCodecContext *avctx) ...@@ -757,15 +721,6 @@ static av_cold int lag_decode_init_thread_copy(AVCodecContext *avctx)
} }
#endif #endif
static av_cold int lag_decode_end(AVCodecContext *avctx)
{
LagarithContext *l = avctx->priv_data;
av_freep(&l->rgb_planes);
return 0;
}
AVCodec ff_lagarith_decoder = { AVCodec ff_lagarith_decoder = {
.name = "lagarith", .name = "lagarith",
.long_name = NULL_IF_CONFIG_SMALL("Lagarith lossless"), .long_name = NULL_IF_CONFIG_SMALL("Lagarith lossless"),
...@@ -774,7 +729,6 @@ AVCodec ff_lagarith_decoder = { ...@@ -774,7 +729,6 @@ AVCodec ff_lagarith_decoder = {
.priv_data_size = sizeof(LagarithContext), .priv_data_size = sizeof(LagarithContext),
.init = lag_decode_init, .init = lag_decode_init,
.init_thread_copy = ONLY_IF_THREADS_ENABLED(lag_decode_init_thread_copy), .init_thread_copy = ONLY_IF_THREADS_ENABLED(lag_decode_init_thread_copy),
.close = lag_decode_end,
.decode = lag_decode_frame, .decode = lag_decode_frame,
.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS, .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
}; };
...@@ -3,28 +3,28 @@ ...@@ -3,28 +3,28 @@
#codec_id 0: rawvideo #codec_id 0: rawvideo
#dimensions 0: 320x240 #dimensions 0: 320x240
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 230400, 0x67dfe576 0, 0, 0, 1, 230400, 0x77f0e576
0, 1, 1, 1, 230400, 0x67dfe576 0, 1, 1, 1, 230400, 0x77f0e576
0, 2, 2, 1, 230400, 0x67dfe576 0, 2, 2, 1, 230400, 0x77f0e576
0, 3, 3, 1, 230400, 0x67dfe576 0, 3, 3, 1, 230400, 0x77f0e576
0, 4, 4, 1, 230400, 0x67dfe576 0, 4, 4, 1, 230400, 0x77f0e576
0, 5, 5, 1, 230400, 0x67dfe576 0, 5, 5, 1, 230400, 0x77f0e576
0, 6, 6, 1, 230400, 0x67dfe576 0, 6, 6, 1, 230400, 0x77f0e576
0, 7, 7, 1, 230400, 0x67dfe576 0, 7, 7, 1, 230400, 0x77f0e576
0, 8, 8, 1, 230400, 0x67dfe576 0, 8, 8, 1, 230400, 0x77f0e576
0, 9, 9, 1, 230400, 0x67dfe576 0, 9, 9, 1, 230400, 0x77f0e576
0, 10, 10, 1, 230400, 0x67dfe576 0, 10, 10, 1, 230400, 0x77f0e576
0, 11, 11, 1, 230400, 0x67dfe576 0, 11, 11, 1, 230400, 0x77f0e576
0, 12, 12, 1, 230400, 0x67dfe576 0, 12, 12, 1, 230400, 0x77f0e576
0, 13, 13, 1, 230400, 0x67dfe576 0, 13, 13, 1, 230400, 0x77f0e576
0, 14, 14, 1, 230400, 0x67dfe576 0, 14, 14, 1, 230400, 0x77f0e576
0, 15, 15, 1, 230400, 0x67dfe576 0, 15, 15, 1, 230400, 0x77f0e576
0, 16, 16, 1, 230400, 0x67dfe576 0, 16, 16, 1, 230400, 0x77f0e576
0, 17, 17, 1, 230400, 0x67dfe576 0, 17, 17, 1, 230400, 0x77f0e576
0, 18, 18, 1, 230400, 0x67dfe576 0, 18, 18, 1, 230400, 0x77f0e576
0, 19, 19, 1, 230400, 0x67dfe576 0, 19, 19, 1, 230400, 0x77f0e576
0, 20, 20, 1, 230400, 0x67dfe576 0, 20, 20, 1, 230400, 0x77f0e576
0, 21, 21, 1, 230400, 0x67dfe576 0, 21, 21, 1, 230400, 0x77f0e576
0, 22, 22, 1, 230400, 0x67dfe576 0, 22, 22, 1, 230400, 0x77f0e576
0, 23, 23, 1, 230400, 0x67dfe576 0, 23, 23, 1, 230400, 0x77f0e576
0, 24, 24, 1, 230400, 0x67dfe576 0, 24, 24, 1, 230400, 0x77f0e576
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#codec_id 0: rawvideo #codec_id 0: rawvideo
#dimensions 0: 480x256 #dimensions 0: 480x256
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 368640, 0x26f74db2 0, 0, 0, 1, 368640, 0x18364db2
0, 1, 1, 1, 368640, 0x63b29ea4 0, 1, 1, 1, 368640, 0x60e79ea4
0, 2, 2, 1, 368640, 0x19467f03 0, 2, 2, 1, 368640, 0xb28a7f03
0, 3, 3, 1, 368640, 0x5fdc3575 0, 3, 3, 1, 368640, 0x2ed83575
...@@ -4,5 +4,5 @@ ...@@ -4,5 +4,5 @@
#dimensions 0: 640x360 #dimensions 0: 640x360
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 691200, 0x00000000 0, 0, 0, 1, 691200, 0x00000000
0, 25, 25, 1, 691200, 0xc88a6f24 0, 25, 25, 1, 691200, 0x1c4a6f24
0, 50, 50, 1, 691200, 0x906d474c 0, 50, 50, 1, 691200, 0x1fa0474c
...@@ -27,53 +27,53 @@ ...@@ -27,53 +27,53 @@
0, 21, 21, 1, 691200, 0x00000000 0, 21, 21, 1, 691200, 0x00000000
0, 22, 22, 1, 691200, 0x00000000 0, 22, 22, 1, 691200, 0x00000000
0, 23, 23, 1, 691200, 0x00000000 0, 23, 23, 1, 691200, 0x00000000
0, 24, 24, 1, 691200, 0xc88a6f24 0, 24, 24, 1, 691200, 0x1c4a6f24
0, 25, 25, 1, 691200, 0xc88a6f24 0, 25, 25, 1, 691200, 0x1c4a6f24
0, 26, 26, 1, 691200, 0xc88a6f24 0, 26, 26, 1, 691200, 0x1c4a6f24
0, 27, 27, 1, 691200, 0xc88a6f24 0, 27, 27, 1, 691200, 0x1c4a6f24
0, 28, 28, 1, 691200, 0xc88a6f24 0, 28, 28, 1, 691200, 0x1c4a6f24
0, 29, 29, 1, 691200, 0xc88a6f24 0, 29, 29, 1, 691200, 0x1c4a6f24
0, 30, 30, 1, 691200, 0xc88a6f24 0, 30, 30, 1, 691200, 0x1c4a6f24
0, 31, 31, 1, 691200, 0xc88a6f24 0, 31, 31, 1, 691200, 0x1c4a6f24
0, 32, 32, 1, 691200, 0xc88a6f24 0, 32, 32, 1, 691200, 0x1c4a6f24
0, 33, 33, 1, 691200, 0xc88a6f24 0, 33, 33, 1, 691200, 0x1c4a6f24
0, 34, 34, 1, 691200, 0xc88a6f24 0, 34, 34, 1, 691200, 0x1c4a6f24
0, 35, 35, 1, 691200, 0xc88a6f24 0, 35, 35, 1, 691200, 0x1c4a6f24
0, 36, 36, 1, 691200, 0xc88a6f24 0, 36, 36, 1, 691200, 0x1c4a6f24
0, 37, 37, 1, 691200, 0xc88a6f24 0, 37, 37, 1, 691200, 0x1c4a6f24
0, 38, 38, 1, 691200, 0xc88a6f24 0, 38, 38, 1, 691200, 0x1c4a6f24
0, 39, 39, 1, 691200, 0xc88a6f24 0, 39, 39, 1, 691200, 0x1c4a6f24
0, 40, 40, 1, 691200, 0xc88a6f24 0, 40, 40, 1, 691200, 0x1c4a6f24
0, 41, 41, 1, 691200, 0xc88a6f24 0, 41, 41, 1, 691200, 0x1c4a6f24
0, 42, 42, 1, 691200, 0xc88a6f24 0, 42, 42, 1, 691200, 0x1c4a6f24
0, 43, 43, 1, 691200, 0xc88a6f24 0, 43, 43, 1, 691200, 0x1c4a6f24
0, 44, 44, 1, 691200, 0xc88a6f24 0, 44, 44, 1, 691200, 0x1c4a6f24
0, 45, 45, 1, 691200, 0xc88a6f24 0, 45, 45, 1, 691200, 0x1c4a6f24
0, 46, 46, 1, 691200, 0xc88a6f24 0, 46, 46, 1, 691200, 0x1c4a6f24
0, 47, 47, 1, 691200, 0xc88a6f24 0, 47, 47, 1, 691200, 0x1c4a6f24
0, 48, 48, 1, 691200, 0xc88a6f24 0, 48, 48, 1, 691200, 0x1c4a6f24
0, 49, 49, 1, 691200, 0x906d474c 0, 49, 49, 1, 691200, 0x1fa0474c
0, 50, 50, 1, 691200, 0x906d474c 0, 50, 50, 1, 691200, 0x1fa0474c
0, 51, 51, 1, 691200, 0x906d474c 0, 51, 51, 1, 691200, 0x1fa0474c
0, 52, 52, 1, 691200, 0x906d474c 0, 52, 52, 1, 691200, 0x1fa0474c
0, 53, 53, 1, 691200, 0x906d474c 0, 53, 53, 1, 691200, 0x1fa0474c
0, 54, 54, 1, 691200, 0x906d474c 0, 54, 54, 1, 691200, 0x1fa0474c
0, 55, 55, 1, 691200, 0x906d474c 0, 55, 55, 1, 691200, 0x1fa0474c
0, 56, 56, 1, 691200, 0x906d474c 0, 56, 56, 1, 691200, 0x1fa0474c
0, 57, 57, 1, 691200, 0x906d474c 0, 57, 57, 1, 691200, 0x1fa0474c
0, 58, 58, 1, 691200, 0x906d474c 0, 58, 58, 1, 691200, 0x1fa0474c
0, 59, 59, 1, 691200, 0x906d474c 0, 59, 59, 1, 691200, 0x1fa0474c
0, 60, 60, 1, 691200, 0x906d474c 0, 60, 60, 1, 691200, 0x1fa0474c
0, 61, 61, 1, 691200, 0x906d474c 0, 61, 61, 1, 691200, 0x1fa0474c
0, 62, 62, 1, 691200, 0x906d474c 0, 62, 62, 1, 691200, 0x1fa0474c
0, 63, 63, 1, 691200, 0x906d474c 0, 63, 63, 1, 691200, 0x1fa0474c
0, 64, 64, 1, 691200, 0x906d474c 0, 64, 64, 1, 691200, 0x1fa0474c
0, 65, 65, 1, 691200, 0x906d474c 0, 65, 65, 1, 691200, 0x1fa0474c
0, 66, 66, 1, 691200, 0x906d474c 0, 66, 66, 1, 691200, 0x1fa0474c
0, 67, 67, 1, 691200, 0x906d474c 0, 67, 67, 1, 691200, 0x1fa0474c
0, 68, 68, 1, 691200, 0x906d474c 0, 68, 68, 1, 691200, 0x1fa0474c
0, 69, 69, 1, 691200, 0x906d474c 0, 69, 69, 1, 691200, 0x1fa0474c
0, 70, 70, 1, 691200, 0x906d474c 0, 70, 70, 1, 691200, 0x1fa0474c
0, 71, 71, 1, 691200, 0x906d474c 0, 71, 71, 1, 691200, 0x1fa0474c
0, 72, 72, 1, 691200, 0x906d474c 0, 72, 72, 1, 691200, 0x1fa0474c
0, 73, 73, 1, 691200, 0x906d474c 0, 73, 73, 1, 691200, 0x1fa0474c
...@@ -4,5 +4,5 @@ ...@@ -4,5 +4,5 @@
#dimensions 0: 640x360 #dimensions 0: 640x360
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 691200, 0x00000000 0, 0, 0, 1, 691200, 0x00000000
0, 1, 1, 1, 691200, 0xc88a6f24 0, 1, 1, 1, 691200, 0x1c4a6f24
0, 2, 2, 1, 691200, 0x906d474c 0, 2, 2, 1, 691200, 0x1fa0474c
...@@ -4,5 +4,5 @@ ...@@ -4,5 +4,5 @@
#dimensions 0: 640x360 #dimensions 0: 640x360
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 691200, 0x00000000 0, 0, 0, 1, 691200, 0x00000000
0, 25, 25, 1, 691200, 0xc88a6f24 0, 25, 25, 1, 691200, 0x1c4a6f24
0, 50, 50, 1, 691200, 0x906d474c 0, 50, 50, 1, 691200, 0x1fa0474c
...@@ -4,5 +4,5 @@ ...@@ -4,5 +4,5 @@
#dimensions 0: 640x360 #dimensions 0: 640x360
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 691200, 0x00000000 0, 0, 0, 1, 691200, 0x00000000
0, 25, 25, 1, 691200, 0xc88a6f24 0, 25, 25, 1, 691200, 0x1c4a6f24
0, 50, 50, 1, 691200, 0x906d474c 0, 50, 50, 1, 691200, 0x1fa0474c
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment