Commit 4ed7c2bb authored by Paul B Mahol

avcodec/utvideodec: add SIMD for restore_rgb_planes

Signed-off-by: Paul B Mahol <onemda@gmail.com>
parent 3594788b
...@@ -583,7 +583,7 @@ OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttaencdsp.o ttadata.o ...@@ -583,7 +583,7 @@ OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttaencdsp.o ttadata.o
OBJS-$(CONFIG_TWINVQ_DECODER) += twinvqdec.o twinvq.o OBJS-$(CONFIG_TWINVQ_DECODER) += twinvqdec.o twinvq.o
OBJS-$(CONFIG_TXD_DECODER) += txd.o OBJS-$(CONFIG_TXD_DECODER) += txd.o
OBJS-$(CONFIG_ULTI_DECODER) += ulti.o OBJS-$(CONFIG_ULTI_DECODER) += ulti.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o utvideodsp.o
OBJS-$(CONFIG_UTVIDEO_ENCODER) += utvideoenc.o utvideo.o OBJS-$(CONFIG_UTVIDEO_ENCODER) += utvideoenc.o utvideo.o
OBJS-$(CONFIG_V210_DECODER) += v210dec.o OBJS-$(CONFIG_V210_DECODER) += v210dec.o
OBJS-$(CONFIG_V210_ENCODER) += v210enc.o OBJS-$(CONFIG_V210_ENCODER) += v210enc.o
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "libavutil/common.h" #include "libavutil/common.h"
#include "avcodec.h" #include "avcodec.h"
#include "bswapdsp.h" #include "bswapdsp.h"
#include "utvideodsp.h"
#include "lossless_videodsp.h" #include "lossless_videodsp.h"
#include "lossless_videoencdsp.h" #include "lossless_videoencdsp.h"
...@@ -66,6 +67,7 @@ extern const int ff_ut_pred_order[5]; ...@@ -66,6 +67,7 @@ extern const int ff_ut_pred_order[5];
typedef struct UtvideoContext { typedef struct UtvideoContext {
const AVClass *class; const AVClass *class;
AVCodecContext *avctx; AVCodecContext *avctx;
UTVideoDSPContext utdsp;
BswapDSPContext bdsp; BswapDSPContext bdsp;
LLVidDSPContext llviddsp; LLVidDSPContext llviddsp;
LLVidEncDSPContext llvidencdsp; LLVidEncDSPContext llvidencdsp;
......
...@@ -333,50 +333,6 @@ fail: ...@@ -333,50 +333,6 @@ fail:
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} }
/**
 * In place, add (green - 0x80) to every red and blue sample of an 8-bit
 * planar frame. Green is read from data[0], blue from data[1] and red from
 * data[2]; results wrap modulo 256 when stored back as uint8_t.
 *
 * @param frame  frame whose data[0..2] / linesize[0..2] are processed
 * @param width  number of samples handled per row
 * @param height number of rows handled
 */
static void restore_rgb_planes(AVFrame *frame, int width, int height)
{
    uint8_t *red_row   = (uint8_t *)frame->data[2];
    uint8_t *green_row = (uint8_t *)frame->data[0];
    uint8_t *blue_row  = (uint8_t *)frame->data[1];
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            /* Read all three samples before storing anything. */
            const int green_bias = green_row[x] - 0x80;
            const int red        = red_row[x];
            const int blue       = blue_row[x];

            red_row[x]  = red  + green_bias;
            blue_row[x] = blue + green_bias;
        }
        /* Step each plane by its own stride. */
        red_row   += frame->linesize[2];
        green_row += frame->linesize[0];
        blue_row  += frame->linesize[1];
    }
}
/**
 * In place, add (green - 0x200) to every red and blue sample of a planar
 * frame stored as uint16_t, keeping only the low 10 bits of each result.
 * Green is read from data[0], blue from data[1] and red from data[2].
 *
 * @param frame  frame whose data[0..2] / linesize[0..2] are processed
 * @param width  number of samples handled per row
 * @param height number of rows handled
 */
static void restore_rgb_planes10(AVFrame *frame, int width, int height)
{
    uint16_t *red_row   = (uint16_t *)frame->data[2];
    uint16_t *green_row = (uint16_t *)frame->data[0];
    uint16_t *blue_row  = (uint16_t *)frame->data[1];
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            /* Read all three samples before storing anything. */
            const int green_bias = green_row[x] - 0x200;
            const int red        = red_row[x];
            const int blue       = blue_row[x];

            red_row[x]  = (red  + green_bias) & 0x3FF;
            blue_row[x] = (blue + green_bias) & 0x3FF;
        }
        /* linesize is in bytes; the row pointers step in uint16_t units. */
        red_row   += frame->linesize[2] / 2;
        green_row += frame->linesize[0] / 2;
        blue_row  += frame->linesize[1] / 2;
    }
}
#undef A #undef A
#undef B #undef B
#undef C #undef C
...@@ -696,7 +652,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, ...@@ -696,7 +652,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
} }
} }
} }
restore_rgb_planes(frame.f, avctx->width, avctx->height); c->utdsp.restore_rgb_planes(frame.f->data[2], frame.f->data[0], frame.f->data[1],
frame.f->linesize[2], frame.f->linesize[0], frame.f->linesize[1],
avctx->width, avctx->height);
break; break;
case AV_PIX_FMT_GBRAP10: case AV_PIX_FMT_GBRAP10:
case AV_PIX_FMT_GBRP10: case AV_PIX_FMT_GBRP10:
...@@ -709,7 +667,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, ...@@ -709,7 +667,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
if (ret) if (ret)
return ret; return ret;
} }
restore_rgb_planes10(frame.f, avctx->width, avctx->height); c->utdsp.restore_rgb_planes10((uint16_t *)frame.f->data[2], (uint16_t *)frame.f->data[0], (uint16_t *)frame.f->data[1],
frame.f->linesize[2] / 2, frame.f->linesize[0] / 2, frame.f->linesize[1] / 2,
avctx->width, avctx->height);
break; break;
case AV_PIX_FMT_YUV420P: case AV_PIX_FMT_YUV420P:
for (i = 0; i < 3; i++) { for (i = 0; i < 3; i++) {
...@@ -830,6 +790,7 @@ static av_cold int decode_init(AVCodecContext *avctx) ...@@ -830,6 +790,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
c->avctx = avctx; c->avctx = avctx;
ff_utvideodsp_init(&c->utdsp);
ff_bswapdsp_init(&c->bdsp); ff_bswapdsp_init(&c->bdsp);
ff_llviddsp_init(&c->llviddsp); ff_llviddsp_init(&c->llviddsp);
......
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "utvideodsp.h"
/**
 * In place, add (green - 0x80) to every red and blue sample of three 8-bit
 * planes. The green plane is only read. Results wrap modulo 256 when stored
 * back as uint8_t.
 *
 * @param src_r      first row of the red plane (read and written)
 * @param src_g      first row of the green plane (read only)
 * @param src_b      first row of the blue plane (read and written)
 * @param linesize_r distance between red rows, in uint8_t elements
 * @param linesize_g distance between green rows, in uint8_t elements
 * @param linesize_b distance between blue rows, in uint8_t elements
 * @param width      number of samples processed per row
 * @param height     number of rows processed
 */
static void restore_rgb_planes_c(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
                                 ptrdiff_t linesize_r, ptrdiff_t linesize_g,
                                 ptrdiff_t linesize_b, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            /* Read all three samples before storing anything. */
            const int green_bias = src_g[col] - 0x80;
            const int red        = src_r[col];
            const int blue       = src_b[col];

            src_r[col] = red  + green_bias;
            src_b[col] = blue + green_bias;
        }
        /* Each plane advances by its own stride. */
        src_r += linesize_r;
        src_g += linesize_g;
        src_b += linesize_b;
    }
}
static void restore_rgb_planes10_c(uint16_t *src_r,
uint16_t *src_g,
uint16_t *src_b,
ptrdiff_t linesize_r,
ptrdiff_t linesize_g,
ptrdiff_t linesize_b,
int width, int height)
{
int r, g, b;
int i, j;
for (j = 0; j < height; j++) {
for (i = 0; i < width; i++) {
r = src_r[i];
g = src_g[i];
b = src_b[i];
src_r[i] = (r + g - 0x200) & 0x3FF;
src_b[i] = (b + g - 0x200) & 0x3FF;
}
src_r += linesize_r;
src_g += linesize_g;
src_b += linesize_b;
}
}
/**
 * Fill a UTVideoDSPContext with the C implementations of both plane-restore
 * routines, then hand the context to the x86 initializer when ARCH_X86 is
 * non-zero so it can replace them.
 *
 * @param c context whose function pointers are set
 */
av_cold void ff_utvideodsp_init(UTVideoDSPContext *c)
{
/* Portable defaults; always valid after this point. */
c->restore_rgb_planes = restore_rgb_planes_c;
c->restore_rgb_planes10 = restore_rgb_planes10_c;
/* ARCH_X86 presumably comes from config.h as a build-time constant. */
if (ARCH_X86)
ff_utvideodsp_init_x86(c);
}
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_UTVIDEODSP_H
#define AVCODEC_UTVIDEODSP_H
#include <stdint.h>
#include <stddef.h>
#include "libavutil/pixfmt.h"
#include "config.h"
/**
 * Function-pointer table for the plane-restore routines.
 * Filled in by ff_utvideodsp_init().
 */
typedef struct UTVideoDSPContext {
/**
 * In place, add (green - 0x80) to each red and blue 8-bit sample
 * (this is what the C implementation does). The three linesize
 * arguments are the per-plane row strides.
 */
void (*restore_rgb_planes)(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
/**
 * In place, add (green - 0x200) to each red and blue sample and mask
 * the result to 10 bits (this is what the C implementation does).
 * The decoder passes the row strides divided by 2, i.e. in uint16_t
 * elements, not bytes.
 */
void (*restore_rgb_planes10)(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
} UTVideoDSPContext;
/** Set every function pointer in c; installs the C versions first, then calls the x86 initializer when ARCH_X86 is set. */
void ff_utvideodsp_init(UTVideoDSPContext *c);
/** x86 initializer: replaces the pointers in c with SSE2 versions when the build and CPU allow it. */
void ff_utvideodsp_init_x86(UTVideoDSPContext *c);
#endif /* AVCODEC_UTVIDEODSP_H */
...@@ -65,6 +65,7 @@ OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o ...@@ -65,6 +65,7 @@ OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp_init.o
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
...@@ -171,6 +172,7 @@ X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o ...@@ -171,6 +172,7 @@ X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
X86ASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o X86ASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
X86ASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o X86ASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o
X86ASM-OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp.o
X86ASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o X86ASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
X86ASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o X86ASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
X86ASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o X86ASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
......
;******************************************************************************
;* SIMD-optimized UTVideo functions
;* Copyright (c) 2017 Paul B Mahol
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
%if ARCH_X86_64
SECTION_RODATA
pb_128: times 16 db 128
pw_512: times 8 dw 512
pw_1023: times 8 dw 1023
SECTION .text
INIT_XMM sse2
; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
; ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
; int width, int height)
;
; In place, per byte: r += g - 128 and b += g - 128 with wrapping byte
; arithmetic. The green plane is only read.
;
; NOTE(review): the inner loop advances mmsize bytes per iteration until
; x >= 0, so when width is not a multiple of mmsize each row is read and
; written up to mmsize-1 bytes past width. Rows presumably carry that much
; padding -- confirm against the frame allocator.
; NOTE(review): all loads/stores use mova, which presumably requires
; mmsize-aligned row starts -- confirm.
; NOTE(review): both loops test their condition after the body, so one
; vector of one row is processed even when width or height is <= 0, unlike
; the C version, which does nothing in that case.
cglobal restore_rgb_planes, 8,9,4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
; Widen the int width to a full register where needed.
movsxdifnidn wq, wd
; Point each plane at the end of its first row, then index with a negative
; offset that counts up towards zero.
add src_rq, wq
add src_gq, wq
add src_bq, wq
neg wq
; m3 = 16 bytes of 128, the bias subtracted from green.
mova m3, [pb_128]
.nextrow:
; x = -width: restart at the beginning of the current row.
mov xq, wq
.loop:
mova m0, [src_rq + xq]
mova m1, [src_gq + xq]
mova m2, [src_bq + xq]
; m1 = g - 128; add it to both r and b.
psubb m1, m3
paddb m0, m1
paddb m2, m1
mova [src_rq+xq], m0
mova [src_bq+xq], m2
add xq, mmsize
; Continue while x is still negative, i.e. inside the row.
jl .loop
; Step to the next row; the pointers still refer to the row end.
add src_rq, linesize_rq
add src_gq, linesize_gq
add src_bq, linesize_bq
sub hd, 1
jg .nextrow
REP_RET
; void restore_rgb_planes10(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
;                           ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                           int width, int height)
;
; In place, per 16-bit word: r = (r + g - 512) & 1023 and
; b = (b + g - 512) & 1023. The green plane is only read.
; width and the three linesizes arrive in uint16_t elements (the decoder
; passes linesize / 2) and are doubled here to get byte offsets.
;
; NOTE(review): the inner loop advances mmsize bytes per iteration until
; x >= 0, so each row may be read and written up to mmsize-2 bytes past
; width*2. Rows presumably carry that much padding -- confirm.
; NOTE(review): all loads/stores use mova, which presumably requires
; mmsize-aligned row starts -- confirm.
; NOTE(review): both loops test their condition after the body, so one
; vector of one row is processed even when width or height is <= 0.
cglobal restore_rgb_planes10, 8,9,5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
; Samples -> bytes. This is a 32-bit shift; wq is used as a full register
; right after.
shl wd, 1
; Element strides -> byte strides.
shl linesize_rq, 1
shl linesize_gq, 1
shl linesize_bq, 1
; Point each plane at the end of its first row; x runs from -width*2 up to 0.
add src_rq, wq
add src_gq, wq
add src_bq, wq
; m3 = words of 512 (bias subtracted from green), m4 = words of 1023 (10-bit mask).
mova m3, [pw_512]
mova m4, [pw_1023]
neg wq
.nextrow:
; Restart at the beginning of the current row.
mov xq, wq
.loop:
mova m0, [src_rq + xq]
mova m1, [src_gq + xq]
mova m2, [src_bq + xq]
; m1 = g - 512; add it to both r and b, then keep the low 10 bits.
psubw m1, m3
paddw m0, m1
paddw m2, m1
pand m0, m4
pand m2, m4
mova [src_rq+xq], m0
mova [src_bq+xq], m2
add xq, mmsize
; Continue while x is still negative, i.e. inside the row.
jl .loop
; Step to the next row using the byte strides computed above.
add src_rq, linesize_rq
add src_gq, linesize_gq
add src_bq, linesize_bq
sub hd, 1
jg .nextrow
REP_RET
%endif
/*
* Copyright (c) 2017 Paul B Mahol
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/utvideodsp.h"
/* SSE2 plane-restore routines; presumably implemented in x86/utvideodsp.asm
 * (the build adds x86/utvideodsp.o). Signatures match the function pointers
 * in UTVideoDSPContext. */
void ff_restore_rgb_planes_sse2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
void ff_restore_rgb_planes10_sse2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
/**
 * Replace both function pointers in c with the SSE2 versions when
 * ARCH_X86_64 is set and EXTERNAL_SSE2() accepts the detected CPU flags.
 * Otherwise c is left untouched.
 *
 * @param c context to update
 */
av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
{
int cpu_flags = av_get_cpu_flags();
/* The assembly is only built under ARCH_X86_64, hence the extra guard. */
if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) {
c->restore_rgb_planes = ff_restore_rgb_planes_sse2;
c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment