Commit 9b8c1224 authored by Martin Vignali, committed by James Almer

libavcodec/exr : add X86 SIMD for reorder_pixels

Signed-off-by: James Almer <jamrial@gmail.com>
parent 08ec828d
......@@ -286,7 +286,7 @@ OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER) += 8svx.o
OBJS-$(CONFIG_ESCAPE124_DECODER) += escape124.o
OBJS-$(CONFIG_ESCAPE130_DECODER) += escape130.o
OBJS-$(CONFIG_EVRC_DECODER) += evrcdec.o acelp_vectors.o lsp.o
OBJS-$(CONFIG_EXR_DECODER) += exr.o
OBJS-$(CONFIG_EXR_DECODER) += exr.o exrdsp.o
OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1.o
OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1.o
OBJS-$(CONFIG_FFWAVESYNTH_DECODER) += ffwavesynth.o
......
......@@ -51,6 +51,7 @@
#include "bswapdsp.h"
#endif
#include "exrdsp.h"
#include "get_bits.h"
#include "internal.h"
#include "mathops.h"
......@@ -121,6 +122,7 @@ typedef struct EXRContext {
AVClass *class;
AVFrame *picture;
AVCodecContext *avctx;
ExrDSPContext dsp;
#if HAVE_BIGENDIAN
BswapDSPContext bbdsp;
......@@ -275,23 +277,7 @@ static void predictor(uint8_t *src, int size)
}
}
/**
 * Interleave the two halves of src into dst.
 *
 * Byte k of the first half lands at dst[2 * k] and byte k of the second
 * half lands at dst[2 * k + 1].
 *
 * @param src  input buffer holding size bytes (read only)
 * @param dst  output buffer receiving size bytes
 * @param size number of bytes to process; asserted to be even
 */
static void reorder_pixels(uint8_t *src, uint8_t *dst, int size)
{
    int half_size = size / 2;
    const uint8_t *first  = src;
    const uint8_t *second = src + half_size;
    int n;

    av_assert1(size % 2 == 0);

    for (n = 0; n < half_size; n++) {
        dst[2 * n]     = first[n];
        dst[2 * n + 1] = second[n];
    }
}
static int zip_uncompress(const uint8_t *src, int compressed_size,
static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size,
int uncompressed_size, EXRThreadData *td)
{
unsigned long dest_len = uncompressed_size;
......@@ -300,13 +286,15 @@ static int zip_uncompress(const uint8_t *src, int compressed_size,
dest_len != uncompressed_size)
return AVERROR_INVALIDDATA;
av_assert1(uncompressed_size % 2 == 0);
predictor(td->tmp, uncompressed_size);
reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
s->dsp.reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
return 0;
}
static int rle_uncompress(const uint8_t *src, int compressed_size,
static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_size,
int uncompressed_size, EXRThreadData *td)
{
uint8_t *d = td->tmp;
......@@ -345,8 +333,10 @@ static int rle_uncompress(const uint8_t *src, int compressed_size,
if (dend != d)
return AVERROR_INVALIDDATA;
av_assert1(uncompressed_size % 2 == 0);
predictor(td->tmp, uncompressed_size);
reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
ctx->dsp.reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
return 0;
}
......@@ -1152,7 +1142,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
if (data_size < uncompressed_size) {
av_fast_padded_malloc(&td->uncompressed_data,
&td->uncompressed_size, uncompressed_size);
&td->uncompressed_size, uncompressed_size + 64);/* Force 64 padding for AVX2 reorder_pixels dst */
if (!td->uncompressed_data)
return AVERROR(ENOMEM);
......@@ -1161,7 +1151,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
switch (s->compression) {
case EXR_ZIP1:
case EXR_ZIP16:
ret = zip_uncompress(src, data_size, uncompressed_size, td);
ret = zip_uncompress(s, src, data_size, uncompressed_size, td);
break;
case EXR_PIZ:
ret = piz_uncompress(s, src, data_size, uncompressed_size, td);
......@@ -1170,7 +1160,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
ret = pxr24_uncompress(s, src, data_size, uncompressed_size, td);
break;
case EXR_RLE:
ret = rle_uncompress(src, data_size, uncompressed_size, td);
ret = rle_uncompress(s, src, data_size, uncompressed_size, td);
break;
case EXR_B44:
case EXR_B44A:
......@@ -1804,6 +1794,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
s->avctx = avctx;
ff_exrdsp_init(&s->dsp);
#if HAVE_BIGENDIAN
ff_bswapdsp_init(&s->bbdsp);
#endif
......
/*
* This file is part of FFmpeg.
*
* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "exrdsp.h"
#include "config.h"
/**
 * Portable C implementation of ExrDSPContext.reorder_pixels.
 *
 * Interleaves the two halves of src into dst: byte k of the first half is
 * written to dst[2 * k] and byte k of the second half to dst[2 * k + 1].
 *
 * @param src  input buffer holding size bytes (only read, never written)
 * @param dst  output buffer receiving 2 * (size / 2) bytes
 * @param size number of bytes to process; when it is odd, the trailing
 *             byte of src is not copied because size / 2 truncates
 */
static void reorder_pixels_scalar(uint8_t *src, uint8_t *dst, ptrdiff_t size)
{
    /* Keep the half size and the index in ptrdiff_t so that they cover the
     * full range of the size parameter instead of being narrowed to int. */
    const ptrdiff_t half_size = size / 2;
    const uint8_t *first  = src;
    const uint8_t *second = src + half_size;
    ptrdiff_t i;

    for (i = 0; i < half_size; i++) {
        dst[2 * i]     = first[i];
        dst[2 * i + 1] = second[i];
    }
}
/**
 * Fill an ExrDSPContext with function pointers.
 *
 * The C implementation is installed first; on x86 builds the x86 init
 * routine is then given the chance to replace it.
 *
 * @param c context to initialize
 */
av_cold void ff_exrdsp_init(ExrDSPContext *c)
{
    /* Portable fallback, installed unconditionally. */
    c->reorder_pixels = reorder_pixels_scalar;

    /* ARCH_X86 comes from config.h; the x86 initializer may override the
     * pointer set above. */
    if (ARCH_X86) {
        ff_exrdsp_init_x86(c);
    }
}
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_EXRDSP_H
#define AVCODEC_EXRDSP_H
#include <stdint.h>
#include "libavutil/common.h"
/**
 * Table of DSP routines used by the EXR decoder.
 * Filled in by ff_exrdsp_init().
 */
typedef struct ExrDSPContext {
/**
 * Interleave the first and second half of src into dst: byte k of the
 * first half goes to dst[2 * k], byte k of the second half to
 * dst[2 * k + 1] (as done by the C implementation in exrdsp.c).
 *
 * @param src  input buffer of size bytes
 * @param dst  output buffer
 * @param size number of bytes to reorder
 *
 * NOTE(review): the x86 versions process whole vector registers and use
 * aligned stores to dst, so they presumably need dst to be aligned and
 * padded (the decoder adds 64 bytes of padding to its dst buffer) -
 * confirm the exact alignment/padding contract for src and dst.
 */
void (*reorder_pixels)(uint8_t *src, uint8_t *dst, ptrdiff_t size);
} ExrDSPContext;
void ff_exrdsp_init(ExrDSPContext *c);
void ff_exrdsp_init_x86(ExrDSPContext *c);
#endif /* AVCODEC_EXRDSP_H */
......@@ -52,6 +52,7 @@ OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp_init.o
OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp.o
OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp_init.o x86/synth_filter_init.o
OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc_init.o
OBJS-$(CONFIG_EXR_DECODER) += x86/exrdsp_init.o
OBJS-$(CONFIG_OPUS_DECODER) += x86/opus_dsp_init.o
OBJS-$(CONFIG_OPUS_ENCODER) += x86/opus_dsp_init.o
OBJS-$(CONFIG_HEVC_DECODER) += x86/hevcdsp_init.o
......@@ -153,6 +154,7 @@ X86ASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o x86/synth_filter.o
X86ASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp.o \
x86/dirac_dwt.o
X86ASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o
X86ASM-OBJS-$(CONFIG_EXR_DECODER) += x86/exrdsp.o
X86ASM-OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp.o
ifdef CONFIG_GPL
X86ASM-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flac_dsp_gpl.o
......
;******************************************************************************
;* X86 Optimized functions for Open Exr Decoder
;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
;*
;* reorder_pixels based on patch by John Loy
;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION .text
;------------------------------------------------------------------------------
; void ff_reorder_pixels(uint8_t *src, uint8_t *dst, ptrdiff_t size)
;------------------------------------------------------------------------------
; REORDER_PIXELS: emits reorder_pixels for the currently selected
; instruction set (see the INIT_XMM / INIT_YMM instantiations below).
;
; The function interleaves the first half of src with the second half of
; src, byte by byte, into dst. All three pointers are first advanced to the
; end of their region and then addressed through one negative offset
; (sizeq) that counts up towards zero.
;
; Each iteration reads mmsize bytes from each half and writes 2*mmsize
; bytes to dst. The loop test is at the bottom, so the body always runs at
; least once, and the last iteration can run past the end of the data when
; half_size is not a multiple of mmsize.
; NOTE(review): this relies on padded buffers (exr.c pads its dst buffer by
; 64 bytes) - confirm that src is padded as well.
%macro REORDER_PIXELS 0
; 3 arguments (src1, dst, size), 4 general purpose registers (src2 is a
; temporary), 3 vector registers (m0, m1 and m2 as scratch)
cglobal reorder_pixels, 3,4,3, src1, dst, size, src2
lea src2q, [src1q+sizeq] ; src2 = src + 2 * half_size
add dstq, sizeq ; dst offset by size
shr sizeq, 1 ; half_size
add src1q, sizeq ; offset src by half_size
neg sizeq ; size = offset for dst, src1, src2
; From here on: [src1q+sizeq] walks the first half of src,
; [src2q+sizeq] walks the second half, [dstq+2*sizeq] walks dst.
.loop:
%if cpuflag(avx2)
; imm8 0xd8 reorders the four qwords of each load as 0,2,1,3.
; NOTE(review): presumably this pre-shuffle compensates for the byte
; unpack working per 128-bit lane, so that the two stores below end up in
; linear order - confirm against SBUTTERFLY in x86util.asm.
vpermq m0, [src1q + sizeq], 0xd8; load first part
vpermq m1, [src2q + sizeq], 0xd8; load second part
%else
; mova is the aligned move, so the first half (src itself) must be
; mmsize-aligned; the second half starts at src + half_size, which has no
; alignment guarantee, hence movu.
mova m0, [src1q+sizeq] ; load first part
movu m1, [src2q+sizeq] ; load second part
%endif
; interleave the bytes of m0 and m1, using m2 as temporary
SBUTTERFLY bw, 0, 1, 2 ; interleaved
; aligned stores: dst must be mmsize-aligned
mova [dstq+2*sizeq ], m0 ; copy to dst
mova [dstq+2*sizeq+mmsize], m1
add sizeq, mmsize ; advance by one vector per half
jl .loop ; continue while the offset is still negative
RET
%endmacro
; SSE2 version using XMM registers; declared on the C side as
; ff_reorder_pixels_sse2.
INIT_XMM sse2
REORDER_PIXELS
; AVX2 version using YMM registers (ff_reorder_pixels_avx2 on the C side);
; only assembled when the build has external AVX2 support.
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
REORDER_PIXELS
%endif
/*
* OpenEXR (.exr) image decoder
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/exrdsp.h"
void ff_reorder_pixels_sse2(uint8_t *src, uint8_t *dst, ptrdiff_t size);
void ff_reorder_pixels_avx2(uint8_t *src, uint8_t *dst, ptrdiff_t size);
/**
 * Install the x86 assembly versions of the EXR DSP routines that the
 * running CPU supports.
 *
 * The checks go from the older to the newer instruction set, so when both
 * match, the AVX2 version replaces the SSE2 one.
 *
 * @param dsp context whose function pointers may be overridden
 */
av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
{
    const int cpu_flags = av_get_cpu_flags();

    /* Baseline SIMD version. */
    if (EXTERNAL_SSE2(cpu_flags)) {
        dsp->reorder_pixels = ff_reorder_pixels_sse2;
    }

    /* Checked last so that it takes precedence over SSE2. */
    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        dsp->reorder_pixels = ff_reorder_pixels_avx2;
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment