Commit 058bbf48 authored by Paul B Mahol

avfilter/vf_v360: x86 SIMD for interpolations

parent f0d8005e
/*
* Copyright (c) 2019 Eugene Lyapustin
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFILTER_V360_H
#define AVFILTER_V360_H
#include "avfilter.h"
/**
 * Identifiers for the projection layouts known to the v360 filter.
 * Values are assigned implicitly starting at 0; NB_PROJECTIONS must stay
 * last so that it equals the number of real entries.
 */
enum Projections {
    EQUIRECTANGULAR,
    CUBEMAP_3_2,
    CUBEMAP_6_1,
    EQUIANGULAR,
    FLAT,
    DUAL_FISHEYE,
    BARREL,
    CUBEMAP_1_6,
    NB_PROJECTIONS, ///< Number of projections (not a projection itself)
};
/**
 * Interpolation methods. V360Context.interp is compared against these
 * values when a remap_line implementation is selected.
 * NB_INTERP_METHODS must stay last so that it equals the number of methods.
 */
enum InterpMethod {
    NEAREST,
    BILINEAR,
    BICUBIC,
    LANCZOS,
    NB_INTERP_METHODS, ///< Number of interpolation methods
};
/**
 * Face slots. The names follow a three-column by two-row arrangement
 * (left/middle/right, top/bottom), numbered row by row from 0.
 * NB_FACES must stay last so that it equals the number of slots.
 */
enum Faces {
    TOP_LEFT,
    TOP_MIDDLE,
    TOP_RIGHT,
    BOTTOM_LEFT,
    BOTTOM_MIDDLE,
    BOTTOM_RIGHT,
    NB_FACES, ///< Number of face slots
};
/**
 * Axis-aligned directions, one per signed coordinate axis.
 * NB_DIRECTIONS must stay last so that it equals the number of directions.
 */
enum Direction {
    RIGHT,         ///< Axis +X
    LEFT,          ///< Axis -X
    UP,            ///< Axis +Y
    DOWN,          ///< Axis -Y
    FRONT,         ///< Axis -Z
    BACK,          ///< Axis +Z
    NB_DIRECTIONS, ///< Number of directions
};
/**
 * Rotation steps; the names encode multiples of 90 (presumably degrees).
 * NB_ROTATIONS must stay last so that it equals the number of steps.
 */
enum Rotation {
    ROT_0,
    ROT_90,
    ROT_180,
    ROT_270,
    NB_ROTATIONS, ///< Number of rotation steps
};
/**
 * State shared by the v360 filter code and its per-architecture init hooks.
 * Member order is part of the layout and must not be changed.
 */
typedef struct V360Context {
    const AVClass *class;

    /* NOTE(review): presumably input/output projection (enum Projections) - confirm */
    int in;
    int out;

    /* Interpolation method; compared against enum InterpMethod values. */
    int interp;

    int width;
    int height;

    /* NOTE(review): presumably textual face order / face rotation options - confirm */
    char *in_forder;
    char *out_forder;
    char *in_frot;
    char *out_frot;

    int in_cubemap_face_order[6];
    int out_cubemap_direction_order[6];
    int in_cubemap_face_rotation[6];
    int out_cubemap_face_rotation[6];

    float in_pad;
    float out_pad;

    float yaw;
    float pitch;
    float roll;

    int h_flip;
    int v_flip;
    int d_flip;

    float h_fov;
    float v_fov;
    float flat_range[3];

    /* Per-plane dimensions, up to four planes. */
    int planewidth[4];
    int planeheight[4];
    int inplanewidth[4];
    int inplaneheight[4];
    int nb_planes;

    /*
     * Per-plane tables. Element types match the u, v and ker parameters of
     * remap_line. NOTE(review): presumably these are the tables passed to
     * it - confirm against the caller.
     */
    uint16_t *u[4];
    uint16_t *v[4];
    int16_t  *ker[4];

    /* Slice worker; signature takes (ctx, arg, jobnr, nb_jobs). */
    int (*remap_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);

    /*
     * Remaps one output line. ff_v360_init_x86() may point this at an
     * AVX2 implementation.
     */
    void (*remap_line)(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
                       const uint16_t *u, const uint16_t *v, const int16_t *ker);
} V360Context;
/* NOTE(review): defined outside this header; presumably installs the generic
 * remap callbacks in s for the given bit depth - confirm. */
void ff_v360_init(V360Context *s, int depth);
/* x86 hook: on x86-64, when EXTERNAL_AVX2_FAST() holds for the CPU flags and
 * depth <= 8, replaces s->remap_line with the AVX2 routine matching
 * s->interp. Leaves s untouched otherwise. */
void ff_v360_init_x86(V360Context *s, int depth);
#endif /* AVFILTER_V360_H */
This diff is collapsed.
......@@ -31,6 +31,7 @@ OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend_init.o
OBJS-$(CONFIG_THRESHOLD_FILTER) += x86/vf_threshold_init.o
OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_tinterlace_init.o
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
OBJS-$(CONFIG_V360_FILTER) += x86/vf_v360_init.o
OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif_init.o
OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
......@@ -66,5 +67,6 @@ X86ASM-OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_THRESHOLD_FILTER) += x86/vf_threshold.o
X86ASM-OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_interlace.o
X86ASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o
X86ASM-OBJS-$(CONFIG_V360_FILTER) += x86/vf_v360.o
X86ASM-OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif.o
X86ASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o
;*****************************************************************************
;* x86-optimized functions for v360 filter
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%if HAVE_AVX2_EXTERNAL && ARCH_X86_64
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
; pshufb control: gather bytes 0, 4, 8 and 12 (the low byte of each dword)
; into the low four bytes; indices with the high bit set (-1) produce zero.
pb_mask: db 0, 4, 8, 12
         times 12 db -1

; dword constant 255, used as an AND mask that keeps only the low byte of
; each gathered dword.
pd_255:  times 4 dd 255
SECTION .text
; void ff_remap1_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
;                               const uint16_t *u, const uint16_t *v, const int16_t *ker);
;
; One source sample per output pixel: dst[x] = src[v[x] * in_linesize + u[x]].
; Only the first six arguments are loaded; ker is not read.
;
; Eight pixels are produced per iteration and the loop body always runs at
; least once, so dst receives a multiple of 8 bytes (up to 7 bytes past
; width). Each gather lane loads a full dword, so up to 3 bytes beyond every
; sampled pixel are read from src.
INIT_YMM avx2
cglobal remap1_8bit_line, 6, 7, 6, dst, width, src, in_linesize, u, v, x
    movsxdifnidn widthq, widthd
    xor             xq, xq
    movd           xm0, in_linesized
    pcmpeqw         m4, m4                  ; all-ones gather mask template
    VBROADCASTI128  m3, [pb_mask]           ; same byte-pick mask in both lanes
    vpbroadcastd    m0, xm0                 ; m0 = in_linesize in every dword

    .loop:
        ; u/v are uint16_t: zero-extend so that coordinates >= 32768 do not
        ; turn into negative offsets.
        pmovzxwd         m1, [vq + xq * 2]
        pmovzxwd         m2, [uq + xq * 2]

        pmulld           m1, m0             ; v * in_linesize
        paddd            m1, m2             ; + u
        mova             m2, m4             ; gather clears its mask: reload each time
        vpgatherdd       m5, [srcq + m1], m2
        pshufb           m1, m5, m3         ; keep the low byte of each dword
        vextracti128    xm2, m1, 1
        movd      [dstq+xq], xm1            ; pixels x .. x+3
        movd    [dstq+xq+4], xm2            ; pixels x+4 .. x+7

        add   xq, mmsize / 4
        cmp   xq, widthq
        jl .loop
    RET
; void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
;                               const uint16_t *u, const uint16_t *v, const int16_t *ker);
;
; Four taps per output pixel:
;   dst[x] = (sum over i in 0..3 of src[v[4x+i] * in_linesize + u[4x+i]] * ker[4x+i]) >> 14
; The low byte of the shifted sum is stored without saturation.
;
; Two pixels are produced per iteration (one per 128-bit lane) and the loop
; body always runs at least once, so one byte past an odd width is written.
; Each gather lane loads a full dword, so up to 3 bytes beyond every sampled
; pixel are read from src.
INIT_YMM avx2
cglobal remap2_8bit_line, 7, 8, 8, dst, width, src, in_linesize, u, v, ker, x
    movsxdifnidn widthq, widthd
    xor             xq, xq
    movd           xm0, in_linesized
    pcmpeqw         m7, m7                  ; all-ones gather mask template
    vpbroadcastd    m0, xm0                 ; m0 = in_linesize in every dword
    vpbroadcastd    m6, [pd_255]            ; low-byte mask

    .loop:
        pmovsxwd         m1, [kerq + xq * 8] ; weights are signed int16_t
        ; u/v are uint16_t: zero-extend so that coordinates >= 32768 do not
        ; turn into negative offsets.
        pmovzxwd         m2, [vq + xq * 8]
        pmovzxwd         m3, [uq + xq * 8]

        pmulld           m4, m2, m0         ; v * in_linesize
        paddd            m4, m3             ; + u
        mova             m3, m7             ; gather clears its mask: reload each time
        vpgatherdd       m2, [srcq + m4], m3
        pand             m2, m6             ; keep only the addressed byte
        pmulld           m2, m1             ; sample * weight
        phaddd           m2, m2             ; two horizontal adds leave the
        phaddd           m1, m2, m2         ; 4-tap sum in every dword of a lane
        psrld            m1, m1, 0xe        ; drop the 14 fractional bits
        vextracti128    xm2, m1, 1

        pextrb    [dstq+xq], xm1, 0         ; pixel x   (low lane)
        pextrb  [dstq+xq+1], xm2, 0         ; pixel x+1 (high lane)

        add   xq, mmsize / 16
        cmp   xq, widthq
        jl .loop
    RET
; void ff_remap4_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
;                               const uint16_t *u, const uint16_t *v, const int16_t *ker);
;
; Sixteen taps per output pixel:
;   dst[x] = pack((sum over i in 0..15 of src[v[16x+i] * in_linesize + u[16x+i]] * ker[16x+i]) >> 14)
; where pack is the packuswb conversion of the low word to a byte.
;
; One pixel is produced per iteration; the loop body always runs at least
; once. x counts pixels, y is the byte offset into u/v/ker and advances by
; 32 bytes (16 16-bit entries) per pixel. Each gather lane loads a full
; dword, so up to 3 bytes beyond every sampled pixel are read from src.
INIT_YMM avx2
cglobal remap4_8bit_line, 7, 9, 11, dst, width, src, in_linesize, u, v, ker, x, y
    movsxdifnidn widthq, widthd
    xor             yq, yq
    xor             xq, xq
    movd           xm0, in_linesized
    pcmpeqw         m7, m7                  ; all-ones gather mask template
    vpbroadcastd    m0, xm0                 ; m0 = in_linesize in every dword
    vpbroadcastd    m6, [pd_255]            ; low-byte mask

    .loop:
        pmovsxwd         m1, [kerq + yq]      ; weights 0..7  (signed int16_t)
        pmovsxwd         m5, [kerq + yq + 16] ; weights 8..15
        ; u/v are uint16_t: zero-extend so that coordinates >= 32768 do not
        ; turn into negative offsets.
        pmovzxwd         m2, [vq + yq]
        pmovzxwd         m8, [vq + yq + 16]
        pmovzxwd         m3, [uq + yq]
        pmovzxwd         m9, [uq + yq + 16]

        pmulld           m4, m2, m0         ; v * in_linesize, taps 0..7
        pmulld          m10, m8, m0         ; v * in_linesize, taps 8..15
        paddd            m4, m3             ; + u
        paddd           m10, m9
        mova             m3, m7             ; gather clears its mask: reload each time
        vpgatherdd       m2, [srcq + m4], m3
        mova             m3, m7
        vpgatherdd       m4, [srcq + m10], m3
        pand             m2, m6             ; keep only the addressed byte
        pand             m4, m6
        pmulld           m2, m1             ; sample * weight
        pmulld           m4, m5
        paddd            m2, m4             ; 16 products -> 8 partial sums
        vextracti128    xm1, m2, 1
        paddd            m1, m2             ; low lane: 4 partial sums
        phaddd           m1, m1
        phaddd           m1, m1             ; low dword: full 16-tap sum
        psrld            m1, m1, 0xe        ; drop the 14 fractional bits
        packuswb         m1, m1             ; signed words -> bytes, saturated to 0..255

        pextrb    [dstq+xq], xm1, 0

        add   xq, 1
        add   yq, 32
        cmp   xq, widthq
        jl .loop
    RET
%endif
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/v360.h"
/*
 * AVX2 line remappers implemented in assembly (cglobal remapN_8bit_line).
 * All three share the V360Context.remap_line signature so they can be
 * assigned to it directly.
 */
/* One sample per pixel; the assembly loads only six arguments, ker is unused. */
void ff_remap1_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
const uint16_t *u, const uint16_t *v, const int16_t *ker);
/* Four u/v/ker entries per pixel. */
void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
const uint16_t *u, const uint16_t *v, const int16_t *ker);
/* Sixteen u/v/ker entries per pixel. */
void ff_remap4_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
const uint16_t *u, const uint16_t *v, const int16_t *ker);
/**
 * Install the AVX2 line remapper matching s->interp.
 *
 * Only acts on x86-64 builds, when EXTERNAL_AVX2_FAST() accepts the runtime
 * CPU flags and depth is at most 8. In every other case s->remap_line is
 * left as the caller set it.
 *
 * @param s     filter context whose remap_line may be replaced
 * @param depth bit depth of the processed samples
 */
av_cold void ff_v360_init_x86(V360Context *s, int depth)
{
#if ARCH_X86_64
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_AVX2_FAST(cpu_flags) && depth <= 8) {
        switch (s->interp) {
        case NEAREST:
            s->remap_line = ff_remap1_8bit_line_avx2;
            break;
        case BILINEAR:
            s->remap_line = ff_remap2_8bit_line_avx2;
            break;
        case BICUBIC:
        case LANCZOS:
            /* both methods share the sixteen-tap routine */
            s->remap_line = ff_remap4_8bit_line_avx2;
            break;
        }
    }
#endif
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment