Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
058bbf48
Commit
058bbf48
authored
Sep 03, 2019
by
Paul B Mahol
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter/vf_v360: x86 SIMD for interpolations
parent
f0d8005e
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
394 additions
and
155 deletions
+394
-155
v360.h
libavfilter/v360.h
+113
-0
vf_v360.c
libavfilter/vf_v360.c
+87
-155
Makefile
libavfilter/x86/Makefile
+2
-0
vf_v360.asm
libavfilter/x86/vf_v360.asm
+142
-0
vf_v360_init.c
libavfilter/x86/vf_v360_init.c
+50
-0
No files found.
libavfilter/v360.h
0 → 100644
View file @
058bbf48
/*
* Copyright (c) 2019 Eugene Lyapustin
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFILTER_V360_H
#define AVFILTER_V360_H
#include "avfilter.h"
/**
 * Projection identifiers.
 * NB_PROJECTIONS is not a projection: it equals the number of entries
 * declared before it and therefore has to remain the last enumerator.
 */
enum Projections {
    EQUIRECTANGULAR,
    CUBEMAP_3_2,
    CUBEMAP_6_1,
    EQUIANGULAR,
    FLAT,
    DUAL_FISHEYE,
    BARREL,
    CUBEMAP_1_6,
    NB_PROJECTIONS,     ///< number of projections, keep last
};
/**
 * Interpolation method identifiers.
 * NB_INTERP_METHODS equals the number of methods declared before it and
 * has to remain the last enumerator.
 */
enum InterpMethod {
    NEAREST,
    BILINEAR,
    BICUBIC,
    LANCZOS,
    NB_INTERP_METHODS,  ///< number of interpolation methods, keep last
};
/**
 * Face slot identifiers, named after their position in a
 * three-column, two-row arrangement.
 * NB_FACES equals the number of slots and has to remain last.
 */
enum Faces {
    TOP_LEFT,
    TOP_MIDDLE,
    TOP_RIGHT,
    BOTTOM_LEFT,
    BOTTOM_MIDDLE,
    BOTTOM_RIGHT,
    NB_FACES,           ///< number of face slots, keep last
};
/**
 * Axis-aligned directions.
 * NB_DIRECTIONS equals the number of directions and has to remain last.
 */
enum Direction {
    RIGHT,              ///< Axis +X
    LEFT,               ///< Axis -X
    UP,                 ///< Axis +Y
    DOWN,               ///< Axis -Y
    FRONT,              ///< Axis -Z
    BACK,               ///< Axis +Z
    NB_DIRECTIONS,      ///< number of directions, keep last
};
/**
 * Rotation identifiers in steps of 90 degrees.
 * NB_ROTATIONS equals the number of rotations and has to remain last.
 */
enum Rotation {
    ROT_0,
    ROT_90,
    ROT_180,
    ROT_270,
    NB_ROTATIONS,       ///< number of rotations, keep last
};
/**
 * Private context of the v360 filter.
 *
 * NOTE(review): descriptions starting with "presumably" are inferred from
 * the member names only; confirm them against the option table and
 * config_output() in vf_v360.c.
 */
typedef struct V360Context {
    const AVClass *class;

    int in;                             ///< input projection, config_output() switches on it using enum Projections values
    int out;                            ///< presumably the output projection
    int interp;                         ///< interpolation method, compared against enum InterpMethod values
    int width;                          ///< presumably the requested output width
    int height;                         ///< presumably the requested output height

    char *in_forder;                    ///< presumably the input face order string
    char *out_forder;                   ///< presumably the output face order string
    char *in_frot;                      ///< presumably the input face rotation string
    char *out_frot;                     ///< presumably the output face rotation string

    int in_cubemap_face_order[6];
    int out_cubemap_direction_order[6];
    int in_cubemap_face_rotation[6];
    int out_cubemap_face_rotation[6];

    float in_pad;
    float out_pad;

    float yaw;
    float pitch;
    float roll;

    int h_flip;
    int v_flip;
    int d_flip;

    float h_fov;
    float v_fov;
    float flat_range[3];

    int planewidth[4];                  ///< per-plane width iterated by the remap slice functions
    int planeheight[4];                 ///< per-plane height split into slices by the remap slice functions
    int inplanewidth[4];                ///< presumably the per-plane input width
    int inplaneheight[4];               ///< presumably the per-plane input height
    int nb_planes;                      ///< number of planes the remap slice functions loop over

    /*
     * Per-plane remap tables. For the output pixel (x, y) the remap code
     * reads ws * ws consecutive entries starting at index
     * (y * planewidth + x) * ws * ws, ws being the interpolation window
     * size. A sample is fetched as src[v * in_linesize + u] and weighted
     * by the matching ker entry.
     */
    uint16_t *u[4];
    uint16_t *v[4];
    int16_t  *ker[4];

    /** Per-job worker; each job handles its own range of rows of every plane. */
    int (*remap_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);

    /**
     * Remap a single output line; installed by ff_v360_init() and invoked
     * by the remap slice functions once per row.
     */
    void (*remap_line)(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
                       const uint16_t *u, const uint16_t *v, const int16_t *ker);
} V360Context;
/**
 * Install the per-line remap function in s->remap_line.
 *
 * The C implementation is chosen from s->interp and the pixel depth, after
 * which the x86 init may replace it on x86-64 builds.
 *
 * @param s     filter context, s->interp has to be set already
 * @param depth pixel depth in bits; values <= 8 select the 8-bit functions
 */
void ff_v360_init(V360Context *s, int depth);
/**
 * Replace s->remap_line with an AVX2 implementation when the CPU reports
 * fast AVX2 and the pixel depth is at most 8 bits; otherwise leave it alone.
 *
 * @param s     filter context, s->interp has to be set already
 * @param depth pixel depth in bits
 */
void ff_v360_init_x86(V360Context *s, int depth);
#endif
/* AVFILTER_V360_H */
libavfilter/vf_v360.c
View file @
058bbf48
...
...
@@ -41,88 +41,7 @@
#include "formats.h"
#include "internal.h"
#include "video.h"
enum
Projections
{
EQUIRECTANGULAR
,
CUBEMAP_3_2
,
CUBEMAP_6_1
,
EQUIANGULAR
,
FLAT
,
DUAL_FISHEYE
,
BARREL
,
CUBEMAP_1_6
,
NB_PROJECTIONS
,
};
enum
InterpMethod
{
NEAREST
,
BILINEAR
,
BICUBIC
,
LANCZOS
,
NB_INTERP_METHODS
,
};
enum
Faces
{
TOP_LEFT
,
TOP_MIDDLE
,
TOP_RIGHT
,
BOTTOM_LEFT
,
BOTTOM_MIDDLE
,
BOTTOM_RIGHT
,
NB_FACES
,
};
enum
Direction
{
RIGHT
,
///< Axis +X
LEFT
,
///< Axis -X
UP
,
///< Axis +Y
DOWN
,
///< Axis -Y
FRONT
,
///< Axis -Z
BACK
,
///< Axis +Z
NB_DIRECTIONS
,
};
enum
Rotation
{
ROT_0
,
ROT_90
,
ROT_180
,
ROT_270
,
NB_ROTATIONS
,
};
typedef
struct
V360Context
{
const
AVClass
*
class
;
int
in
,
out
;
int
interp
;
int
width
,
height
;
char
*
in_forder
;
char
*
out_forder
;
char
*
in_frot
;
char
*
out_frot
;
int
in_cubemap_face_order
[
6
];
int
out_cubemap_direction_order
[
6
];
int
in_cubemap_face_rotation
[
6
];
int
out_cubemap_face_rotation
[
6
];
float
in_pad
,
out_pad
;
float
yaw
,
pitch
,
roll
;
int
h_flip
,
v_flip
,
d_flip
;
float
h_fov
,
v_fov
;
float
flat_range
[
3
];
int
planewidth
[
4
],
planeheight
[
4
];
int
inplanewidth
[
4
],
inplaneheight
[
4
];
int
nb_planes
;
uint16_t
*
u
[
4
],
*
v
[
4
];
int16_t
*
ker
[
4
];
int
(
*
remap_slice
)(
AVFilterContext
*
ctx
,
void
*
arg
,
int
jobnr
,
int
nb_jobs
);
}
V360Context
;
#include "v360.h"
typedef
struct
ThreadData
{
AVFrame
*
in
;
...
...
@@ -251,47 +170,22 @@ static int query_formats(AVFilterContext *ctx)
return
ff_set_common_formats
(
ctx
,
fmts_list
);
}
/**
* Generate no-interpolation remapping function with a given pixel depth.
*
* @param bits number of bits per pixel
* @param div number of bytes per pixel
*/
#define DEFINE_REMAP1(bits, div) \
static int remap1_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
{ \
ThreadData *td = (ThreadData*)arg; \
const V360Context *s = ctx->priv; \
const AVFrame *in = td->in; \
AVFrame *out = td->out; \
\
int plane, x, y; \
\
for (plane = 0; plane < s->nb_planes; plane++) { \
const int in_linesize = in->linesize[plane] / div; \
const int out_linesize = out->linesize[plane] / div; \
const uint##bits##_t *src = (const uint##bits##_t *)in->data[plane]; \
uint##bits##_t *dst = (uint##bits##_t *)out->data[plane]; \
const int width = s->planewidth[plane]; \
const int height = s->planeheight[plane]; \
\
const int slice_start = (height * jobnr ) / nb_jobs; \
const int slice_end = (height * (jobnr + 1)) / nb_jobs; \
\
for (y = slice_start; y < slice_end; y++) { \
const uint16_t *u = s->u[plane] + y * width; \
const uint16_t *v = s->v[plane] + y * width; \
uint##bits##_t *d = dst + y * out_linesize; \
for (x = 0; x < width; x++) \
*d++ = src[v[x] * in_linesize + u[x]]; \
} \
} \
\
return 0; \
#define DEFINE_REMAP1_LINE(bits, div) \
static void remap1_##bits##bit_line_c(uint8_t *dst, int width, const uint8_t *src, \
ptrdiff_t in_linesize, \
const uint16_t *u, const uint16_t *v, const int16_t *ker) \
{ \
const uint##bits##_t *s = (const uint##bits##_t *)src; \
uint##bits##_t *d = (uint##bits##_t *)dst; \
\
in_linesize /= div; \
\
for (int x = 0; x < width; x++) \
d[x] = s[v[x] * in_linesize + u[x]]; \
}
DEFINE_REMAP1
(
8
,
1
)
DEFINE_REMAP1
(
16
,
2
)
DEFINE_REMAP1
_LINE
(
8
,
1
)
DEFINE_REMAP1
_LINE
(
16
,
2
)
typedef
struct
XYRemap
{
uint16_t
u
[
4
][
4
];
...
...
@@ -304,9 +198,8 @@ typedef struct XYRemap {
*
* @param ws size of interpolation window
* @param bits number of bits per pixel
* @param div number of bytes per pixel
*/
#define DEFINE_REMAP(ws, bits
, div)
\
#define DEFINE_REMAP(ws, bits
)
\
static int remap##ws##_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
{ \
ThreadData *td = (ThreadData*)arg; \
...
...
@@ -314,48 +207,85 @@ static int remap##ws##_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jo
const AVFrame *in = td->in; \
AVFrame *out = td->out; \
\
int plane, x, y, i, j; \
\
for (plane = 0; plane < s->nb_planes; plane++) { \
const int in_linesize = in->linesize[plane] / div; \
const int out_linesize = out->linesize[plane] / div; \
const uint##bits##_t *src = (const uint##bits##_t *)in->data[plane]; \
uint##bits##_t *dst = (uint##bits##_t *)out->data[plane]; \
for (int plane = 0; plane < s->nb_planes; plane++) { \
const int in_linesize = in->linesize[plane]; \
const int out_linesize = out->linesize[plane]; \
const uint8_t *src = in->data[plane]; \
uint8_t *dst = out->data[plane]; \
const int width = s->planewidth[plane]; \
const int height = s->planeheight[plane]; \
\
const int slice_start = (height * jobnr ) / nb_jobs; \
const int slice_end = (height * (jobnr + 1)) / nb_jobs; \
\
for (y = slice_start; y < slice_end; y++) { \
uint##bits##_t *d = dst + y * out_linesize; \
for (int y = slice_start; y < slice_end; y++) { \
const uint16_t *u = s->u[plane] + y * width * ws * ws; \
const uint16_t *v = s->v[plane] + y * width * ws * ws; \
const int16_t *ker = s->ker[plane] + y * width * ws * ws; \
for (x = 0; x < width; x++) { \
const uint16_t *uu = u + x * ws * ws; \
const uint16_t *vv = v + x * ws * ws; \
const int16_t *kker = ker + x * ws * ws; \
int tmp = 0; \
\
for (i = 0; i < ws; i++) { \
for (j = 0; j < ws; j++) { \
tmp += kker[i * ws + j] * src[vv[i * ws + j] * in_linesize + uu[i * ws + j]]; \
} \
} \
\
*d++ = av_clip_uint##bits(tmp >> (15 - ws)); \
} \
s->remap_line(dst + y * out_linesize, width, src, in_linesize, u, v, ker); \
} \
} \
\
return 0; \
}
DEFINE_REMAP
(
2
,
8
,
1
)
DEFINE_REMAP
(
4
,
8
,
1
)
DEFINE_REMAP
(
2
,
16
,
2
)
DEFINE_REMAP
(
4
,
16
,
2
)
DEFINE_REMAP
(
1
,
8
)
DEFINE_REMAP
(
2
,
8
)
DEFINE_REMAP
(
4
,
8
)
DEFINE_REMAP
(
1
,
16
)
DEFINE_REMAP
(
2
,
16
)
DEFINE_REMAP
(
4
,
16
)
/**
 * Generate the C line-remapping function for a ws x ws interpolation window.
 *
 * For every output pixel the generated function accumulates ws * ws source
 * samples, each multiplied by its weight from ker, shifts the sum right by
 * 14 bits and clips it to the pixel range.
 *
 * @param ws   size of interpolation window
 * @param bits number of bits per pixel
 * @param div  number of bytes per pixel
 */
#define DEFINE_REMAP_LINE(ws, bits, div)                                                       \
static void remap##ws##_##bits##bit_line_c(uint8_t *dst, int width, const uint8_t *src,        \
                                           ptrdiff_t in_linesize,                              \
                                           const uint16_t *u, const uint16_t *v,               \
                                           const int16_t *ker)                                 \
{                                                                                              \
    const uint##bits##_t *pixels = (const uint##bits##_t *)src;                                \
    uint##bits##_t *out = (uint##bits##_t *)dst;                                               \
    /* in_linesize is given in bytes, the lookup below works in samples */                     \
    const ptrdiff_t stride = in_linesize / div;                                                \
    const int taps = ws * ws;                                                                  \
                                                                                               \
    /* each output pixel owns `taps` consecutive entries of u, v and ker */                    \
    for (int x = 0; x < width; x++, u += taps, v += taps, ker += taps) {                       \
        int acc = 0;                                                                           \
                                                                                               \
        for (int k = 0; k < taps; k++)                                                         \
            acc += ker[k] * pixels[v[k] * stride + u[k]];                                      \
                                                                                               \
        out[x] = av_clip_uint##bits(acc >> 14);                                                \
    }                                                                                          \
}

DEFINE_REMAP_LINE(2,  8, 1)
DEFINE_REMAP_LINE(4,  8, 1)
DEFINE_REMAP_LINE(2, 16, 2)
DEFINE_REMAP_LINE(4, 16, 2)
/**
 * Pick the line-remapping function for the configured interpolation.
 *
 * NEAREST uses the 1-tap functions, BILINEAR the 2x2 ones, BICUBIC and
 * LANCZOS share the 4x4 ones. Any other value of s->interp leaves
 * s->remap_line untouched. On x86-64 builds the x86 init runs afterwards
 * and may override the choice.
 *
 * @param s     filter context, s->interp has to be set already
 * @param depth pixel depth in bits; values <= 8 select the 8-bit functions
 */
void ff_v360_init(V360Context *s, int depth)
{
    const int is_8bit = depth <= 8;

    if (s->interp == NEAREST)
        s->remap_line = is_8bit ? remap1_8bit_line_c : remap1_16bit_line_c;
    else if (s->interp == BILINEAR)
        s->remap_line = is_8bit ? remap2_8bit_line_c : remap2_16bit_line_c;
    else if (s->interp == BICUBIC || s->interp == LANCZOS)
        s->remap_line = is_8bit ? remap4_8bit_line_c : remap4_16bit_line_c;

    if (ARCH_X86_64)
        ff_v360_init_x86(s, depth);
}
/**
* Save nearest pixel coordinates for remapping.
...
...
@@ -399,10 +329,10 @@ static void bilinear_kernel(float du, float dv, const XYRemap *r_tmp,
}
}
ker
[
0
]
=
(
1
.
f
-
du
)
*
(
1
.
f
-
dv
)
*
8192
;
ker
[
1
]
=
du
*
(
1
.
f
-
dv
)
*
8192
;
ker
[
2
]
=
(
1
.
f
-
du
)
*
dv
*
8192
;
ker
[
3
]
=
du
*
dv
*
8192
;
ker
[
0
]
=
(
1
.
f
-
du
)
*
(
1
.
f
-
dv
)
*
16384
;
ker
[
1
]
=
du
*
(
1
.
f
-
dv
)
*
16384
;
ker
[
2
]
=
(
1
.
f
-
du
)
*
dv
*
16384
;
ker
[
3
]
=
du
*
dv
*
16384
;
}
/**
...
...
@@ -446,7 +376,7 @@ static void bicubic_kernel(float du, float dv, const XYRemap *r_tmp,
for
(
j
=
0
;
j
<
4
;
j
++
)
{
u
[
i
*
4
+
j
]
=
r_tmp
->
u
[
i
][
j
];
v
[
i
*
4
+
j
]
=
r_tmp
->
v
[
i
][
j
];
ker
[
i
*
4
+
j
]
=
du_coeffs
[
j
]
*
dv_coeffs
[
i
]
*
2048
;
ker
[
i
*
4
+
j
]
=
du_coeffs
[
j
]
*
dv_coeffs
[
i
]
*
16384
;
}
}
}
...
...
@@ -501,7 +431,7 @@ static void lanczos_kernel(float du, float dv, const XYRemap *r_tmp,
for
(
j
=
0
;
j
<
4
;
j
++
)
{
u
[
i
*
4
+
j
]
=
r_tmp
->
u
[
i
][
j
];
v
[
i
*
4
+
j
]
=
r_tmp
->
v
[
i
][
j
];
ker
[
i
*
4
+
j
]
=
du_coeffs
[
j
]
*
dv_coeffs
[
i
]
*
2048
;
ker
[
i
*
4
+
j
]
=
du_coeffs
[
j
]
*
dv_coeffs
[
i
]
*
16384
;
}
}
}
...
...
@@ -2038,6 +1968,8 @@ static int config_output(AVFilterLink *outlink)
av_assert0
(
0
);
}
ff_v360_init
(
s
,
depth
);
switch
(
s
->
in
)
{
case
EQUIRECTANGULAR
:
in_transform
=
xyz_to_equirect
;
...
...
libavfilter/x86/Makefile
View file @
058bbf48
...
...
@@ -31,6 +31,7 @@ OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend_init.o
OBJS-$(CONFIG_THRESHOLD_FILTER)
+=
x86/vf_threshold_init.o
OBJS-$(CONFIG_TINTERLACE_FILTER)
+=
x86/vf_tinterlace_init.o
OBJS-$(CONFIG_VOLUME_FILTER)
+=
x86/af_volume_init.o
OBJS-$(CONFIG_V360_FILTER)
+=
x86/vf_v360_init.o
OBJS-$(CONFIG_W3FDIF_FILTER)
+=
x86/vf_w3fdif_init.o
OBJS-$(CONFIG_YADIF_FILTER)
+=
x86/vf_yadif_init.o
...
...
@@ -66,5 +67,6 @@ X86ASM-OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_THRESHOLD_FILTER)
+=
x86/vf_threshold.o
X86ASM-OBJS-$(CONFIG_TINTERLACE_FILTER)
+=
x86/vf_interlace.o
X86ASM-OBJS-$(CONFIG_VOLUME_FILTER)
+=
x86/af_volume.o
X86ASM-OBJS-$(CONFIG_V360_FILTER)
+=
x86/vf_v360.o
X86ASM-OBJS-$(CONFIG_W3FDIF_FILTER)
+=
x86/vf_w3fdif.o
X86ASM-OBJS-$(CONFIG_YADIF_FILTER)
+=
x86/vf_yadif.o
x86/yadif-16.o
x86/yadif-10.o
libavfilter/x86/vf_v360.asm
0 → 100644
View file @
058bbf48
;*****************************************************************************
;* x86-optimized functions for v360 filter
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%if
HAVE_AVX2_EXTERNAL
&&
ARCH_X86_64
%include
"libavutil/x86/x86util.asm"
SECTION_RODATA

; pshufb control used by remap1_8bit_line (broadcast to both 128-bit lanes):
; picks bytes 0, 4, 8 and 12, i.e. the lowest byte of each dword, into the
; first four bytes; the -1 entries have their top bit set, which makes
; pshufb write zero to the remaining bytes.
pb_mask: db 0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
; dword 255, and-ed with gathered dwords in remap2/remap4 so that only the
; lowest byte of each gathered dword is kept.
pd_255: times 4 dd 255
SECTION
.
text
; void ff_remapN_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
;                               const uint16_t *u, const uint16_t *v, const int16_t *ker);  (N = 1, 2, 4)
; Single-tap remap of one line of 8-bit pixels, eight pixels per iteration:
;     dst[x] = src[v[x] * in_linesize + u[x]]
; Only six arguments are loaded; x is a scratch register used as the pixel
; index.
; NOTE(review): every iteration loads eight u/v entries and stores eight
; bytes even if fewer pixels remain, so for a width that is not a multiple
; of 8 memory past `width` is read and written - confirm the buffers are
; padded accordingly.
; NOTE(review): u/v are sign-extended (pmovsxwd) here although the C
; prototype declares them uint16_t; coordinates >= 0x8000 would differ from
; the C version - confirm they cannot occur.
INIT_YMM avx2
cglobal remap1_8bit_line, 6, 7, 6, dst, width, src, in_linesize, u, v, x
    movsxdifnidn widthq, widthd
    xor             xq, xq                  ; x = 0
    movd           xm0, in_linesized
    pcmpeqw         m4, m4                  ; m4 = all ones, template for the gather mask
    VBROADCASTI128  m3, [pb_mask]           ; m3 = byte shuffle control in both lanes
    vpbroadcastd    m0, xm0                 ; m0 = in_linesize in every dword

    .loop:
        pmovsxwd        m1, [vq + xq * 2]   ; eight v entries -> dwords
        pmovsxwd        m2, [uq + xq * 2]   ; eight u entries -> dwords

        pmulld          m1, m0              ; v * in_linesize
        paddd           m1, m2              ; + u = byte offset into src
        mova            m2, m4              ; fresh mask, the gather clears its mask operand
        vpgatherdd      m5, [srcq + m1], m2 ; one dword per offset, only its lowest byte is the pixel
        pshufb          m1, m5, m3          ; pack the four lowest bytes of each lane together
        vextracti128   xm2, m1, 1           ; upper lane holds pixels 4..7
        movd   [dstq + xq], xm1             ; store pixels 0..3
        movd [dstq + xq + 4], xm2           ; store pixels 4..7

        add             xq, mmsize / 4      ; mmsize / 4 pixels done (8 for ymm)
        cmp             xq, widthq
        jl .loop
    RET
; Four-tap (2x2 window) remap of one line of 8-bit pixels, two pixels per
; iteration. Each pixel owns four consecutive 16-bit entries in u, v and
; ker (8 bytes, hence the xq * 8 addressing); the lower 128-bit lane
; handles pixel x, the upper lane pixel x + 1:
;     dst[x] = sum(ker[k] * src[v[k] * in_linesize + u[k]]) >> 14
; NOTE(review): the result is stored with pextrb, i.e. the lowest byte
; without saturation, whereas the C version clips with av_clip_uint8 -
; presumably fine because the 2x2 weights are expected to be non-negative
; and to sum to at most 16384; confirm.
; NOTE(review): two pixels are always processed, so an odd width writes
; one byte past `width` - confirm the destination is padded.
INIT_YMM avx2
cglobal remap2_8bit_line, 7, 8, 8, dst, width, src, in_linesize, u, v, ker, x
    movsxdifnidn widthq, widthd
    xor             xq, xq                  ; x = 0
    movd           xm0, in_linesized
    pcmpeqw         m7, m7                  ; m7 = all ones, template for the gather mask
    vpbroadcastd    m0, xm0                 ; m0 = in_linesize in every dword
    vpbroadcastd    m6, [pd_255]            ; m6 = 255 in every dword

    .loop:
        pmovsxwd        m1, [kerq + xq * 8] ; weights of two pixels -> dwords
        pmovsxwd        m2, [vq + xq * 8]   ; v entries of two pixels -> dwords
        pmovsxwd        m3, [uq + xq * 8]   ; u entries of two pixels -> dwords

        pmulld          m4, m2, m0          ; v * in_linesize
        paddd           m4, m3              ; + u = byte offset into src
        mova            m3, m7              ; fresh mask, the gather clears its mask operand
        vpgatherdd      m2, [srcq + m4], m3 ; one dword per offset
        pand            m2, m6              ; keep only the addressed byte
        pmulld          m2, m1              ; sample * weight
        phaddd          m2, m2              ; two horizontal adds leave the sum of the
        phaddd          m1, m2, m2          ; four products in every dword of each lane
        psrld           m1, m1, 0xe         ; >> 14
        vextracti128   xm2, m1, 1           ; upper lane = pixel x + 1

        pextrb [dstq + xq], xm1, 0          ; store pixel x
        pextrb [dstq + xq + 1], xm2, 0      ; store pixel x + 1

        add             xq, mmsize / 16     ; mmsize / 16 pixels done (2 for ymm)
        cmp             xq, widthq
        jl .loop
    RET
; Sixteen-tap (4x4 window) remap of one line of 8-bit pixels, one pixel per
; iteration. Each pixel owns sixteen consecutive 16-bit entries in u, v and
; ker (32 bytes); x counts pixels while y is the matching byte offset into
; the three tables:
;     dst[x] = sat_u8(sum(ker[k] * src[v[k] * in_linesize + u[k]]) >> 14)
; NOTE(review): the shift is a logical one (psrld) and the saturation is
; done by packuswb on the low 16 bits of the shifted sum, which is not
; literally the C sequence "arithmetic shift, then av_clip_uint8" - confirm
; both agree for all sums the kernels can produce.
INIT_YMM avx2
cglobal remap4_8bit_line, 7, 9, 11, dst, width, src, in_linesize, u, v, ker, x, y
    movsxdifnidn widthq, widthd
    xor             yq, yq                  ; table byte offset = 0
    xor             xq, xq                  ; x = 0
    movd           xm0, in_linesized
    pcmpeqw         m7, m7                  ; m7 = all ones, template for the gather mask
    vpbroadcastd    m0, xm0                 ; m0 = in_linesize in every dword
    vpbroadcastd    m6, [pd_255]            ; m6 = 255 in every dword

    .loop:
        pmovsxwd        m1, [kerq + yq]         ; weights 0..7  -> dwords
        pmovsxwd        m5, [kerq + yq + 16]    ; weights 8..15 -> dwords
        pmovsxwd        m2, [vq + yq]           ; v entries 0..7
        pmovsxwd        m8, [vq + yq + 16]      ; v entries 8..15
        pmovsxwd        m3, [uq + yq]           ; u entries 0..7
        pmovsxwd        m9, [uq + yq + 16]      ; u entries 8..15

        pmulld          m4, m2, m0              ; v * in_linesize (taps 0..7)
        pmulld         m10, m8, m0              ; v * in_linesize (taps 8..15)
        paddd           m4, m3                  ; + u = byte offsets, taps 0..7
        paddd          m10, m9                  ; + u = byte offsets, taps 8..15
        mova            m3, m7                  ; fresh mask, the gather clears its mask operand
        vpgatherdd      m2, [srcq + m4], m3     ; samples 0..7, one dword each
        mova            m3, m7                  ; mask again for the second gather
        vpgatherdd      m4, [srcq + m10], m3    ; samples 8..15, one dword each
        pand            m2, m6                  ; keep only the addressed byte
        pand            m4, m6
        pmulld          m2, m1                  ; sample * weight, taps 0..7
        pmulld          m4, m5                  ; sample * weight, taps 8..15

        paddd           m2, m4                  ; eight partial sums
        vextracti128   xm1, m2, 1               ; fold the upper lane ...
        paddd           m1, m2                  ; ... onto the lower one: four partial sums
        phaddd          m1, m1                  ; two horizontal adds leave the
        phaddd          m1, m1                  ; complete sum in dword 0
        psrld           m1, m1, 0xe             ; >> 14
        packuswb        m1, m1                  ; signed words -> unsigned bytes with saturation

        pextrb [dstq + xq], xm1, 0              ; store the pixel

        add             xq, 1                   ; next pixel
        add             yq, 32                  ; next 16 table entries (16 * 2 bytes)
        cmp             xq, widthq
        jl .loop
    RET
%endif
libavfilter/x86/vf_v360_init.c
0 → 100644
View file @
058bbf48
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/v360.h"
/*
 * AVX2 line-remapping functions implemented in vf_v360.asm. All of them
 * share the V360Context.remap_line signature; the digit after "remap" is
 * the interpolation window size (1 = single tap, 2 = 2x2, 4 = 4x4).
 */
void ff_remap1_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src,
                              ptrdiff_t in_linesize,
                              const uint16_t *u, const uint16_t *v,
                              const int16_t *ker);

void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src,
                              ptrdiff_t in_linesize,
                              const uint16_t *u, const uint16_t *v,
                              const int16_t *ker);

void ff_remap4_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src,
                              ptrdiff_t in_linesize,
                              const uint16_t *u, const uint16_t *v,
                              const int16_t *ker);
/**
 * Override s->remap_line with the AVX2 implementation matching s->interp.
 *
 * Nothing is changed unless this is an x86-64 build, the CPU reports fast
 * AVX2 and the pixel depth is at most 8 bits.
 *
 * @param s     filter context, s->interp has to be set already
 * @param depth pixel depth in bits
 */
av_cold void ff_v360_init_x86(V360Context *s, int depth)
{
#if ARCH_X86_64
    const int cpu_flags = av_get_cpu_flags();

    /* only 8-bit AVX2 line functions exist */
    if (EXTERNAL_AVX2_FAST(cpu_flags) && depth <= 8) {
        switch (s->interp) {
        case NEAREST:
            s->remap_line = ff_remap1_8bit_line_avx2;
            break;
        case BILINEAR:
            s->remap_line = ff_remap2_8bit_line_avx2;
            break;
        case BICUBIC:
        case LANCZOS:
            /* both use a 4x4 window and differ only in their weights */
            s->remap_line = ff_remap4_8bit_line_avx2;
            break;
        default:
            break;
        }
    }
#endif
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment