Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
db7786e8
Commit
db7786e8
authored
Sep 30, 2015
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9: sse2/ssse3/avx 16bpp loopfilter x86 simd.
parent
254c64c5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
918 additions
and
1 deletion
+918
-1
Makefile
libavcodec/x86/Makefile
+1
-0
constants.c
libavcodec/x86/constants.c
+2
-0
constants.h
libavcodec/x86/constants.h
+1
-0
vp9dsp_init_16bpp_template.c
libavcodec/x86/vp9dsp_init_16bpp_template.c
+90
-0
vp9lpf_16bpp.asm
libavcodec/x86/vp9lpf_16bpp.asm
+823
-0
vp9mc_16bpp.asm
libavcodec/x86/vp9mc_16bpp.asm
+1
-1
No files found.
libavcodec/x86/Makefile
View file @
db7786e8
...
...
@@ -160,6 +160,7 @@ YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
# Object files appended to the YASM-OBJS-$(CONFIG_VP9_DECODER) list.
# Every line of the list except the last must end in a continuation backslash.
YASM-OBJS-$(CONFIG_VP9_DECODER)        += x86/vp9intrapred.o            \
                                          x86/vp9itxfm.o                \
                                          x86/vp9lpf.o                  \
                                          x86/vp9lpf_16bpp.o            \
                                          x86/vp9mc.o                   \
                                          x86/vp9mc_16bpp.o
# Object files appended to the YASM-OBJS-$(CONFIG_WEBP_DECODER) list.
YASM-OBJS-$(CONFIG_WEBP_DECODER)       += x86/vp8dsp.o
libavcodec/x86/constants.c
View file @
db7786e8
...
...
@@ -55,6 +55,8 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1024) = { 0x0400040004000400ULL, 0x040
0x0400040004000400ULL
,
0x0400040004000400ULL
};
/* Every 16-bit group of the four 64-bit initializer words is 0x0800 (2048). */
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2048) = {
    0x0800080008000800ULL, 0x0800080008000800ULL,
    0x0800080008000800ULL, 0x0800080008000800ULL
};
/* Every 16-bit group of the four 64-bit initializer words is 0x0fff (4095). */
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_4095) = {
    0x0fff0fff0fff0fffULL, 0x0fff0fff0fff0fffULL,
    0x0fff0fff0fff0fffULL, 0x0fff0fff0fff0fffULL
};
/* Every 16-bit group of the four 64-bit initializer words is 0x1000 (4096). */
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_4096) = {
    0x1000100010001000ULL, 0x1000100010001000ULL,
    0x1000100010001000ULL, 0x1000100010001000ULL
};
DECLARE_ALIGNED
(
32
,
const
ymm_reg
,
ff_pw_8192
)
=
{
0x2000200020002000ULL
,
0x2000200020002000ULL
,
...
...
libavcodec/x86/constants.h
View file @
db7786e8
...
...
@@ -47,6 +47,7 @@ extern const ymm_reg ff_pw_512;
/* External declarations of ymm_reg constants; no storage is defined here. */
extern const ymm_reg ff_pw_1023;
extern const ymm_reg ff_pw_1024;
extern const ymm_reg ff_pw_2048;
extern const ymm_reg ff_pw_4095;
extern const ymm_reg ff_pw_4096;
extern const ymm_reg ff_pw_8192;
extern const ymm_reg ff_pw_m1;
...
...
libavcodec/x86/vp9dsp_init_16bpp_template.c
View file @
db7786e8
...
...
@@ -65,6 +65,62 @@ filters_8tap_1d_fn2(put, 16, BPC, avx2, 16bpp)
filters_8tap_1d_fn2
(
avg
,
16
,
BPC
,
avx2
,
16
bpp
)
#endif
/*
 * Declare the prototype of one loop-filter routine named
 * ff_vp9_loop_filter_<dir>_<wd>_<bpp>_<opt>, taking (dst, stride, E, I, H).
 * The expansion carries no trailing ';' -- the user of the macro adds it.
 */
#define decl_lpf_func(dir, wd, bpp, opt)                                    \
    void ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt(uint8_t *dst,      \
                                                         ptrdiff_t stride,  \
                                                         int E, int I, int H)
/*
 * Declare the sse2, ssse3 and avx prototypes (in that order) for one
 * dir/wd/bpp combination by expanding decl_lpf_func three times.
 */
#define decl_lpf_funcs(dir, wd, bpp)        \
    decl_lpf_func(dir, wd, bpp, sse2);      \
    decl_lpf_func(dir, wd, bpp, ssse3);     \
    decl_lpf_func(dir, wd, bpp, avx)
/*
 * Declare prototypes for wd = 4, 8 and 16 in one direction.
 * BPC is not defined in this block; presumably the including file
 * supplies it as a macro -- confirm against the includer.
 */
#define decl_lpf_funcs_wd(dir)          \
    decl_lpf_funcs(dir,  4, BPC);       \
    decl_lpf_funcs(dir,  8, BPC);       \
    decl_lpf_funcs(dir, 16, BPC)
/* Emit the loop-filter prototypes for dir = h and dir = v. */
decl_lpf_funcs_wd(h);
decl_lpf_funcs_wd(v);
/*
 * Define static loop_filter_<dir>_16_<bpp>_<opt>(), which calls
 * ff_vp9_loop_filter_<dir>_16_<bpp>_<opt>() twice with the same E/I/H:
 * first at dst, then at dst + off.
 *
 * `off` is substituted inside the generated function body, so it may refer
 * to that function's `stride` parameter (e.g. 8 * stride).  It is
 * parenthesized so that any expression passed as `off` is added to dst as a
 * single operand.
 */
#define lpf_16_wrapper(dir, off, bpp, opt) \
static void loop_filter_##dir##_16_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
                                                 int E, int I, int H) \
{ \
    ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst,         stride, E, I, H); \
    ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst + (off), stride, E, I, H); \
}
/*
 * Generate the h and v loop_filter_<dir>_16 wrappers for one bpp/opt pair.
 * The h wrapper is given an offset of 8 * stride, the v wrapper a constant 16.
 */
#define lpf_16_wrappers(bpp, opt)                   \
    lpf_16_wrapper(h, 8 * stride, bpp, opt);        \
    lpf_16_wrapper(v, 16,         bpp, opt)
/* Generate the loop_filter_<dir>_16_<BPC>_<opt> wrappers for sse2, ssse3 and avx. */
lpf_16_wrappers(BPC, sse2);
lpf_16_wrappers(BPC, ssse3);
lpf_16_wrappers(BPC, avx);
/*
 * Define static loop_filter_<dir>_<wd1><wd2>_<bpp>_<opt>(), which makes two
 * calls:
 *   - ff_vp9_loop_filter_<dir>_<wd1>_<bpp>_<opt>() at dst, with the low
 *     8 bits of E, I and H;
 *   - ff_vp9_loop_filter_<dir>_<wd2>_<bpp>_<opt>() at dst + off, with E, I
 *     and H each shifted right by 8.
 *
 * `off` is substituted inside the generated function body, so it may refer
 * to that function's `stride` parameter.  It is parenthesized so that any
 * expression passed as `off` is added to dst as a single operand.
 */
#define lpf_mix2_wrapper(dir, off, wd1, wd2, bpp, opt) \
static void loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
                                                           int E, int I, int H) \
{ \
    ff_vp9_loop_filter_##dir##_##wd1##_##bpp##_##opt(dst,         stride, \
                                                     E & 0xff, I & 0xff, H & 0xff); \
    ff_vp9_loop_filter_##dir##_##wd2##_##bpp##_##opt(dst + (off), stride, \
                                                     E >> 8,   I >> 8,   H >> 8); \
}
/*
 * Generate the h and v mix2 wrappers for one wd1/wd2/bpp/opt combination.
 * The h wrapper is given an offset of 8 * stride, the v wrapper a constant 16.
 */
#define lpf_mix2_wrappers(wd1, wd2, bpp, opt)                   \
    lpf_mix2_wrapper(h, 8 * stride, wd1, wd2, bpp, opt);        \
    lpf_mix2_wrapper(v, 16,         wd1, wd2, bpp, opt)
/*
 * Generate the mix2 wrappers for the (wd1, wd2) pairs (4,4), (4,8), (8,4)
 * and (8,8), in that order.
 *
 * The last line deliberately has neither a trailing ';' nor a line
 * continuation: the invocation supplies the final ';', and a dangling '\'
 * would splice whatever line follows into this definition.
 */
#define lpf_mix2_wrappers_set(bpp, opt)     \
    lpf_mix2_wrappers(4, 4, bpp, opt);      \
    lpf_mix2_wrappers(4, 8, bpp, opt);      \
    lpf_mix2_wrappers(8, 4, bpp, opt);      \
    lpf_mix2_wrappers(8, 8, bpp, opt)
/* Generate the full set of mix2 wrappers for sse2, ssse3 and avx. */
lpf_mix2_wrappers_set(BPC, sse2);
lpf_mix2_wrappers_set(BPC, ssse3);
lpf_mix2_wrappers_set(BPC, avx);
#endif
/* HAVE_YASM */
av_cold
void
INIT_FUNC
(
VP9DSPContext
*
dsp
)
...
...
@@ -72,9 +128,43 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp)
#if HAVE_YASM
int
cpu_flags
=
av_get_cpu_flags
();
/*
 * Store ff_vp9_loop_filter_<dir>_<wd>_<bpp>_<opt> into
 * dsp->loop_filter_8[idx1][idx2].  Expects a `dsp` pointer in scope at the
 * point of use; the expansion has no trailing ';'.
 */
#define init_lpf_8_func(idx1, idx2, dir, wd, bpp, opt)          \
    dsp->loop_filter_8[idx1][idx2] =                            \
        ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt
/*
 * Store loop_filter_<dir>_16_<bpp>_<opt> into dsp->loop_filter_16[idx].
 * Expects a `dsp` pointer in scope at the point of use; the expansion has
 * no trailing ';'.
 */
#define init_lpf_16_func(idx, dir, bpp, opt)                    \
    dsp->loop_filter_16[idx] =                                  \
        loop_filter_##dir##_16_##bpp##_##opt
/*
 * Store loop_filter_<dir>_<wd1><wd2>_<bpp>_<opt> into
 * dsp->loop_filter_mix2[idx1][idx2][idx3].  wd1 and wd2 are pasted back to
 * back (4 and 8 give "48").  Expects a `dsp` pointer in scope at the point
 * of use; the expansion has no trailing ';'.
 */
#define init_lpf_mix2_func(idx1, idx2, idx3, dir, wd1, wd2, bpp, opt)   \
    dsp->loop_filter_mix2[idx1][idx2][idx3] =                           \
        loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt
/*
 * Fill every loop-filter slot for one bpp/opt pair:
 *   loop_filter_8[0..2][0..1]      first index 0/1/2 for wd 4/8/16,
 *                                  second index 0 for h, 1 for v;
 *   loop_filter_16[0..1]           index 0 for h, 1 for v;
 *   loop_filter_mix2[a][b][0..1]   a and b are 0/1 for wd1 and wd2 of 4/8,
 *                                  last index 0 for h, 1 for v.
 */
#define init_lpf_funcs(bpp, opt)                            \
    init_lpf_8_func(0, 0, h,  4, bpp, opt);                 \
    init_lpf_8_func(0, 1, v,  4, bpp, opt);                 \
    init_lpf_8_func(1, 0, h,  8, bpp, opt);                 \
    init_lpf_8_func(1, 1, v,  8, bpp, opt);                 \
    init_lpf_8_func(2, 0, h, 16, bpp, opt);                 \
    init_lpf_8_func(2, 1, v, 16, bpp, opt);                 \
    init_lpf_16_func(0, h, bpp, opt);                       \
    init_lpf_16_func(1, v, bpp, opt);                       \
    init_lpf_mix2_func(0, 0, 0, h, 4, 4, bpp, opt);         \
    init_lpf_mix2_func(0, 1, 0, h, 4, 8, bpp, opt);         \
    init_lpf_mix2_func(1, 0, 0, h, 8, 4, bpp, opt);         \
    init_lpf_mix2_func(1, 1, 0, h, 8, 8, bpp, opt);         \
    init_lpf_mix2_func(0, 0, 1, v, 4, 4, bpp, opt);         \
    init_lpf_mix2_func(0, 1, 1, v, 4, 8, bpp, opt);         \
    init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt);         \
    init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
init_subpel3
(
0
,
put
,
BPC
,
sse2
);
init_subpel3
(
1
,
avg
,
BPC
,
sse2
);
init_lpf_funcs
(
BPC
,
sse2
);
}
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
init_lpf_funcs
(
BPC
,
ssse3
);
}
if
(
EXTERNAL_AVX
(
cpu_flags
))
{
init_lpf_funcs
(
BPC
,
avx
);
}
if
(
EXTERNAL_AVX2
(
cpu_flags
))
{
...
...
libavcodec/x86/vp9lpf_16bpp.asm
0 → 100644
View file @
db7786e8
This diff is collapsed.
Click to expand it.
libavcodec/x86/vp9mc_16bpp.asm
View file @
db7786e8
...
...
@@ -24,10 +24,10 @@
SECTION_RODATA 32

; NOTE(review): pw_4095 is both defined here and imported via cextern below.
; Presumably one of the two is the removed side of the diff -- confirm
; against the repository before assembling.
pw_4095: times 16 dw 0xfff
pd_64:   times 8 dd 64

cextern pw_1023
cextern pw_4095

SECTION .text
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment