Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
77f35967
Commit
77f35967
authored
Sep 16, 2015
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9: add fullpel (avg) MC SIMD for 10/12bpp.
parent
6354ff03
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
120 additions
and
65 deletions
+120
-65
vp9dsp_init.c
libavcodec/x86/vp9dsp_init.c
+28
-28
vp9dsp_init.h
libavcodec/x86/vp9dsp_init.h
+6
-6
vp9dsp_init_16bpp.c
libavcodec/x86/vp9dsp_init_16bpp.c
+42
-16
vp9mc.asm
libavcodec/x86/vp9mc.asm
+44
-15
No files found.
libavcodec/x86/vp9dsp_init.c
View file @
77f35967
...
...
@@ -29,20 +29,20 @@
#if HAVE_YASM
decl_fpel_func
(
put
,
4
,
mmx
);
decl_fpel_func
(
put
,
8
,
mmx
);
decl_fpel_func
(
put
,
16
,
sse
);
decl_fpel_func
(
put
,
32
,
sse
);
decl_fpel_func
(
put
,
64
,
sse
);
decl_fpel_func
(
avg
,
4
,
mmxext
);
decl_fpel_func
(
avg
,
8
,
mmxext
);
decl_fpel_func
(
avg
,
16
,
sse2
);
decl_fpel_func
(
avg
,
32
,
sse2
);
decl_fpel_func
(
avg
,
64
,
sse2
);
decl_fpel_func
(
put
,
32
,
avx
);
decl_fpel_func
(
put
,
64
,
avx
);
decl_fpel_func
(
avg
,
32
,
avx2
);
decl_fpel_func
(
avg
,
64
,
avx2
);
decl_fpel_func
(
put
,
4
,
,
mmx
);
decl_fpel_func
(
put
,
8
,
,
mmx
);
decl_fpel_func
(
put
,
16
,
,
sse
);
decl_fpel_func
(
put
,
32
,
,
sse
);
decl_fpel_func
(
put
,
64
,
,
sse
);
decl_fpel_func
(
avg
,
4
,
_8
,
mmxext
);
decl_fpel_func
(
avg
,
8
,
_8
,
mmxext
);
decl_fpel_func
(
avg
,
16
,
_8
,
sse2
);
decl_fpel_func
(
avg
,
32
,
_8
,
sse2
);
decl_fpel_func
(
avg
,
64
,
_8
,
sse2
);
decl_fpel_func
(
put
,
32
,
,
avx
);
decl_fpel_func
(
put
,
64
,
,
avx
);
decl_fpel_func
(
avg
,
32
,
_8
,
avx2
);
decl_fpel_func
(
avg
,
64
,
_8
,
avx2
);
#define mc_func(avg, sz, dir, opt, type, f_sz) \
void ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
...
...
@@ -378,8 +378,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
} while (0)
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
init_fpel_func
(
4
,
0
,
4
,
put
,
mmx
);
init_fpel_func
(
3
,
0
,
8
,
put
,
mmx
);
init_fpel_func
(
4
,
0
,
4
,
put
,
,
mmx
);
init_fpel_func
(
3
,
0
,
8
,
put
,
,
mmx
);
if
(
!
bitexact
)
{
dsp
->
itxfm_add
[
4
/* lossless */
][
DCT_DCT
]
=
dsp
->
itxfm_add
[
4
/* lossless */
][
ADST_DCT
]
=
...
...
@@ -392,8 +392,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
init_subpel2
(
4
,
0
,
4
,
put
,
mmxext
);
init_subpel2
(
4
,
1
,
4
,
avg
,
mmxext
);
init_fpel_func
(
4
,
1
,
4
,
avg
,
mmxext
);
init_fpel_func
(
3
,
1
,
8
,
avg
,
mmxext
);
init_fpel_func
(
4
,
1
,
4
,
avg
,
_8
,
mmxext
);
init_fpel_func
(
3
,
1
,
8
,
avg
,
_8
,
mmxext
);
dsp
->
itxfm_add
[
TX_4X4
][
DCT_DCT
]
=
ff_vp9_idct_idct_4x4_add_mmxext
;
init_dc_ipred
(
4
,
mmxext
);
init_dc_ipred
(
8
,
mmxext
);
...
...
@@ -401,9 +401,9 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
}
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
init_fpel_func
(
2
,
0
,
16
,
put
,
sse
);
init_fpel_func
(
1
,
0
,
32
,
put
,
sse
);
init_fpel_func
(
0
,
0
,
64
,
put
,
sse
);
init_fpel_func
(
2
,
0
,
16
,
put
,
,
sse
);
init_fpel_func
(
1
,
0
,
32
,
put
,
,
sse
);
init_fpel_func
(
0
,
0
,
64
,
put
,
,
sse
);
init_ipred
(
16
,
sse
,
v
,
VERT
);
init_ipred
(
32
,
sse
,
v
,
VERT
);
}
...
...
@@ -411,9 +411,9 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
init_subpel3_8to64
(
0
,
put
,
sse2
);
init_subpel3_8to64
(
1
,
avg
,
sse2
);
init_fpel_func
(
2
,
1
,
16
,
avg
,
sse2
);
init_fpel_func
(
1
,
1
,
32
,
avg
,
sse2
);
init_fpel_func
(
0
,
1
,
64
,
avg
,
sse2
);
init_fpel_func
(
2
,
1
,
16
,
avg
,
_8
,
sse2
);
init_fpel_func
(
1
,
1
,
32
,
avg
,
_8
,
sse2
);
init_fpel_func
(
0
,
1
,
64
,
avg
,
_8
,
sse2
);
init_lpf
(
sse2
);
dsp
->
itxfm_add
[
TX_4X4
][
ADST_DCT
]
=
ff_vp9_idct_iadst_4x4_add_sse2
;
dsp
->
itxfm_add
[
TX_4X4
][
DCT_ADST
]
=
ff_vp9_iadst_idct_4x4_add_sse2
;
...
...
@@ -483,14 +483,14 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
init_dir_tm_h_ipred
(
32
,
avx
);
}
if
(
EXTERNAL_AVX_FAST
(
cpu_flags
))
{
init_fpel_func
(
1
,
0
,
32
,
put
,
avx
);
init_fpel_func
(
0
,
0
,
64
,
put
,
avx
);
init_fpel_func
(
1
,
0
,
32
,
put
,
,
avx
);
init_fpel_func
(
0
,
0
,
64
,
put
,
,
avx
);
init_ipred
(
32
,
avx
,
v
,
VERT
);
}
if
(
EXTERNAL_AVX2
(
cpu_flags
))
{
init_fpel_func
(
1
,
1
,
32
,
avg
,
avx2
);
init_fpel_func
(
0
,
1
,
64
,
avg
,
avx2
);
init_fpel_func
(
1
,
1
,
32
,
avg
,
_8
,
avx2
);
init_fpel_func
(
0
,
1
,
64
,
avg
,
_8
,
avx2
);
if
(
ARCH_X86_64
)
{
#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
init_subpel3_32_64
(
0
,
put
,
avx2
);
...
...
libavcodec/x86/vp9dsp_init.h
View file @
77f35967
...
...
@@ -23,16 +23,16 @@
#ifndef AVCODEC_X86_VP9DSP_INIT_H
#define AVCODEC_X86_VP9DSP_INIT_H
#define decl_fpel_func(avg, sz, opt) \
void ff_vp9_##avg##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my)
#define decl_fpel_func(avg, sz,
bpp,
opt) \
void ff_vp9_##avg##sz##
bpp##
_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my)
#define init_fpel_func(idx1, idx2, sz, type, opt) \
#define init_fpel_func(idx1, idx2, sz, type,
bpp,
opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = \
dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_vp9_##type##sz##_##opt
dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_vp9_##type##sz##
bpp##
_##opt
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
,
int
bpp
);
...
...
libavcodec/x86/vp9dsp_init_16bpp.c
View file @
77f35967
...
...
@@ -29,14 +29,22 @@
#if HAVE_YASM
decl_fpel_func
(
put
,
8
,
mmx
);
decl_fpel_func
(
put
,
16
,
sse
);
decl_fpel_func
(
put
,
32
,
sse
);
decl_fpel_func
(
put
,
64
,
sse
);
decl_fpel_func
(
put
,
128
,
sse
);
decl_fpel_func
(
put
,
32
,
avx
);
decl_fpel_func
(
put
,
64
,
avx
);
decl_fpel_func
(
put
,
128
,
avx
);
decl_fpel_func
(
put
,
8
,
,
mmx
);
decl_fpel_func
(
avg
,
8
,
_16
,
mmxext
);
decl_fpel_func
(
put
,
16
,
,
sse
);
decl_fpel_func
(
put
,
32
,
,
sse
);
decl_fpel_func
(
put
,
64
,
,
sse
);
decl_fpel_func
(
put
,
128
,
,
sse
);
decl_fpel_func
(
avg
,
16
,
_16
,
sse2
);
decl_fpel_func
(
avg
,
32
,
_16
,
sse2
);
decl_fpel_func
(
avg
,
64
,
_16
,
sse2
);
decl_fpel_func
(
avg
,
128
,
_16
,
sse2
);
decl_fpel_func
(
put
,
32
,
,
avx
);
decl_fpel_func
(
put
,
64
,
,
avx
);
decl_fpel_func
(
put
,
128
,
,
avx
);
decl_fpel_func
(
avg
,
32
,
_16
,
avx2
);
decl_fpel_func
(
avg
,
64
,
_16
,
avx2
);
decl_fpel_func
(
avg
,
128
,
_16
,
avx2
);
#endif
/* HAVE_YASM */
...
...
@@ -46,19 +54,37 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp, int bpp)
int
cpu_flags
=
av_get_cpu_flags
();
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
init_fpel_func
(
4
,
0
,
8
,
put
,
mmx
);
init_fpel_func
(
4
,
0
,
8
,
put
,
,
mmx
);
}
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
init_fpel_func
(
4
,
1
,
8
,
avg
,
_16
,
mmxext
);
}
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
init_fpel_func
(
3
,
0
,
16
,
put
,
sse
);
init_fpel_func
(
2
,
0
,
32
,
put
,
sse
);
init_fpel_func
(
1
,
0
,
64
,
put
,
sse
);
init_fpel_func
(
0
,
0
,
128
,
put
,
sse
);
init_fpel_func
(
3
,
0
,
16
,
put
,
,
sse
);
init_fpel_func
(
2
,
0
,
32
,
put
,
,
sse
);
init_fpel_func
(
1
,
0
,
64
,
put
,
,
sse
);
init_fpel_func
(
0
,
0
,
128
,
put
,
,
sse
);
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
init_fpel_func
(
3
,
1
,
16
,
avg
,
_16
,
sse2
);
init_fpel_func
(
2
,
1
,
32
,
avg
,
_16
,
sse2
);
init_fpel_func
(
1
,
1
,
64
,
avg
,
_16
,
sse2
);
init_fpel_func
(
0
,
1
,
128
,
avg
,
_16
,
sse2
);
}
if
(
EXTERNAL_AVX_FAST
(
cpu_flags
))
{
init_fpel_func
(
2
,
0
,
32
,
put
,
avx
);
init_fpel_func
(
1
,
0
,
64
,
put
,
avx
);
init_fpel_func
(
0
,
0
,
128
,
put
,
avx
);
init_fpel_func
(
2
,
0
,
32
,
put
,
,
avx
);
init_fpel_func
(
1
,
0
,
64
,
put
,
,
avx
);
init_fpel_func
(
0
,
0
,
128
,
put
,
,
avx
);
}
if
(
EXTERNAL_AVX2
(
cpu_flags
))
{
init_fpel_func
(
2
,
1
,
32
,
avg
,
_16
,
avx2
);
init_fpel_func
(
1
,
1
,
64
,
avg
,
_16
,
avx2
);
init_fpel_func
(
0
,
1
,
128
,
avg
,
_16
,
avx2
);
}
#endif
/* HAVE_YASM */
...
...
libavcodec/x86/vp9mc.asm
View file @
77f35967
...
...
@@ -553,7 +553,7 @@ filter_vx2_fn avg
%endif
; ARCH_X86_64
%macro
fpel_fn
6
-
7
4
%macro
fpel_fn
6
-
8
0
,
4
%if
%2
==
4
%define
%%
srcfn
movh
%define
%%
dstfn
movh
...
...
@@ -562,12 +562,22 @@ filter_vx2_fn avg
%define
%%
dstfn
mova
%endif
%if
%7
==
8
%define
%%
pavg
pavgb
%define
%%
szsuf
_8
%elif
%7
==
16
%define
%%
pavg
pavgw
%define
%%
szsuf
_16
%else
%define
%%
szsuf
%endif
%if
%2
<=
mmsize
cglobal
vp9_
%1%2
,
5
,
7
,
4
,
dst
,
dstride
,
src
,
sstride
,
h
,
dstride3
,
sstride3
cglobal
vp9_
%1%2
%%
szsuf
,
5
,
7
,
4
,
dst
,
dstride
,
src
,
sstride
,
h
,
dstride3
,
sstride3
lea
sstride3q
,
[
sstrideq
*
3
]
lea
dstride3q
,
[
dstrideq
*
3
]
%else
cglobal
vp9_
%1%2
,
5
,
5
,
%7
,
dst
,
dstride
,
src
,
sstride
,
h
cglobal
vp9_
%1%2
%%
szsuf
,
5
,
5
,
%8
,
dst
,
dstride
,
src
,
sstride
,
h
%endif
.
loop
:
%%
srcfn
m0
,
[srcq]
...
...
@@ -582,10 +592,16 @@ cglobal vp9_%1%2, 5, 5, %7, dst, dstride, src, sstride, h
%endif
lea
srcq
,
[
srcq
+
sstrideq
*
%6
]
%ifidn
%1
,
avg
pavgb
m0
,
[dstq]
pavgb
m1
,
[
dstq
+
d%3
]
pavgb
m2
,
[
dstq
+
d%4
]
pavgb
m3
,
[
dstq
+
d%5
]
%%
pavg
m0
,
[dstq]
%%
pavg
m1
,
[
dstq
+
d%3
]
%%
pavg
m2
,
[
dstq
+
d%4
]
%%
pavg
m3
,
[
dstq
+
d%5
]
%if
%2
/
mmsize
==
8
%%
pavg
m4
,
[
dstq
+
mmsize
*
4
]
%%
pavg
m5
,
[
dstq
+
mmsize
*
5
]
%%
pavg
m6
,
[
dstq
+
mmsize
*
6
]
%%
pavg
m7
,
[
dstq
+
mmsize
*
7
]
%endif
%endif
%%
dstfn
[dstq],
m0
%%
dstfn
[
dstq
+
d%3
]
,
m1
...
...
@@ -611,25 +627,38 @@ INIT_MMX mmx
fpel_fn
put
,
4
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
put
,
8
,
strideq
,
strideq
*
2
,
stride3q
,
4
INIT_MMX
mmxext
fpel_fn
avg
,
4
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
avg
,
8
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
avg
,
4
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
8
fpel_fn
avg
,
8
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
8
INIT_XMM
sse
fpel_fn
put
,
16
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
put
,
32
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
fpel_fn
put
,
64
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
fpel_fn
put
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
8
fpel_fn
put
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
0
,
8
INIT_XMM
sse2
fpel_fn
avg
,
16
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
avg
,
32
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
fpel_fn
avg
,
64
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
fpel_fn
avg
,
16
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
8
fpel_fn
avg
,
32
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
,
8
fpel_fn
avg
,
64
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
8
INIT_YMM
avx
fpel_fn
put
,
32
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
put
,
64
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
fpel_fn
put
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
%if
HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
fpel_fn
avg
,
32
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
avg
,
64
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
fpel_fn
avg
,
32
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
8
fpel_fn
avg
,
64
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
,
8
%endif
INIT_MMX
mmxext
fpel_fn
avg
,
8
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
16
INIT_XMM
sse2
fpel_fn
avg
,
16
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
16
fpel_fn
avg
,
32
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
,
16
fpel_fn
avg
,
64
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
16
fpel_fn
avg
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
16
,
8
%if
HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
fpel_fn
avg
,
32
,
strideq
,
strideq
*
2
,
stride3q
,
4
,
16
fpel_fn
avg
,
64
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
,
16
fpel_fn
avg
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
16
%endif
%undef
s16
%undef
d16
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment