Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
f1548c00
Commit
f1548c00
authored
Sep 22, 2013
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Full-pixel MC functions.
Decoding time of ped1080p.webm goes from 11.3sec to 11.1sec.
parent
c07ac8d4
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
100 additions
and
0 deletions
+100
-0
vp9dsp.asm
libavcodec/x86/vp9dsp.asm
+57
-0
vp9dsp_init.c
libavcodec/x86/vp9dsp_init.c
+43
-0
No files found.
libavcodec/x86/vp9dsp.asm
View file @
f1548c00
...
...
@@ -219,3 +219,60 @@ filter_v_fn avg
INIT_XMM
ssse3
filter_v_fn
put
filter_v_fn
avg
; fpel_fn: emit one full-pixel block function that copies (or, for avg,
; averages into) the destination four vector-accesses at a time.
;   %1     name prefix and operation; when it is `avg` the loaded data is
;          combined with the bytes already at dst using pavgb before storing
;   %2     block width; appended to %1 to form the cglobal name, and used to
;          pick the move instructions and the register setup
;   %3-%5  offsets of the 2nd, 3rd and 4th access relative to the first; each
;          one is pasted after `s` for the source and `d` for the destination
;   %6     number of rows consumed per loop iteration (src/dst advance by
;          stride * %6 and h is decremented by %6)
%macro fpel_fn 6
; Width 4 uses movh for both directions; every other width loads with movu
; and stores with mova.
%if %2 == 4
%define %%load  movh
%define %%store movh
%else
%define %%load  movu
%define %%store mova
%endif

; Widths up to 16 take two extra registers holding stride * 3 for each side.
%if %2 <= 16
cglobal %1%2, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
    lea          sstride3q, [sstrideq*3]
    lea          dstride3q, [dstrideq*3]
%else
cglobal %1%2, 5, 5, 4, dst, dstride, src, sstride, h
%endif

.next_rows:
    %%load              m0, [srcq]
    %%load              m1, [srcq+s%3]
    %%load              m2, [srcq+s%4]
    %%load              m3, [srcq+s%5]
    lea               srcq, [srcq+sstrideq*%6]
%ifidn %1, avg
    pavgb               m0, [dstq]
    pavgb               m1, [dstq+d%3]
    pavgb               m2, [dstq+d%4]
    pavgb               m3, [dstq+d%5]
%endif
    %%store         [dstq], m0
    %%store     [dstq+d%3], m1
    %%store     [dstq+d%4], m2
    %%store     [dstq+d%5], m3
    lea               dstq, [dstq+dstrideq*%6]
    ; The loop ends only when h reaches exactly zero, so h is expected to be
    ; a multiple of %6 -- NOTE(review): confirm against callers.
    sub                 hd, %6
    jnz .next_rows
    RET
%endmacro
; Instantiate the full-pixel put/avg functions for widths 4..64.
;
; fpel_fn pastes each offset argument after `s` (source) and `d` (destination).
; Stride-based arguments therefore become register names (sstrideq,
; dstride3q, ...). The mmsize-based arguments need s16/d16 to resolve to a
; plain 16 -- presumably mmsize has already been replaced by 16 under INIT_XMM
; when the paste happens; TODO confirm against x86inc/NASM expansion order.
%define d16 16
%define s16 16

; Widths 4 and 8: four consecutive rows per iteration (offsets stride,
; stride*2, stride*3).
INIT_MMX mmx
fpel_fn put, 4,  strideq, strideq*2, stride3q, 4
fpel_fn put, 8,  strideq, strideq*2, stride3q, 4
; NOTE(review): avg on MMX registers is built under the sse flag, presumably
; because pavgb is not part of baseline MMX -- confirm.
INIT_MMX sse
fpel_fn avg, 4,  strideq, strideq*2, stride3q, 4
fpel_fn avg, 8,  strideq, strideq*2, stride3q, 4

; Width 16: four rows per iteration.
; Width 32: two rows per iteration, each row accessed at offsets 0 and mmsize.
; Width 64: one row per iteration, accessed at offsets 0, mmsize, mmsize*2
; and mmsize*3.
INIT_XMM sse
fpel_fn put, 16, strideq, strideq*2, stride3q, 4
fpel_fn put, 32, mmsize,  strideq,   strideq+mmsize, 2
fpel_fn put, 64, mmsize,  mmsize*2,  mmsize*3, 1
INIT_XMM sse2
fpel_fn avg, 16, strideq, strideq*2, stride3q, 4
fpel_fn avg, 32, mmsize,  strideq,   strideq+mmsize, 2
fpel_fn avg, 64, mmsize,  mmsize*2,  mmsize*3, 1

; The helper defines are only meaningful for the instantiations above.
%undef s16
%undef d16
libavcodec/x86/vp9dsp_init.c
View file @
f1548c00
...
...
@@ -27,6 +27,22 @@
#if HAVE_YASM
/*
 * Prototypes for the full-pixel put/avg routines.
 *
 * fpel_func(type, sz, opt) declares ff_<type><sz>_<opt>(), where type is
 * put or avg, sz is the block width and opt is the instruction-set suffix.
 * mx and my are part of the signature, but the assembly entry points only
 * name the first five arguments (dst, dst stride, src, src stride, h).
 */
#define fpel_func(type, sz, opt)                                         \
void ff_##type##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride,           \
                           const uint8_t *src, ptrdiff_t src_stride,     \
                           int h, int mx, int my)

/* put: plain copy */
fpel_func(put,  4, mmx);
fpel_func(put,  8, mmx);
fpel_func(put, 16, sse);
fpel_func(put, 32, sse);
fpel_func(put, 64, sse);

/* avg: average with the existing destination */
fpel_func(avg,  4, sse);
fpel_func(avg,  8, sse);
fpel_func(avg, 16, sse2);
fpel_func(avg, 32, sse2);
fpel_func(avg, 64, sse2);

#undef fpel_func
#define mc_func(avg, sz, dir, opt) \
void ff_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
...
...
@@ -141,6 +157,13 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
#if HAVE_YASM
int
cpu_flags
=
av_get_cpu_flags
();
/*
 * init_fpel: store ff_<type><sz>_<opt> into dsp->mc[idx1][filter][idx2][0][0]
 * for all four filter kinds (smooth, regular, sharp, bilinear), so the same
 * routine is used whichever filter is selected. The trailing [0][0] are the
 * positions that init_subpel1 indexes with idxh/idxv.
 * Expands to a single chained assignment without a trailing semicolon.
 */
#define init_fpel(idx1, idx2, sz, type, opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = \
dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_##type##sz##_##opt
#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
...
...
@@ -158,11 +181,31 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_subpel2(idx, 0, 1, v, type, opt); \
init_subpel2(idx, 1, 0, h, type, opt)
/*
 * Install the optimized routines according to the detected CPU flags.
 * In the init_fpel calls below, idx1 values 4, 3, 2, 1, 0 are paired with
 * block widths 4, 8, 16, 32, 64, and idx2 is 0 for put and 1 for avg.
 * Each flag is tested independently, so every matching group is installed.
 */
if (cpu_flags & AV_CPU_FLAG_MMX) {
    /* put for the two narrowest widths */
    init_fpel(4, 0,  4, put, mmx);
    init_fpel(3, 0,  8, put, mmx);
}

if (cpu_flags & AV_CPU_FLAG_SSE) {
    /* put for widths 16..64, avg for widths 4 and 8 */
    init_fpel(2, 0, 16, put, sse);
    init_fpel(1, 0, 32, put, sse);
    init_fpel(0, 0, 64, put, sse);
    init_fpel(4, 1,  4, avg, sse);
    init_fpel(3, 1,  8, avg, sse);
}

if (cpu_flags & AV_CPU_FLAG_SSE2) {
    /* avg for widths 16..64 */
    init_fpel(2, 1, 16, avg, sse2);
    init_fpel(1, 1, 32, avg, sse2);
    init_fpel(0, 1, 64, avg, sse2);
}

if (cpu_flags & AV_CPU_FLAG_SSSE3) {
    /* sub-pixel entries; index 0 is passed with put and 1 with avg */
    init_subpel3(0, put, ssse3);
    init_subpel3(1, avg, ssse3);
}
#undef init_fpel
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment