Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
6354ff03
Commit
6354ff03
authored
Sep 16, 2015
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9: add fullpel (put) MC SIMD for 10/12bpp.
parent
d64f7d42
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
155 additions
and
44 deletions
+155
-44
Makefile
libavcodec/x86/Makefile
+2
-1
vp9dsp_init.c
libavcodec/x86/vp9dsp_init.c
+33
-41
vp9dsp_init.h
libavcodec/x86/vp9dsp_init.h
+39
-0
vp9dsp_init_16bpp.c
libavcodec/x86/vp9dsp_init_16bpp.c
+65
-0
vp9mc.asm
libavcodec/x86/vp9mc.asm
+16
-2
No files found.
libavcodec/x86/Makefile
View file @
6354ff03
...
...
@@ -62,7 +62,8 @@ OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
OBJS-$(CONFIG_VC1_DECODER)
+=
x86/vc1dsp_init.o
OBJS-$(CONFIG_VORBIS_DECODER)
+=
x86/vorbisdsp_init.o
OBJS-$(CONFIG_VP6_DECODER)
+=
x86/vp6dsp_init.o
OBJS-$(CONFIG_VP9_DECODER)
+=
x86/vp9dsp_init.o
OBJS-$(CONFIG_VP9_DECODER)
+=
x86/vp9dsp_init.o
\
x86/vp9dsp_init_16bpp.o
OBJS-$(CONFIG_WEBP_DECODER)
+=
x86/vp8dsp_init.o
...
...
libavcodec/x86/vp9dsp_init.c
View file @
6354ff03
...
...
@@ -23,31 +23,26 @@
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/vp9dsp.h"
#include "libavcodec/x86/vp9dsp_init.h"
#if HAVE_YASM
#define fpel_func(avg, sz, opt) \
void ff_vp9_##avg##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my)
fpel_func
(
put
,
4
,
mmx
);
fpel_func
(
put
,
8
,
mmx
);
fpel_func
(
put
,
16
,
sse
);
fpel_func
(
put
,
32
,
sse
);
fpel_func
(
put
,
64
,
sse
);
fpel_func
(
avg
,
4
,
mmxext
);
fpel_func
(
avg
,
8
,
mmxext
);
fpel_func
(
avg
,
16
,
sse2
);
fpel_func
(
avg
,
32
,
sse2
);
fpel_func
(
avg
,
64
,
sse2
);
fpel_func
(
put
,
32
,
avx
);
fpel_func
(
put
,
64
,
avx
);
fpel_func
(
avg
,
32
,
avx2
);
fpel_func
(
avg
,
64
,
avx2
);
#undef fpel_func
decl_fpel_func
(
put
,
4
,
mmx
);
decl_fpel_func
(
put
,
8
,
mmx
);
decl_fpel_func
(
put
,
16
,
sse
);
decl_fpel_func
(
put
,
32
,
sse
);
decl_fpel_func
(
put
,
64
,
sse
);
decl_fpel_func
(
avg
,
4
,
mmxext
);
decl_fpel_func
(
avg
,
8
,
mmxext
);
decl_fpel_func
(
avg
,
16
,
sse2
);
decl_fpel_func
(
avg
,
32
,
sse2
);
decl_fpel_func
(
avg
,
64
,
sse2
);
decl_fpel_func
(
put
,
32
,
avx
);
decl_fpel_func
(
put
,
64
,
avx
);
decl_fpel_func
(
avg
,
32
,
avx2
);
decl_fpel_func
(
avg
,
64
,
avx2
);
#define mc_func(avg, sz, dir, opt, type, f_sz) \
void ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
...
...
@@ -311,16 +306,13 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
{
#if HAVE_YASM
int
cpu_flags
;
if
(
bpp
!=
8
)
return
;
if
(
bpp
!=
8
)
{
ff_vp9dsp_init_16bpp_x86
(
dsp
,
bpp
);
return
;
}
cpu_flags
=
av_get_cpu_flags
();
#define init_fpel(idx1, idx2, sz, type, opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = \
dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_vp9_##type##sz##_##opt
#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
...
...
@@ -386,8 +378,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
} while (0)
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
init_fpel
(
4
,
0
,
4
,
put
,
mmx
);
init_fpel
(
3
,
0
,
8
,
put
,
mmx
);
init_fpel
_func
(
4
,
0
,
4
,
put
,
mmx
);
init_fpel
_func
(
3
,
0
,
8
,
put
,
mmx
);
if
(
!
bitexact
)
{
dsp
->
itxfm_add
[
4
/* lossless */
][
DCT_DCT
]
=
dsp
->
itxfm_add
[
4
/* lossless */
][
ADST_DCT
]
=
...
...
@@ -400,8 +392,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
init_subpel2
(
4
,
0
,
4
,
put
,
mmxext
);
init_subpel2
(
4
,
1
,
4
,
avg
,
mmxext
);
init_fpel
(
4
,
1
,
4
,
avg
,
mmxext
);
init_fpel
(
3
,
1
,
8
,
avg
,
mmxext
);
init_fpel
_func
(
4
,
1
,
4
,
avg
,
mmxext
);
init_fpel
_func
(
3
,
1
,
8
,
avg
,
mmxext
);
dsp
->
itxfm_add
[
TX_4X4
][
DCT_DCT
]
=
ff_vp9_idct_idct_4x4_add_mmxext
;
init_dc_ipred
(
4
,
mmxext
);
init_dc_ipred
(
8
,
mmxext
);
...
...
@@ -409,9 +401,9 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
}
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
init_fpel
(
2
,
0
,
16
,
put
,
sse
);
init_fpel
(
1
,
0
,
32
,
put
,
sse
);
init_fpel
(
0
,
0
,
64
,
put
,
sse
);
init_fpel
_func
(
2
,
0
,
16
,
put
,
sse
);
init_fpel
_func
(
1
,
0
,
32
,
put
,
sse
);
init_fpel
_func
(
0
,
0
,
64
,
put
,
sse
);
init_ipred
(
16
,
sse
,
v
,
VERT
);
init_ipred
(
32
,
sse
,
v
,
VERT
);
}
...
...
@@ -419,9 +411,9 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
init_subpel3_8to64
(
0
,
put
,
sse2
);
init_subpel3_8to64
(
1
,
avg
,
sse2
);
init_fpel
(
2
,
1
,
16
,
avg
,
sse2
);
init_fpel
(
1
,
1
,
32
,
avg
,
sse2
);
init_fpel
(
0
,
1
,
64
,
avg
,
sse2
);
init_fpel
_func
(
2
,
1
,
16
,
avg
,
sse2
);
init_fpel
_func
(
1
,
1
,
32
,
avg
,
sse2
);
init_fpel
_func
(
0
,
1
,
64
,
avg
,
sse2
);
init_lpf
(
sse2
);
dsp
->
itxfm_add
[
TX_4X4
][
ADST_DCT
]
=
ff_vp9_idct_iadst_4x4_add_sse2
;
dsp
->
itxfm_add
[
TX_4X4
][
DCT_ADST
]
=
ff_vp9_iadst_idct_4x4_add_sse2
;
...
...
@@ -491,14 +483,14 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
init_dir_tm_h_ipred
(
32
,
avx
);
}
if
(
EXTERNAL_AVX_FAST
(
cpu_flags
))
{
init_fpel
(
1
,
0
,
32
,
put
,
avx
);
init_fpel
(
0
,
0
,
64
,
put
,
avx
);
init_fpel
_func
(
1
,
0
,
32
,
put
,
avx
);
init_fpel
_func
(
0
,
0
,
64
,
put
,
avx
);
init_ipred
(
32
,
avx
,
v
,
VERT
);
}
if
(
EXTERNAL_AVX2
(
cpu_flags
))
{
init_fpel
(
1
,
1
,
32
,
avg
,
avx2
);
init_fpel
(
0
,
1
,
64
,
avg
,
avx2
);
init_fpel
_func
(
1
,
1
,
32
,
avg
,
avx2
);
init_fpel
_func
(
0
,
1
,
64
,
avg
,
avx2
);
if
(
ARCH_X86_64
)
{
#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
init_subpel3_32_64
(
0
,
put
,
avx2
);
...
...
libavcodec/x86/vp9dsp_init.h
0 → 100644
View file @
6354ff03
/*
* VP9 SIMD optimizations
*
* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_X86_VP9DSP_INIT_H
#define AVCODEC_X86_VP9DSP_INIT_H
/*
 * Declare the prototype of one fullpel motion-compensation routine.
 *
 * The declared symbol is ff_vp9_<op><sz>_<opt>; for example
 * decl_fpel_func(put, 16, sse) declares ff_vp9_put16_sse().
 * The expansion carries no trailing semicolon, so the caller writes it.
 */
#define decl_fpel_func(op, sz, opt)                       \
    void ff_vp9_##op##sz##_##opt(uint8_t *dst,            \
                                 ptrdiff_t dst_stride,    \
                                 const uint8_t *src,      \
                                 ptrdiff_t src_stride,    \
                                 int h, int mx, int my)
/*
 * Store ff_vp9_<type><sz>_<opt> into dsp->mc[first][filter][second][0][0]
 * for each of the four filter types (smooth, regular, sharp, bilinear).
 *
 * A variable named dsp must be in scope at the point of use.  The expansion
 * is one chained assignment expression without a trailing semicolon.
 * In the callers visible in this change the first index tracks the block
 * width and the second index is 0 for put and 1 for avg.
 */
#define init_fpel_func(bw_idx, op_idx, sz, type, opt)            \
    dsp->mc[bw_idx][FILTER_BILINEAR    ][op_idx][0][0] =         \
    dsp->mc[bw_idx][FILTER_8TAP_SHARP  ][op_idx][0][0] =         \
    dsp->mc[bw_idx][FILTER_8TAP_REGULAR][op_idx][0][0] =         \
    dsp->mc[bw_idx][FILTER_8TAP_SMOOTH ][op_idx][0][0] = ff_vp9_##type##sz##_##opt
/*
 * Install the x86 SIMD routines used when the bit depth is not 8.
 *
 * dsp: DSP context whose mc[] function-pointer table is filled in.
 * bpp: bit depth handed over by the caller; the definition in
 *      vp9dsp_init_16bpp.c does not read it in this revision.
 *
 * Called from ff_vp9dsp_init_x86() for every bpp other than 8.
 */
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
,
int
bpp
);
#endif
/* AVCODEC_X86_VP9DSP_INIT_H */
libavcodec/x86/vp9dsp_init_16bpp.c
0 → 100644
View file @
6354ff03
/*
* VP9 SIMD optimizations
*
* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/vp9dsp.h"
#include "libavcodec/x86/vp9dsp_init.h"
#if HAVE_YASM

/*
 * Prototypes of the fullpel "put" routines referenced by
 * ff_vp9dsp_init_16bpp_x86() below, grouped by instruction set.
 * The numeric suffix is the size passed to fpel_fn in vp9mc.asm.
 */
decl_fpel_func(put,   8, mmx);

decl_fpel_func(put,  16, sse);
decl_fpel_func(put,  32, sse);
decl_fpel_func(put,  64, sse);
decl_fpel_func(put, 128, sse);

decl_fpel_func(put,  32, avx);
decl_fpel_func(put,  64, avx);
decl_fpel_func(put, 128, avx);

#endif /* HAVE_YASM */
/*
 * Fill the fullpel "put" entries of dsp->mc[] with x86 SIMD routines,
 * chosen from the CPU flags reported by av_get_cpu_flags().
 *
 * Only index 0 of the third mc[] dimension is written here; the callers of
 * init_fpel_func in vp9dsp_init.c use that index for put and 1 for avg.
 * bpp is accepted for the caller's benefit but is not read.
 * Without HAVE_YASM the function does nothing.
 */
av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp, int bpp)
{
#if HAVE_YASM
    const int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        init_fpel_func(4, 0,   8, put, mmx);
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        /* The four entries are independent; listed by ascending mc[] index. */
        init_fpel_func(0, 0, 128, put, sse);
        init_fpel_func(1, 0,  64, put, sse);
        init_fpel_func(2, 0,  32, put, sse);
        init_fpel_func(3, 0,  16, put, sse);
    }

    /*
     * Must stay after the SSE block: when both checks pass, these later
     * assignments replace the SSE pointers stored in entries 0..2.
     */
    if (EXTERNAL_AVX_FAST(cpu_flags)) {
        init_fpel_func(0, 0, 128, put, avx);
        init_fpel_func(1, 0,  64, put, avx);
        init_fpel_func(2, 0,  32, put, avx);
    }
#endif /* HAVE_YASM */
}
libavcodec/x86/vp9mc.asm
View file @
6354ff03
...
...
@@ -553,7 +553,7 @@ filter_vx2_fn avg
%endif
; ARCH_X86_64
%macro
fpel_fn
6
%macro
fpel_fn
6
-
7
4
%if
%2
==
4
%define
%%
srcfn
movh
%define
%%
dstfn
movh
...
...
@@ -567,13 +567,19 @@ cglobal vp9_%1%2, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
lea
sstride3q
,
[
sstrideq
*
3
]
lea
dstride3q
,
[
dstrideq
*
3
]
%else
cglobal
vp9_
%1%2
,
5
,
5
,
4
,
dst
,
dstride
,
src
,
sstride
,
h
cglobal
vp9_
%1%2
,
5
,
5
,
%7
,
dst
,
dstride
,
src
,
sstride
,
h
%endif
.
loop
:
%%
srcfn
m0
,
[srcq]
%%
srcfn
m1
,
[
srcq
+
s%3
]
%%
srcfn
m2
,
[
srcq
+
s%4
]
%%
srcfn
m3
,
[
srcq
+
s%5
]
%if
%2
/
mmsize
==
8
%%
srcfn
m4
,
[
srcq
+
mmsize
*
4
]
%%
srcfn
m5
,
[
srcq
+
mmsize
*
5
]
%%
srcfn
m6
,
[
srcq
+
mmsize
*
6
]
%%
srcfn
m7
,
[
srcq
+
mmsize
*
7
]
%endif
lea
srcq
,
[
srcq
+
sstrideq
*
%6
]
%ifidn
%1
,
avg
pavgb
m0
,
[dstq]
...
...
@@ -585,6 +591,12 @@ cglobal vp9_%1%2, 5, 5, 4, dst, dstride, src, sstride, h
%%
dstfn
[
dstq
+
d%3
]
,
m1
%%
dstfn
[
dstq
+
d%4
]
,
m2
%%
dstfn
[
dstq
+
d%5
]
,
m3
%if
%2
/
mmsize
==
8
%%
dstfn
[
dstq
+
mmsize
*
4
]
,
m4
%%
dstfn
[
dstq
+
mmsize
*
5
]
,
m5
%%
dstfn
[
dstq
+
mmsize
*
6
]
,
m6
%%
dstfn
[
dstq
+
mmsize
*
7
]
,
m7
%endif
lea
dstq
,
[
dstq
+
dstrideq
*
%6
]
sub
hd
,
%6
jnz
.
loop
...
...
@@ -605,6 +617,7 @@ INIT_XMM sse
fpel_fn
put
,
16
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
put
,
32
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
fpel_fn
put
,
64
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
fpel_fn
put
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
,
8
INIT_XMM
sse2
fpel_fn
avg
,
16
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
avg
,
32
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
...
...
@@ -612,6 +625,7 @@ fpel_fn avg, 64, mmsize, mmsize*2, mmsize*3, 1
INIT_YMM
avx
fpel_fn
put
,
32
,
strideq
,
strideq
*
2
,
stride3q
,
4
fpel_fn
put
,
64
,
mmsize
,
strideq
,
strideq
+
mmsize
,
2
fpel_fn
put
,
128
,
mmsize
,
mmsize
*
2
,
mmsize
*
3
,
1
%if
HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
fpel_fn
avg
,
32
,
strideq
,
strideq
*
2
,
stride3q
,
4
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment