Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
344d5190
Commit
344d5190
authored
Sep 16, 2015
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9: add subpel MC SIMD for 10/12bpp.
parent
77f35967
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
708 additions
and
165 deletions
+708
-165
Makefile
libavcodec/x86/Makefile
+4
-1
vp9dsp_init.c
libavcodec/x86/vp9dsp_init.c
+43
-154
vp9dsp_init.h
libavcodec/x86/vp9dsp_init.h
+109
-1
vp9dsp_init_10bpp.c
libavcodec/x86/vp9dsp_init_10bpp.c
+25
-0
vp9dsp_init_12bpp.c
libavcodec/x86/vp9dsp_init_12bpp.c
+25
-0
vp9dsp_init_16bpp.c
libavcodec/x86/vp9dsp_init_16bpp.c
+1
-1
vp9dsp_init_16bpp_template.c
libavcodec/x86/vp9dsp_init_16bpp_template.c
+62
-0
vp9mc.asm
libavcodec/x86/vp9mc.asm
+18
-8
vp9mc_16bpp.asm
libavcodec/x86/vp9mc_16bpp.asm
+421
-0
No files found.
libavcodec/x86/Makefile
View file @
344d5190
...
...
@@ -63,6 +63,8 @@ OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_init.o
OBJS-$(CONFIG_VORBIS_DECODER)
+=
x86/vorbisdsp_init.o
OBJS-$(CONFIG_VP6_DECODER)
+=
x86/vp6dsp_init.o
OBJS-$(CONFIG_VP9_DECODER)
+=
x86/vp9dsp_init.o
\
x86/vp9dsp_init_10bpp.o
\
x86/vp9dsp_init_12bpp.o
\
x86/vp9dsp_init_16bpp.o
OBJS-$(CONFIG_WEBP_DECODER)
+=
x86/vp8dsp_init.o
...
...
@@ -157,5 +159,6 @@ YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
YASM-OBJS-$(CONFIG_VP9_DECODER)
+=
x86/vp9intrapred.o
\
x86/vp9itxfm.o
\
x86/vp9lpf.o
\
x86/vp9mc.o
x86/vp9mc.o
\
x86/vp9mc_16bpp.o
YASM-OBJS-$(CONFIG_WEBP_DECODER)
+=
x86/vp8dsp.o
libavcodec/x86/vp9dsp_init.c
View file @
344d5190
This diff is collapsed.
Click to expand it.
libavcodec/x86/vp9dsp_init.h
View file @
344d5190
...
...
@@ -28,12 +28,120 @@ void ff_vp9_##avg##sz##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my)
/*
 * Declare the prototype of one 1-D 8-tap MC function named
 * ff_vp9_<avg>_8tap_1d_<dir>_<sz>_<bpp>_<opt>.
 *
 * avg   - first name component (instantiated with put/avg below)
 * sz    - size component of the name
 * dir   - direction component of the name (instantiated with h/v below)
 * opt   - instruction-set suffix of the name
 * type  - element type of the filter table rows
 * f_sz  - number of elements in each filter table row
 * bpp   - bit-depth component of the name
 *
 * The expansion has no trailing semicolon; the user supplies it.
 */
#define decl_mc_func(avg, sz, dir, opt, type, f_sz, bpp) \
void ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, const type (*filter)[f_sz])
/*
 * Declare the four decl_mc_func() prototypes for one size/opt/bpp
 * combination: put and avg, each in the h and v direction.
 * The last declaration is left without a semicolon for the user to add.
 */
#define decl_mc_funcs(sz, opt, type, fsz, bpp) \
decl_mc_func(put, sz, h, opt, type, fsz, bpp); \
decl_mc_func(avg, sz, h, opt, type, fsz, bpp); \
decl_mc_func(put, sz, v, opt, type, fsz, bpp); \
decl_mc_func(avg, sz, v, opt, type, fsz, bpp)
/*
 * Define a static always-inline function for size `sz` by calling the
 * function for size `hsz` twice with the same strides, height and filter:
 * first on dst/src, then on dst + hszb / src + hszb.
 *
 * dst and src are uint8_t pointers, so hszb is an offset in bytes.
 * The ff_vp9_..._<hsz>_... function must already be declared or defined
 * where this macro is expanded.
 */
#define mc_rep_func(avg, sz, hsz, hszb, dir, opt, type, f_sz, bpp) \
static av_always_inline void \
ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, const type (*filter)[f_sz]) \
{ \
ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst, dst_stride, src, \
src_stride, h, filter); \
ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst + hszb, dst_stride, src + hszb, \
src_stride, h, filter); \
}
/*
 * Instantiate mc_rep_func() four times for one size: put and avg,
 * each in the h and v direction.
 */
#define mc_rep_funcs(sz, hsz, hszb, opt, type, fsz, bpp) \
mc_rep_func(put, sz, hsz, hszb, h, opt, type, fsz, bpp); \
mc_rep_func(avg, sz, hsz, hszb, h, opt, type, fsz, bpp); \
mc_rep_func(put, sz, hsz, hszb, v, opt, type, fsz, bpp); \
mc_rep_func(avg, sz, hsz, hszb, v, opt, type, fsz, bpp)
/*
 * Define a static wrapper named <op>_8tap_<fname>_<sz><dir>_<bpp>_<opt>
 * with the (dst, dst_stride, src, src_stride, h, mx, my) signature.
 *
 * The wrapper forwards to ff_vp9_<op>_8tap_1d_<dir>_<sz>_<bpp>_<opt>,
 * passing ff_filters_<f_opt>[f][dvar - 1] as the filter. `dvar` names the
 * wrapper parameter used for the lookup (mx or my); the other one is unused.
 */
#define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, bpp, opt) \
static void op##_8tap_##fname##_##sz##dir##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
ff_vp9_##op##_8tap_1d_##dir##_##sz##_##bpp##_##opt(dst, dst_stride, src, src_stride, \
h, ff_filters_##f_opt[f][dvar - 1]); \
}
/*
 * Instantiate filter_8tap_1d_fn() for the regular, sharp and smooth
 * 8-tap filters of one op/size/direction combination.
 */
#define filters_8tap_1d_fn(op, sz, dir, dvar, bpp, opt, f_opt) \
filter_8tap_1d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, dir, dvar, bpp, opt) \
filter_8tap_1d_fn(op, sz, FILTER_8TAP_SHARP, f_opt, sharp, dir, dvar, bpp, opt) \
filter_8tap_1d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, dir, dvar, bpp, opt)
/*
 * Instantiate the 1-D wrappers for both directions of one size:
 * direction h indexes the filter table with mx, direction v with my.
 */
#define filters_8tap_1d_fn2(op, sz, bpp, opt, f_opt) \
filters_8tap_1d_fn(op, sz, h, mx, bpp, opt, f_opt) \
filters_8tap_1d_fn(op, sz, v, my, bpp, opt, f_opt)
/*
 * Instantiate the 1-D wrappers for every size: 64, 32, 16 and 8 use the
 * `opt8` suffix, size 4 uses the `opt4` suffix.
 */
#define filters_8tap_1d_fn3(op, bpp, opt4, opt8, f_opt) \
filters_8tap_1d_fn2(op, 64, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 32, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 16, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 8, bpp, opt8, f_opt) \
filters_8tap_1d_fn2(op, 4, bpp, opt4, f_opt)
/*
 * Define a static 2-D (hv) wrapper named <op>_8tap_<fname>_<sz>hv_<bpp>_<opt>
 * built from two 1-D passes through a local scratch buffer:
 *
 *  1. The horizontal "put" function filters h + 7 rows, starting 3 rows
 *     above src, into `temp` using a row stride of 64 * bytes and the
 *     filter selected by mx.
 *  2. The vertical <op> function reads from `temp` starting at row 3
 *     (offset 3 * bytes * 64) and writes h rows to dst using the filter
 *     selected by my.
 *
 * `temp` is declared with LOCAL_ALIGNED_<align> and holds 71 rows of
 * 64 * bytes bytes (presumably 64 + 7 rows for the largest block height --
 * TODO confirm against callers).
 *
 * `bytes` and the other arguments are pasted into expressions without
 * parentheses, so pass plain constants/identifiers only.
 */
#define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, bpp, bytes, opt) \
static void op##_8tap_##fname##_##sz##hv_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
LOCAL_ALIGNED_##align(uint8_t, temp, [71 * 64 * bytes]); \
ff_vp9_put_8tap_1d_h_##sz##_##bpp##_##opt(temp, 64 * bytes, src - 3 * src_stride, \
src_stride, h + 7, \
ff_filters_##f_opt[f][mx - 1]); \
ff_vp9_##op##_8tap_1d_v_##sz##_##bpp##_##opt(dst, dst_stride, temp + 3 * bytes * 64, \
64 * bytes, h, \
ff_filters_##f_opt[f][my - 1]); \
}
/*
 * Instantiate filter_8tap_2d_fn() for the regular, sharp and smooth
 * 8-tap filters of one op/size combination.
 */
#define filters_8tap_2d_fn(op, sz, align, bpp, bytes, opt, f_opt) \
filter_8tap_2d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, align, bpp, bytes, opt) \
filter_8tap_2d_fn(op, sz, FILTER_8TAP_SHARP, f_opt, sharp, align, bpp, bytes, opt) \
filter_8tap_2d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, align, bpp, bytes, opt)
/*
 * Instantiate the 2-D wrappers for every size: 64, 32, 16 and 8 use the
 * `opt8` suffix, size 4 uses the `opt4` suffix.
 */
#define filters_8tap_2d_fn2(op, align, bpp, bytes, opt4, opt8, f_opt) \
filters_8tap_2d_fn(op, 64, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 32, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 16, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 8, align, bpp, bytes, opt8, f_opt) \
filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
/*
 * Store the function ff_vp9_<type><sz><bpp>_<opt> in the [0][0] slot of
 * dsp->mc[idx1][filter][idx2] for all four filter types (smooth, regular,
 * sharp and bilinear), via one chained assignment.
 *
 * Requires a variable named `dsp` in scope. The expansion has no trailing
 * semicolon; the user supplies it.
 */
#define init_fpel_func(idx1, idx2, sz, type, bpp, opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = \
dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_vp9_##type##sz##bpp##_##opt
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
,
int
bpp
);
/*
 * Store the smooth, regular and sharp wrappers named
 * <type>_8tap_<filter>_<sz><dir>_<bpp>_<opt> in
 * dsp->mc[idx1][filter][idx2][idxh][idxv].
 *
 * Requires a variable named `dsp` in scope. This expands to several
 * statements separated by `;` (the last one unterminated), so it must be
 * used inside a braced block rather than as the body of an unbraced `if`.
 */
#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, bpp, opt) \
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = \
type##_8tap_smooth_##sz##dir##_##bpp##_##opt; \
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = \
type##_8tap_regular_##sz##dir##_##bpp##_##opt; \
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][idxh][idxv] = \
type##_8tap_sharp_##sz##dir##_##bpp##_##opt
/*
 * Fill the three sub-pixel slots for one size: the hv wrappers go to
 * [1][1], the v wrappers to [0][1] and the h wrappers to [1][0]
 * (indices are [idxh][idxv] of init_subpel1()).
 */
#define init_subpel2(idx1, idx2, sz, type, bpp, opt) \
init_subpel1(idx1, idx2, 1, 1, sz, hv, type, bpp, opt); \
init_subpel1(idx1, idx2, 0, 1, sz, v, type, bpp, opt); \
init_subpel1(idx1, idx2, 1, 0, sz, h, type, bpp, opt)
/*
 * Run init_subpel2() for size 64 (first mc index 0) and size 32
 * (first mc index 1); `idx` is passed through as the second mc index.
 */
#define init_subpel3_32_64(idx, type, bpp, opt) \
init_subpel2(0, idx, 64, type, bpp, opt); \
init_subpel2(1, idx, 32, type, bpp, opt)
/*
 * Run init_subpel3_32_64() and additionally init_subpel2() for size 16
 * (first mc index 2) and size 8 (first mc index 3).
 */
#define init_subpel3_8to64(idx, type, bpp, opt) \
init_subpel3_32_64(idx, type, bpp, opt); \
init_subpel2(2, idx, 16, type, bpp, opt); \
init_subpel2(3, idx, 8, type, bpp, opt)
/*
 * Run init_subpel3_8to64() and additionally init_subpel2() for size 4
 * (first mc index 4), covering all five sizes.
 */
#define init_subpel3(idx, type, bpp, opt) \
init_subpel3_8to64(idx, type, bpp, opt); \
init_subpel2(4, idx, 4, type, bpp, opt)
void
ff_vp9dsp_init_10bpp_x86
(
VP9DSPContext
*
dsp
);
void
ff_vp9dsp_init_12bpp_x86
(
VP9DSPContext
*
dsp
);
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
);
#endif
/* AVCODEC_X86_VP9DSP_INIT_H */
libavcodec/x86/vp9dsp_init_10bpp.c
0 → 100644
View file @
344d5190
/*
* VP9 SIMD optimizations
*
* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define BPC 10
#define INIT_FUNC ff_vp9dsp_init_10bpp_x86
#include "vp9dsp_init_16bpp_template.c"
libavcodec/x86/vp9dsp_init_12bpp.c
0 → 100644
View file @
344d5190
/*
* VP9 SIMD optimizations
*
* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define BPC 12
#define INIT_FUNC ff_vp9dsp_init_12bpp_x86
#include "vp9dsp_init_16bpp_template.c"
libavcodec/x86/vp9dsp_init_16bpp.c
View file @
344d5190
...
...
@@ -48,7 +48,7 @@ decl_fpel_func(avg, 128, _16, avx2);
#endif
/* HAVE_YASM */
av_cold
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
,
int
bpp
)
av_cold
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
)
{
#if HAVE_YASM
int
cpu_flags
=
av_get_cpu_flags
();
...
...
libavcodec/x86/vp9dsp_init_16bpp_template.c
0 → 100644
View file @
344d5190
/*
* VP9 SIMD optimizations
*
* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/vp9dsp.h"
#include "libavcodec/x86/vp9dsp_init.h"
#if HAVE_YASM
extern
const
int16_t
ff_filters_16bpp
[
3
][
15
][
4
][
16
];
decl_mc_funcs
(
4
,
sse2
,
int16_t
,
16
,
BPC
);
decl_mc_funcs
(
8
,
sse2
,
int16_t
,
16
,
BPC
);
mc_rep_funcs
(
16
,
8
,
16
,
sse2
,
int16_t
,
16
,
BPC
);
mc_rep_funcs
(
32
,
16
,
32
,
sse2
,
int16_t
,
16
,
BPC
);
mc_rep_funcs
(
64
,
32
,
64
,
sse2
,
int16_t
,
16
,
BPC
);
filters_8tap_2d_fn2
(
put
,
16
,
BPC
,
2
,
sse2
,
sse2
,
16
bpp
)
filters_8tap_2d_fn2
(
avg
,
16
,
BPC
,
2
,
sse2
,
sse2
,
16
bpp
)
filters_8tap_1d_fn3
(
put
,
BPC
,
sse2
,
sse2
,
16
bpp
)
filters_8tap_1d_fn3
(
avg
,
BPC
,
sse2
,
sse2
,
16
bpp
)
#endif
/* HAVE_YASM */
/*
 * Bit-depth specific init entry point. INIT_FUNC and BPC are macros that
 * the including file must define before including this template.
 *
 * When HAVE_YASM is set and the CPU flags report external SSE2 support,
 * the put (index 0) and avg (index 1) sub-pixel MC slots of `dsp` are
 * filled with the SSE2 wrappers for BPC. In all cases the function then
 * calls ff_vp9dsp_init_16bpp_x86(dsp).
 */
av_cold void INIT_FUNC(VP9DSPContext *dsp)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags)) {
        /* init_subpel3 expands to multiple statements; keep it braced. */
        init_subpel3(0, put, BPC, sse2);
        init_subpel3(1, avg, BPC, sse2);
    }
#endif
/* HAVE_YASM */
    ff_vp9dsp_init_16bpp_x86(dsp);
}
libavcodec/x86/vp9mc.asm
View file @
344d5190
...
...
@@ -45,6 +45,13 @@ times 8 dw %7
times
8
dw
%8
%endmacro
%macro
F8_16BPP_TAPS
8
times
8
dw
%1
,
%2
times
8
dw
%3
,
%4
times
8
dw
%5
,
%6
times
8
dw
%7
,
%8
%endmacro
%macro
FILTER
1
const
filters_
%1
; smooth
F8_TAPS
-
3
,
-
1
,
32
,
64
,
38
,
1
,
-
3
,
0
...
...
@@ -102,12 +109,15 @@ FILTER ssse3
%define
F8_TAPS
F8_SSE2_TAPS
; int16_t ff_filters_sse2[3][15][8][8]
FILTER
sse2
%define
F8_TAPS
F8_16BPP_TAPS
; int16_t ff_filters_16bpp[3][15][4][16]
FILTER
16
bpp
SECTION
.
text
%macro
filter_sse2_h_fn
1
%assign
%%
px
mmsize
/
2
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
,
6
,
6
,
15
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
%
+
_8
,
6
,
6
,
15
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
pxor
m5
,
m5
mova
m6
,
[
pw_64
]
mova
m7
,
[
filteryq
+
0
]
...
...
@@ -192,7 +202,7 @@ filter_sse2_h_fn avg
%macro
filter_h_fn
1
%assign
%%
px
mmsize
/
2
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
,
6
,
6
,
11
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
%
+
_8
,
6
,
6
,
11
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
mova
m6
,
[
pw_256
]
mova
m7
,
[
filteryq
+
0
]
%if
ARCH_X86_64
&&
mmsize
>
8
...
...
@@ -253,7 +263,7 @@ filter_h_fn avg
%if
ARCH_X86_64
%macro
filter_hx2_fn
1
%assign
%%
px
mmsize
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
,
6
,
6
,
14
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
cglobal
vp9_
%1
_8tap_1d_h_
%
+
%%
px
%
+
_8
,
6
,
6
,
14
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
mova
m13
,
[
pw_256
]
mova
m8
,
[
filteryq
+
0
]
mova
m9
,
[
filteryq
+
32
]
...
...
@@ -315,9 +325,9 @@ filter_hx2_fn avg
%macro
filter_sse2_v_fn
1
%assign
%%
px
mmsize
/
2
%if
ARCH_X86_64
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
,
6
,
8
,
15
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
%
+
_8
,
6
,
8
,
15
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
%else
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
,
4
,
7
,
15
,
dst
,
dstride
,
src
,
sstride
,
filtery
,
src4
,
sstride3
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
%
+
_8
,
4
,
7
,
15
,
dst
,
dstride
,
src
,
sstride
,
filtery
,
src4
,
sstride3
mov
filteryq
,
r5mp
%define
hd
r4mp
%endif
...
...
@@ -413,9 +423,9 @@ filter_sse2_v_fn avg
%macro
filter_v_fn
1
%assign
%%
px
mmsize
/
2
%if
ARCH_X86_64
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
,
6
,
8
,
11
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
%
+
_8
,
6
,
8
,
11
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
%else
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
,
4
,
7
,
11
,
dst
,
dstride
,
src
,
sstride
,
filtery
,
src4
,
sstride3
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
%
+
_8
,
4
,
7
,
11
,
dst
,
dstride
,
src
,
sstride
,
filtery
,
src4
,
sstride3
mov
filteryq
,
r5mp
%define
hd
r4mp
%endif
...
...
@@ -487,7 +497,7 @@ filter_v_fn avg
%macro
filter_vx2_fn
1
%assign
%%
px
mmsize
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
,
6
,
8
,
14
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
cglobal
vp9_
%1
_8tap_1d_v_
%
+
%%
px
%
+
_8
,
6
,
8
,
14
,
dst
,
dstride
,
src
,
sstride
,
h
,
filtery
,
src4
,
sstride3
mova
m13
,
[
pw_256
]
lea
sstride3q
,
[
sstrideq
*
3
]
lea
src4q
,
[
srcq
+
sstrideq
]
...
...
libavcodec/x86/vp9mc_16bpp.asm
0 → 100644
View file @
344d5190
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment