Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
26ece7a5
Commit
26ece7a5
authored
Sep 25, 2015
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9: 16bpp tm/dc/h/v intra pred simd (mostly sse2) functions.
parent
db7786e8
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
669 additions
and
5 deletions
+669
-5
Makefile
libavcodec/x86/Makefile
+1
-0
constants.c
libavcodec/x86/constants.c
+4
-0
constants.h
libavcodec/x86/constants.h
+2
-0
h264_idct_10bit.asm
libavcodec/x86/h264_idct_10bit.asm
+1
-4
h264_intrapred_10bit.asm
libavcodec/x86/h264_intrapred_10bit.asm
+1
-1
vp9dsp_init.h
libavcodec/x86/vp9dsp_init.h
+23
-0
vp9dsp_init_16bpp.c
libavcodec/x86/vp9dsp_init_16bpp.c
+15
-0
vp9dsp_init_16bpp_template.c
libavcodec/x86/vp9dsp_init_16bpp_template.c
+7
-0
vp9intrapred_16bpp.asm
libavcodec/x86/vp9intrapred_16bpp.asm
+615
-0
No files found.
libavcodec/x86/Makefile
View file @
26ece7a5
...
@@ -158,6 +158,7 @@ YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp.o
...
@@ -158,6 +158,7 @@ YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp.o
YASM-OBJS-$(CONFIG_VORBIS_DECODER)
+=
x86/vorbisdsp.o
YASM-OBJS-$(CONFIG_VORBIS_DECODER)
+=
x86/vorbisdsp.o
YASM-OBJS-$(CONFIG_VP6_DECODER)
+=
x86/vp6dsp.o
YASM-OBJS-$(CONFIG_VP6_DECODER)
+=
x86/vp6dsp.o
YASM-OBJS-$(CONFIG_VP9_DECODER)
+=
x86/vp9intrapred.o
\
YASM-OBJS-$(CONFIG_VP9_DECODER)
+=
x86/vp9intrapred.o
\
x86/vp9intrapred_16bpp.o
\
x86/vp9itxfm.o
\
x86/vp9itxfm.o
\
x86/vp9lpf.o
\
x86/vp9lpf.o
\
x86/vp9lpf_16bpp.o
\
x86/vp9lpf_16bpp.o
\
...
...
libavcodec/x86/constants.c
View file @
26ece7a5
...
@@ -81,3 +81,7 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x800
...
@@ -81,3 +81,7 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x800
DECLARE_ALIGNED
(
32
,
const
ymm_reg
,
ff_pd_1
)
=
{
0x0000000100000001ULL
,
0x0000000100000001ULL
,
DECLARE_ALIGNED
(
32
,
const
ymm_reg
,
ff_pd_1
)
=
{
0x0000000100000001ULL
,
0x0000000100000001ULL
,
0x0000000100000001ULL
,
0x0000000100000001ULL
};
0x0000000100000001ULL
,
0x0000000100000001ULL
};
DECLARE_ALIGNED
(
32
,
const
ymm_reg
,
ff_pd_16
)
=
{
0x0000001000000010ULL
,
0x0000001000000010ULL
,
0x0000001000000010ULL
,
0x0000001000000010ULL
};
DECLARE_ALIGNED
(
32
,
const
ymm_reg
,
ff_pd_32
)
=
{
0x0000002000000020ULL
,
0x0000002000000020ULL
,
0x0000002000000020ULL
,
0x0000002000000020ULL
};
libavcodec/x86/constants.h
View file @
26ece7a5
...
@@ -63,5 +63,7 @@ extern const uint64_t ff_pb_FC;
...
@@ -63,5 +63,7 @@ extern const uint64_t ff_pb_FC;
extern
const
xmm_reg
ff_ps_neg
;
extern
const
xmm_reg
ff_ps_neg
;
extern
const
ymm_reg
ff_pd_1
;
extern
const
ymm_reg
ff_pd_1
;
extern
const
ymm_reg
ff_pd_16
;
extern
const
ymm_reg
ff_pd_32
;
#endif
/* AVCODEC_X86_CONSTANTS_H */
#endif
/* AVCODEC_X86_CONSTANTS_H */
libavcodec/x86/h264_idct_10bit.asm
View file @
26ece7a5
...
@@ -24,14 +24,11 @@
...
@@ -24,14 +24,11 @@
%include
"libavutil/x86/x86util.asm"
%include
"libavutil/x86/x86util.asm"
SECTION_RODATA
pd_32
:
times
4
dd
32
SECTION
.
text
SECTION
.
text
cextern
pw_1023
cextern
pw_1023
%define
pw_pixel_max
pw_1023
%define
pw_pixel_max
pw_1023
cextern
pd_32
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; void ff_h264_idct_add_10(pixel *dst, int16_t *block, int stride)
; void ff_h264_idct_add_10(pixel *dst, int16_t *block, int stride)
...
...
libavcodec/x86/h264_intrapred_10bit.asm
View file @
26ece7a5
...
@@ -34,11 +34,11 @@ cextern pw_8
...
@@ -34,11 +34,11 @@ cextern pw_8
cextern
pw_4
cextern
pw_4
cextern
pw_2
cextern
pw_2
cextern
pw_1
cextern
pw_1
cextern
pd_16
pw_m32101234
:
dw
-
3
,
-
2
,
-
1
,
0
,
1
,
2
,
3
,
4
pw_m32101234
:
dw
-
3
,
-
2
,
-
1
,
0
,
1
,
2
,
3
,
4
pw_m3
:
times
8
dw
-
3
pw_m3
:
times
8
dw
-
3
pd_17
:
times
4
dd
17
pd_17
:
times
4
dd
17
pd_16
:
times
4
dd
16
SECTION
.
text
SECTION
.
text
...
...
libavcodec/x86/vp9dsp_init.h
View file @
26ece7a5
...
@@ -41,6 +41,18 @@ decl_mc_func(avg, sz, h, opt, type, fsz, bpp); \
...
@@ -41,6 +41,18 @@ decl_mc_func(avg, sz, h, opt, type, fsz, bpp); \
decl_mc_func(put, sz, v, opt, type, fsz, bpp); \
decl_mc_func(put, sz, v, opt, type, fsz, bpp); \
decl_mc_func(avg, sz, v, opt, type, fsz, bpp)
decl_mc_func(avg, sz, v, opt, type, fsz, bpp)
#define decl_ipred_fn(type, sz, bpp, opt) \
void ff_vp9_ipred_##type##_##sz##x##sz##_##bpp##_##opt(uint8_t *dst, \
ptrdiff_t stride, \
const uint8_t *l, \
const uint8_t *a)
#define decl_ipred_fns(type, bpp, opt4, opt8_16_32) \
decl_ipred_fn(type, 4, bpp, opt4); \
decl_ipred_fn(type, 8, bpp, opt8_16_32); \
decl_ipred_fn(type, 16, bpp, opt8_16_32); \
decl_ipred_fn(type, 32, bpp, opt8_16_32)
#define mc_rep_func(avg, sz, hsz, hszb, dir, opt, type, f_sz, bpp) \
#define mc_rep_func(avg, sz, hsz, hszb, dir, opt, type, f_sz, bpp) \
static av_always_inline void \
static av_always_inline void \
ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
...
@@ -142,6 +154,17 @@ filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
...
@@ -142,6 +154,17 @@ filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
init_subpel3_8to64(idx, type, bpp, opt); \
init_subpel3_8to64(idx, type, bpp, opt); \
init_subpel2(4, idx, 4, type, bpp, opt)
init_subpel2(4, idx, 4, type, bpp, opt)
#define cat(a, bpp, b) a##bpp##b
#define init_ipred_func(type, enum, sz, bpp, opt) \
dsp->intra_pred[TX_##sz##X##sz][enum##_PRED] = \
cat(ff_vp9_ipred_##type##_##sz##x##sz##_, bpp, _##opt)
#define init_8_16_32_ipred_funcs(type, enum, bpp, opt) \
init_ipred_func(type, enum, 8, bpp, opt); \
init_ipred_func(type, enum, 16, bpp, opt); \
init_ipred_func(type, enum, 32, bpp, opt)
void
ff_vp9dsp_init_10bpp_x86
(
VP9DSPContext
*
dsp
);
void
ff_vp9dsp_init_10bpp_x86
(
VP9DSPContext
*
dsp
);
void
ff_vp9dsp_init_12bpp_x86
(
VP9DSPContext
*
dsp
);
void
ff_vp9dsp_init_12bpp_x86
(
VP9DSPContext
*
dsp
);
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
);
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
);
...
...
libavcodec/x86/vp9dsp_init_16bpp.c
View file @
26ece7a5
...
@@ -46,6 +46,11 @@ decl_fpel_func(avg, 32, _16, avx2);
...
@@ -46,6 +46,11 @@ decl_fpel_func(avg, 32, _16, avx2);
decl_fpel_func
(
avg
,
64
,
_16
,
avx2
);
decl_fpel_func
(
avg
,
64
,
_16
,
avx2
);
decl_fpel_func
(
avg
,
128
,
_16
,
avx2
);
decl_fpel_func
(
avg
,
128
,
_16
,
avx2
);
decl_ipred_fns
(
v
,
16
,
mmx
,
sse
);
decl_ipred_fns
(
h
,
16
,
mmxext
,
sse2
);
decl_ipred_fns
(
dc
,
16
,
mmxext
,
sse2
);
decl_ipred_fns
(
dc_top
,
16
,
mmxext
,
sse2
);
decl_ipred_fns
(
dc_left
,
16
,
mmxext
,
sse2
);
#endif
/* HAVE_YASM */
#endif
/* HAVE_YASM */
av_cold
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
)
av_cold
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
)
...
@@ -55,10 +60,15 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
...
@@ -55,10 +60,15 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
init_fpel_func
(
4
,
0
,
8
,
put
,
,
mmx
);
init_fpel_func
(
4
,
0
,
8
,
put
,
,
mmx
);
init_ipred_func
(
v
,
VERT
,
4
,
16
,
mmx
);
}
}
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
init_fpel_func
(
4
,
1
,
8
,
avg
,
_16
,
mmxext
);
init_fpel_func
(
4
,
1
,
8
,
avg
,
_16
,
mmxext
);
init_ipred_func
(
h
,
HOR
,
4
,
16
,
mmxext
);
init_ipred_func
(
dc
,
DC
,
4
,
16
,
mmxext
);
init_ipred_func
(
dc_top
,
TOP_DC
,
4
,
16
,
mmxext
);
init_ipred_func
(
dc_left
,
LEFT_DC
,
4
,
16
,
mmxext
);
}
}
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
...
@@ -66,6 +76,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
...
@@ -66,6 +76,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
init_fpel_func
(
2
,
0
,
32
,
put
,
,
sse
);
init_fpel_func
(
2
,
0
,
32
,
put
,
,
sse
);
init_fpel_func
(
1
,
0
,
64
,
put
,
,
sse
);
init_fpel_func
(
1
,
0
,
64
,
put
,
,
sse
);
init_fpel_func
(
0
,
0
,
128
,
put
,
,
sse
);
init_fpel_func
(
0
,
0
,
128
,
put
,
,
sse
);
init_8_16_32_ipred_funcs
(
v
,
VERT
,
16
,
sse
);
}
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
...
@@ -73,6 +84,10 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
...
@@ -73,6 +84,10 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
init_fpel_func
(
2
,
1
,
32
,
avg
,
_16
,
sse2
);
init_fpel_func
(
2
,
1
,
32
,
avg
,
_16
,
sse2
);
init_fpel_func
(
1
,
1
,
64
,
avg
,
_16
,
sse2
);
init_fpel_func
(
1
,
1
,
64
,
avg
,
_16
,
sse2
);
init_fpel_func
(
0
,
1
,
128
,
avg
,
_16
,
sse2
);
init_fpel_func
(
0
,
1
,
128
,
avg
,
_16
,
sse2
);
init_8_16_32_ipred_funcs
(
h
,
HOR
,
16
,
sse2
);
init_8_16_32_ipred_funcs
(
dc
,
DC
,
16
,
sse2
);
init_8_16_32_ipred_funcs
(
dc_top
,
TOP_DC
,
16
,
sse2
);
init_8_16_32_ipred_funcs
(
dc_left
,
LEFT_DC
,
16
,
sse2
);
}
}
if
(
EXTERNAL_AVX_FAST
(
cpu_flags
))
{
if
(
EXTERNAL_AVX_FAST
(
cpu_flags
))
{
...
...
libavcodec/x86/vp9dsp_init_16bpp_template.c
View file @
26ece7a5
...
@@ -121,6 +121,8 @@ lpf_mix2_wrappers(8, 8, bpp, opt); \
...
@@ -121,6 +121,8 @@ lpf_mix2_wrappers(8, 8, bpp, opt); \
lpf_mix2_wrappers_set
(
BPC
,
sse2
);
lpf_mix2_wrappers_set
(
BPC
,
sse2
);
lpf_mix2_wrappers_set
(
BPC
,
ssse3
);
lpf_mix2_wrappers_set
(
BPC
,
ssse3
);
lpf_mix2_wrappers_set
(
BPC
,
avx
);
lpf_mix2_wrappers_set
(
BPC
,
avx
);
decl_ipred_fns
(
tm
,
BPC
,
mmxext
,
sse2
);
#endif
/* HAVE_YASM */
#endif
/* HAVE_YASM */
av_cold
void
INIT_FUNC
(
VP9DSPContext
*
dsp
)
av_cold
void
INIT_FUNC
(
VP9DSPContext
*
dsp
)
...
@@ -153,10 +155,15 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp)
...
@@ -153,10 +155,15 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp)
init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)
init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
init_ipred_func
(
tm
,
TM_VP8
,
4
,
BPC
,
mmxext
);
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
init_subpel3
(
0
,
put
,
BPC
,
sse2
);
init_subpel3
(
0
,
put
,
BPC
,
sse2
);
init_subpel3
(
1
,
avg
,
BPC
,
sse2
);
init_subpel3
(
1
,
avg
,
BPC
,
sse2
);
init_lpf_funcs
(
BPC
,
sse2
);
init_lpf_funcs
(
BPC
,
sse2
);
init_8_16_32_ipred_funcs
(
tm
,
TM_VP8
,
BPC
,
sse2
);
}
}
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
...
...
libavcodec/x86/vp9intrapred_16bpp.asm
0 → 100644
View file @
26ece7a5
;******************************************************************************
;* VP9 Intra prediction SIMD optimizations
;*
;* Copyright (c) 2015 Ronald S. Bultje <rsbultje gmail com>
;* Copyright (c) 2015 Henrik Gramner <henrik gramner com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"libavutil/x86/x86util.asm"
SECTION_RODATA
32
pd_2
:
times
8
dd
2
pd_4
:
times
8
dd
4
pd_8
:
times
8
dd
8
cextern
pw_1
cextern
pw_1023
cextern
pw_4095
cextern
pd_16
cextern
pd_32
SECTION
.
text
INIT_MMX
mmx
cglobal
vp9_ipred_v_4x4_16
,
2
,
4
,
1
,
dst
,
stride
,
l
,
a
movifnidn
aq
,
amp
mova
m0
,
[aq]
DEFINE_ARGS
dst
,
stride
,
stride3
lea
stride3q
,
[
strideq
*
3
]
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m0
mova
[
dstq
+
strideq
*
2
]
,
m0
mova
[
dstq
+
stride3q
]
,
m0
RET
INIT_XMM
sse
cglobal
vp9_ipred_v_8x8_16
,
2
,
4
,
1
,
dst
,
stride
,
l
,
a
movifnidn
aq
,
amp
mova
m0
,
[aq]
DEFINE_ARGS
dst
,
stride
,
stride3
lea
stride3q
,
[
strideq
*
3
]
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m0
mova
[
dstq
+
strideq
*
2
]
,
m0
mova
[
dstq
+
stride3q
]
,
m0
lea
dstq
,
[
dstq
+
strideq
*
4
]
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m0
mova
[
dstq
+
strideq
*
2
]
,
m0
mova
[
dstq
+
stride3q
]
,
m0
RET
INIT_XMM
sse
cglobal
vp9_ipred_v_16x16_16
,
2
,
4
,
2
,
dst
,
stride
,
l
,
a
movifnidn
aq
,
amp
mova
m0
,
[aq]
mova
m1
,
[
aq
+
mmsize
]
DEFINE_ARGS
dst
,
stride
,
stride3
,
cnt
lea
stride3q
,
[
strideq
*
3
]
mov
cntd
,
4
.
loop
:
mova
[
dstq
+
strideq
*
0
+
0
]
,
m0
mova
[
dstq
+
strideq
*
0
+
16
]
,
m1
mova
[
dstq
+
strideq
*
1
+
0
]
,
m0
mova
[
dstq
+
strideq
*
1
+
16
]
,
m1
mova
[
dstq
+
strideq
*
2
+
0
]
,
m0
mova
[
dstq
+
strideq
*
2
+
16
]
,
m1
mova
[
dstq
+
stride3q
+
0
]
,
m0
mova
[
dstq
+
stride3q
+
16
]
,
m1
lea
dstq
,
[
dstq
+
strideq
*
4
]
dec
cntd
jg
.
loop
RET
INIT_XMM
sse
cglobal
vp9_ipred_v_32x32_16
,
2
,
4
,
4
,
dst
,
stride
,
l
,
a
movifnidn
aq
,
amp
mova
m0
,
[
aq
+
mmsize
*
0
]
mova
m1
,
[
aq
+
mmsize
*
1
]
mova
m2
,
[
aq
+
mmsize
*
2
]
mova
m3
,
[
aq
+
mmsize
*
3
]
DEFINE_ARGS
dst
,
stride
,
cnt
mov
cntd
,
16
.
loop
:
mova
[
dstq
+
strideq
*
0
+
0
]
,
m0
mova
[
dstq
+
strideq
*
0
+
16
]
,
m1
mova
[
dstq
+
strideq
*
0
+
32
]
,
m2
mova
[
dstq
+
strideq
*
0
+
48
]
,
m3
mova
[
dstq
+
strideq
*
1
+
0
]
,
m0
mova
[
dstq
+
strideq
*
1
+
16
]
,
m1
mova
[
dstq
+
strideq
*
1
+
32
]
,
m2
mova
[
dstq
+
strideq
*
1
+
48
]
,
m3
lea
dstq
,
[
dstq
+
strideq
*
2
]
dec
cntd
jg
.
loop
RET
INIT_MMX
mmxext
cglobal
vp9_ipred_h_4x4_16
,
3
,
3
,
4
,
dst
,
stride
,
l
,
a
mova
m3
,
[lq]
DEFINE_ARGS
dst
,
stride
,
stride3
lea
stride3q
,
[
strideq
*
3
]
pshufw
m0
,
m3
,
q3333
pshufw
m1
,
m3
,
q2222
pshufw
m2
,
m3
,
q1111
pshufw
m3
,
m3
,
q0000
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m1
mova
[
dstq
+
strideq
*
2
]
,
m2
mova
[
dstq
+
stride3q
]
,
m3
RET
INIT_XMM
sse2
cglobal
vp9_ipred_h_8x8_16
,
3
,
3
,
4
,
dst
,
stride
,
l
,
a
mova
m2
,
[lq]
DEFINE_ARGS
dst
,
stride
,
stride3
lea
stride3q
,
[
strideq
*
3
]
punpckhwd
m3
,
m2
,
m2
pshufd
m0
,
m3
,
q3333
pshufd
m1
,
m3
,
q2222
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m1
pshufd
m0
,
m3
,
q1111
pshufd
m1
,
m3
,
q0000
mova
[
dstq
+
strideq
*
2
]
,
m0
mova
[
dstq
+
stride3q
]
,
m1
lea
dstq
,
[
dstq
+
strideq
*
4
]
punpcklwd
m2
,
m2
pshufd
m0
,
m2
,
q3333
pshufd
m1
,
m2
,
q2222
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m1
pshufd
m0
,
m2
,
q1111
pshufd
m1
,
m2
,
q0000
mova
[
dstq
+
strideq
*
2
]
,
m0
mova
[
dstq
+
stride3q
]
,
m1
RET
INIT_XMM
sse2
cglobal
vp9_ipred_h_16x16_16
,
3
,
5
,
4
,
dst
,
stride
,
l
,
stride3
,
cnt
mov
cntd
,
3
lea
stride3q
,
[
strideq
*
3
]
.
loop
:
movh
m3
,
[
lq
+
cntq
*
8
]
punpcklwd
m3
,
m3
pshufd
m0
,
m3
,
q3333
pshufd
m1
,
m3
,
q2222
pshufd
m2
,
m3
,
q1111
pshufd
m3
,
m3
,
q0000
mova
[
dstq
+
strideq
*
0
+
0
]
,
m0
mova
[
dstq
+
strideq
*
0
+
16
]
,
m0
mova
[
dstq
+
strideq
*
1
+
0
]
,
m1
mova
[
dstq
+
strideq
*
1
+
16
]
,
m1
mova
[
dstq
+
strideq
*
2
+
0
]
,
m2
mova
[
dstq
+
strideq
*
2
+
16
]
,
m2
mova
[
dstq
+
stride3q
+
0
]
,
m3
mova
[
dstq
+
stride3q
+
16
]
,
m3
lea
dstq
,
[
dstq
+
strideq
*
4
]
dec
cntd
jge
.
loop
RET
INIT_XMM
sse2
cglobal
vp9_ipred_h_32x32_16
,
3
,
5
,
4
,
dst
,
stride
,
l
,
stride3
,
cnt
mov
cntd
,
7
lea
stride3q
,
[
strideq
*
3
]
.
loop
:
movh
m3
,
[
lq
+
cntq
*
8
]
punpcklwd
m3
,
m3
pshufd
m0
,
m3
,
q3333
pshufd
m1
,
m3
,
q2222
pshufd
m2
,
m3
,
q1111
pshufd
m3
,
m3
,
q0000
mova
[
dstq
+
strideq
*
0
+
0
]
,
m0
mova
[
dstq
+
strideq
*
0
+
16
]
,
m0
mova
[
dstq
+
strideq
*
0
+
32
]
,
m0
mova
[
dstq
+
strideq
*
0
+
48
]
,
m0
mova
[
dstq
+
strideq
*
1
+
0
]
,
m1
mova
[
dstq
+
strideq
*
1
+
16
]
,
m1
mova
[
dstq
+
strideq
*
1
+
32
]
,
m1
mova
[
dstq
+
strideq
*
1
+
48
]
,
m1
mova
[
dstq
+
strideq
*
2
+
0
]
,
m2
mova
[
dstq
+
strideq
*
2
+
16
]
,
m2
mova
[
dstq
+
strideq
*
2
+
32
]
,
m2
mova
[
dstq
+
strideq
*
2
+
48
]
,
m2
mova
[
dstq
+
stride3q
+
0
]
,
m3
mova
[
dstq
+
stride3q
+
16
]
,
m3
mova
[
dstq
+
stride3q
+
32
]
,
m3
mova
[
dstq
+
stride3q
+
48
]
,
m3
lea
dstq
,
[
dstq
+
strideq
*
4
]
dec
cntd
jge
.
loop
RET
INIT_MMX
mmxext
cglobal
vp9_ipred_dc_4x4_16
,
4
,
4
,
2
,
dst
,
stride
,
l
,
a
mova
m0
,
[lq]
paddw
m0
,
[aq]
DEFINE_ARGS
dst
,
stride
,
stride3
lea
stride3q
,
[
strideq
*
3
]
pmaddwd
m0
,
[
pw_1
]
pshufw
m1
,
m0
,
q3232
paddd
m0
,
[
pd_4
]
paddd
m0
,
m1
psrad
m0
,
3
pshufw
m0
,
m0
,
q0000
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m0
mova
[
dstq
+
strideq
*
2
]
,
m0
mova
[
dstq
+
stride3q
]
,
m0
RET
INIT_XMM
sse2
cglobal
vp9_ipred_dc_8x8_16
,
4
,
4
,
2
,
dst
,
stride
,
l
,
a
mova
m0
,
[lq]
paddw
m0
,
[aq]
DEFINE_ARGS
dst
,
stride
,
stride3
lea
stride3q
,
[
strideq
*
3
]
pmaddwd
m0
,
[
pw_1
]
pshufd
m1
,
m0
,
q3232
paddd
m0
,
m1
pshufd
m1
,
m0
,
q1111
paddd
m0
,
[
pd_8
]
paddd
m0
,
m1
psrad
m0
,
4
pshuflw
m0
,
m0
,
q0000
punpcklqdq
m0
,
m0
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m0
mova
[
dstq
+
strideq
*
2
]
,
m0
mova
[
dstq
+
stride3q
]
,
m0
lea
dstq
,
[
dstq
+
strideq
*
4
]
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m0
mova
[
dstq
+
strideq
*
2
]
,
m0
mova
[
dstq
+
stride3q
]
,
m0
RET
INIT_XMM
sse2
cglobal
vp9_ipred_dc_16x16_16
,
4
,
4
,
2
,
dst
,
stride
,
l
,
a
mova
m0
,
[lq]
paddw
m0
,
[
lq
+
mmsize
]
paddw
m0
,
[aq]
paddw
m0
,
[
aq
+
mmsize
]
DEFINE_ARGS
dst
,
stride
,
stride3
,
cnt
lea
stride3q
,
[
strideq
*
3
]
mov
cntd
,
4
pmaddwd
m0
,
[
pw_1
]
pshufd
m1
,
m0
,
q3232
paddd
m0
,
m1
pshufd
m1
,
m0
,
q1111
paddd
m0
,
[
pd_16
]
paddd
m0
,
m1
psrad
m0
,
5
pshuflw
m0
,
m0
,
q0000
punpcklqdq
m0
,
m0
.
loop
:
mova
[
dstq
+
strideq
*
0
+
0
]
,
m0
mova
[
dstq
+
strideq
*
0
+
16
]
,
m0
mova
[
dstq
+
strideq
*
1
+
0
]
,
m0
mova
[
dstq
+
strideq
*
1
+
16
]
,
m0
mova
[
dstq
+
strideq
*
2
+
0
]
,
m0
mova
[
dstq
+
strideq
*
2
+
16
]
,
m0
mova
[
dstq
+
stride3q
+
0
]
,
m0
mova
[
dstq
+
stride3q
+
16
]
,
m0
lea
dstq
,
[
dstq
+
strideq
*
4
]
dec
cntd
jg
.
loop
RET
INIT_XMM
sse2
cglobal
vp9_ipred_dc_32x32_16
,
4
,
4
,
2
,
dst
,
stride
,
l
,
a
mova
m0
,
[
lq
+
mmsize
*
0
]
paddw
m0
,
[
lq
+
mmsize
*
1
]
paddw
m0
,
[
lq
+
mmsize
*
2
]
paddw
m0
,
[
lq
+
mmsize
*
3
]
paddw
m0
,
[
aq
+
mmsize
*
0
]
paddw
m0
,
[
aq
+
mmsize
*
1
]
paddw
m0
,
[
aq
+
mmsize
*
2
]
paddw
m0
,
[
aq
+
mmsize
*
3
]
DEFINE_ARGS
dst
,
stride
,
stride3
,
cnt
lea
stride3q
,
[
strideq
*
3
]
mov
cntd
,
16
pmaddwd
m0
,
[
pw_1
]
pshufd
m1
,
m0
,
q3232
paddd
m0
,
m1
pshufd
m1
,
m0
,
q1111
paddd
m0
,
[
pd_32
]
paddd
m0
,
m1
psrad
m0
,
6
pshuflw
m0
,
m0
,
q0000
punpcklqdq
m0
,
m0
.
loop
:
mova
[
dstq
+
strideq
*
0
+
0
]
,
m0
mova
[
dstq
+
strideq
*
0
+
16
]
,
m0
mova
[
dstq
+
strideq
*
0
+
32
]
,
m0
mova
[
dstq
+
strideq
*
0
+
48
]
,
m0
mova
[
dstq
+
strideq
*
1
+
0
]
,
m0
mova
[
dstq
+
strideq
*
1
+
16
]
,
m0
mova
[
dstq
+
strideq
*
1
+
32
]
,
m0
mova
[
dstq
+
strideq
*
1
+
48
]
,
m0
lea
dstq
,
[
dstq
+
strideq
*
2
]
dec
cntd
jg
.
loop
RET
%macro
DC_1D_FNS
2
INIT_MMX
mmxext
cglobal
vp9_ipred_dc_
%1
_4x4_16
,
4
,
4
,
2
,
dst
,
stride
,
l
,
a
mova
m0
,
[
%2
]
DEFINE_ARGS
dst
,
stride
,
stride3
lea
stride3q
,
[
strideq
*
3
]
pmaddwd
m0
,
[
pw_1
]
pshufw
m1
,
m0
,
q3232
paddd
m0
,
[
pd_2
]
paddd
m0
,
m1
psrad
m0
,
2
pshufw
m0
,
m0
,
q0000
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m0
mova
[
dstq
+
strideq
*
2
]
,
m0
mova
[
dstq
+
stride3q
]
,
m0
RET
INIT_XMM
sse2
cglobal
vp9_ipred_dc_
%1
_8x8_16
,
4
,
4
,
2
,
dst
,
stride
,
l
,
a
mova
m0
,
[
%2
]
DEFINE_ARGS
dst
,
stride
,
stride3
lea
stride3q
,
[
strideq
*
3
]
pmaddwd
m0
,
[
pw_1
]
pshufd
m1
,
m0
,
q3232
paddd
m0
,
m1
pshufd
m1
,
m0
,
q1111
paddd
m0
,
[
pd_4
]
paddd
m0
,
m1
psrad
m0
,
3
pshuflw
m0
,
m0
,
q0000
punpcklqdq
m0
,
m0
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m0
mova
[
dstq
+
strideq
*
2
]
,
m0
mova
[
dstq
+
stride3q
]
,
m0
lea
dstq
,
[
dstq
+
strideq
*
4
]
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m0
mova
[
dstq
+
strideq
*
2
]
,
m0
mova
[
dstq
+
stride3q
]
,
m0
RET
INIT_XMM
sse2
cglobal
vp9_ipred_dc_
%1
_16x16_16
,
4
,
4
,
2
,
dst
,
stride
,
l
,
a
mova
m0
,
[
%2
]
paddw
m0
,
[
%2
+
mmsize
]
DEFINE_ARGS
dst
,
stride
,
stride3
,
cnt
lea
stride3q
,
[
strideq
*
3
]
mov
cntd
,
4
pmaddwd
m0
,
[
pw_1
]
pshufd
m1
,
m0
,
q3232
paddd
m0
,
m1
pshufd
m1
,
m0
,
q1111
paddd
m0
,
[
pd_8
]
paddd
m0
,
m1
psrad
m0
,
4
pshuflw
m0
,
m0
,
q0000
punpcklqdq
m0
,
m0
.
loop
:
mova
[
dstq
+
strideq
*
0
+
0
]
,
m0
mova
[
dstq
+
strideq
*
0
+
16
]
,
m0
mova
[
dstq
+
strideq
*
1
+
0
]
,
m0
mova
[
dstq
+
strideq
*
1
+
16
]
,
m0
mova
[
dstq
+
strideq
*
2
+
0
]
,
m0
mova
[
dstq
+
strideq
*
2
+
16
]
,
m0
mova
[
dstq
+
stride3q
+
0
]
,
m0
mova
[
dstq
+
stride3q
+
16
]
,
m0
lea
dstq
,
[
dstq
+
strideq
*
4
]
dec
cntd
jg
.
loop
RET
INIT_XMM
sse2
cglobal
vp9_ipred_dc_
%1
_32x32_16
,
4
,
4
,
2
,
dst
,
stride
,
l
,
a
mova
m0
,
[
%2
+
mmsize
*
0
]
paddw
m0
,
[
%2
+
mmsize
*
1
]
paddw
m0
,
[
%2
+
mmsize
*
2
]
paddw
m0
,
[
%2
+
mmsize
*
3
]
DEFINE_ARGS
dst
,
stride
,
cnt
mov
cntd
,
16
pmaddwd
m0
,
[
pw_1
]
pshufd
m1
,
m0
,
q3232
paddd
m0
,
m1
pshufd
m1
,
m0
,
q1111
paddd
m0
,
[
pd_16
]
paddd
m0
,
m1
psrad
m0
,
5
pshuflw
m0
,
m0
,
q0000
punpcklqdq
m0
,
m0
.
loop
:
mova
[
dstq
+
strideq
*
0
+
0
]
,
m0
mova
[
dstq
+
strideq
*
0
+
16
]
,
m0
mova
[
dstq
+
strideq
*
0
+
32
]
,
m0
mova
[
dstq
+
strideq
*
0
+
48
]
,
m0
mova
[
dstq
+
strideq
*
1
+
0
]
,
m0
mova
[
dstq
+
strideq
*
1
+
16
]
,
m0
mova
[
dstq
+
strideq
*
1
+
32
]
,
m0
mova
[
dstq
+
strideq
*
1
+
48
]
,
m0
lea
dstq
,
[
dstq
+
strideq
*
2
]
dec
cntd
jg
.
loop
RET
%endmacro
DC_1D_FNS
top
,
aq
DC_1D_FNS
left
,
lq
INIT_MMX
mmxext
cglobal
vp9_ipred_tm_4x4_10
,
4
,
4
,
6
,
dst
,
stride
,
l
,
a
mova
m5
,
[
pw_1023
]
.
body
:
mova
m4
,
[aq]
mova
m3
,
[lq]
movd
m0
,
[
aq
-
4
]
pshufw
m0
,
m0
,
q1111
psubw
m4
,
m0
DEFINE_ARGS
dst
,
stride
,
stride3
lea
stride3q
,
[
strideq
*
3
]
pshufw
m0
,
m3
,
q3333
pshufw
m1
,
m3
,
q2222
pshufw
m2
,
m3
,
q1111
pshufw
m3
,
m3
,
q0000
paddw
m0
,
m4
paddw
m1
,
m4
paddw
m2
,
m4
paddw
m3
,
m4
pxor
m4
,
m4
pmaxsw
m0
,
m4
pmaxsw
m1
,
m4
pmaxsw
m2
,
m4
pmaxsw
m3
,
m4
pminsw
m0
,
m5
pminsw
m1
,
m5
pminsw
m2
,
m5
pminsw
m3
,
m5
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m1
mova
[
dstq
+
strideq
*
2
]
,
m2
mova
[
dstq
+
stride3q
]
,
m3
RET
cglobal
vp9_ipred_tm_4x4_12
,
4
,
4
,
6
,
dst
,
stride
,
l
,
a
mova
m5
,
[
pw_4095
]
jmp
mangle
(
private_prefix
%
+
_
%
+
vp9_ipred_tm_4x4_10
%
+
SUFFIX
).
body
INIT_XMM
sse2
cglobal
vp9_ipred_tm_8x8_10
,
4
,
5
,
7
,
dst
,
stride
,
l
,
a
mova
m4
,
[
pw_1023
]
.
body
:
pxor
m6
,
m6
mova
m5
,
[aq]
movd
m0
,
[
aq
-
4
]
pshuflw
m0
,
m0
,
q1111
punpcklqdq
m0
,
m0
psubw
m5
,
m0
DEFINE_ARGS
dst
,
stride
,
l
,
stride3
,
cnt
lea
stride3q
,
[
strideq
*
3
]
mov
cntd
,
1
.
loop
:
movh
m3
,
[
lq
+
cntq
*
8
]
punpcklwd
m3
,
m3
pshufd
m0
,
m3
,
q3333
pshufd
m1
,
m3
,
q2222
pshufd
m2
,
m3
,
q1111
pshufd
m3
,
m3
,
q0000
paddw
m0
,
m5
paddw
m1
,
m5
paddw
m2
,
m5
paddw
m3
,
m5
pmaxsw
m0
,
m6
pmaxsw
m1
,
m6
pmaxsw
m2
,
m6
pmaxsw
m3
,
m6
pminsw
m0
,
m4
pminsw
m1
,
m4
pminsw
m2
,
m4
pminsw
m3
,
m4
mova
[
dstq
+
strideq
*
0
]
,
m0
mova
[
dstq
+
strideq
*
1
]
,
m1
mova
[
dstq
+
strideq
*
2
]
,
m2
mova
[
dstq
+
stride3q
]
,
m3
lea
dstq
,
[
dstq
+
strideq
*
4
]
dec
cntd
jge
.
loop
RET
cglobal
vp9_ipred_tm_8x8_12
,
4
,
5
,
7
,
dst
,
stride
,
l
,
a
mova
m4
,
[
pw_4095
]
jmp
mangle
(
private_prefix
%
+
_
%
+
vp9_ipred_tm_8x8_10
%
+
SUFFIX
).
body
INIT_XMM
sse2
cglobal
vp9_ipred_tm_16x16_10
,
4
,
4
,
8
,
dst
,
stride
,
l
,
a
mova
m7
,
[
pw_1023
]
.
body
:
pxor
m6
,
m6
mova
m4
,
[aq]
mova
m5
,
[
aq
+
mmsize
]
movd
m0
,
[
aq
-
4
]
pshuflw
m0
,
m0
,
q1111
punpcklqdq
m0
,
m0
psubw
m4
,
m0
psubw
m5
,
m0
DEFINE_ARGS
dst
,
stride
,
l
,
cnt
mov
cntd
,
7
.
loop
:
movd
m3
,
[
lq
+
cntq
*
4
]
punpcklwd
m3
,
m3
pshufd
m2
,
m3
,
q1111
pshufd
m3
,
m3
,
q0000
paddw
m0
,
m2
,
m4
paddw
m2
,
m5
paddw
m1
,
m3
,
m4
paddw
m3
,
m5
pmaxsw
m0
,
m6
pmaxsw
m2
,
m6
pmaxsw
m1
,
m6
pmaxsw
m3
,
m6
pminsw
m0
,
m7
pminsw
m2
,
m7
pminsw
m1
,
m7
pminsw
m3
,
m7
mova
[
dstq
+
strideq
*
0
+
0
]
,
m0
mova
[
dstq
+
strideq
*
0
+
16
]
,
m2
mova
[
dstq
+
strideq
*
1
+
0
]
,
m1
mova
[
dstq
+
strideq
*
1
+
16
]
,
m3
lea
dstq
,
[
dstq
+
strideq
*
2
]
dec
cntd
jge
.
loop
RET
cglobal
vp9_ipred_tm_16x16_12
,
4
,
4
,
8
,
dst
,
stride
,
l
,
a
mova
m7
,
[
pw_4095
]
jmp
mangle
(
private_prefix
%
+
_
%
+
vp9_ipred_tm_16x16_10
%
+
SUFFIX
).
body
INIT_XMM
sse2
cglobal
vp9_ipred_tm_32x32_10
,
4
,
4
,
10
,
32
*
ARCH_X86_32
,
dst
,
stride
,
l
,
a
mova
m0
,
[
pw_1023
]
.
body
:
pxor
m1
,
m1
%if
ARCH_X86_64
SWAP
0
,
8
SWAP
1
,
9
%define
reg_min
m9
%define
reg_max
m8
%else
mova
[
rsp
+
0
]
,
m0
mova
[
rsp
+
16
]
,
m1
%define
reg_min
[
rsp
+
16
]
%define
reg_max
[
rsp
+
0
]
%endif
mova
m4
,
[
aq
+
mmsize
*
0
]
mova
m5
,
[
aq
+
mmsize
*
1
]
mova
m6
,
[
aq
+
mmsize
*
2
]
mova
m7
,
[
aq
+
mmsize
*
3
]
movd
m0
,
[
aq
-
4
]
pshuflw
m0
,
m0
,
q1111
punpcklqdq
m0
,
m0
psubw
m4
,
m0
psubw
m5
,
m0
psubw
m6
,
m0
psubw
m7
,
m0
DEFINE_ARGS
dst
,
stride
,
l
,
cnt
mov
cntd
,
31
.
loop
:
pinsrw
m3
,
[
lq
+
cntq
*
2
]
,
0
punpcklwd
m3
,
m3
pshufd
m3
,
m3
,
q0000
paddw
m0
,
m3
,
m4
paddw
m1
,
m3
,
m5
paddw
m2
,
m3
,
m6
paddw
m3
,
m7
pmaxsw
m0
,
reg_min
pmaxsw
m1
,
reg_min
pmaxsw
m2
,
reg_min
pmaxsw
m3
,
reg_min
pminsw
m0
,
reg_max
pminsw
m1
,
reg_max
pminsw
m2
,
reg_max
pminsw
m3
,
reg_max
mova
[
dstq
+
strideq
*
0
+
0
]
,
m0
mova
[
dstq
+
strideq
*
0
+
16
]
,
m1
mova
[
dstq
+
strideq
*
0
+
32
]
,
m2
mova
[
dstq
+
strideq
*
0
+
48
]
,
m3
add
dstq
,
strideq
dec
cntd
jge
.
loop
RET
cglobal
vp9_ipred_tm_32x32_12
,
4
,
4
,
10
,
32
*
ARCH_X86_32
,
dst
,
stride
,
l
,
a
mova
m0
,
[
pw_4095
]
jmp
mangle
(
private_prefix
%
+
_
%
+
vp9_ipred_tm_32x32_10
%
+
SUFFIX
).
body
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment