Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
f76423d0
Commit
f76423d0
authored
Oct 06, 2015
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9: add x86 simd (sse2/ssse3) for iadst4 10bpp functions.
parent
6b579cf5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
157 additions
and
82 deletions
+157
-82
vp9dsp_init.h
libavcodec/x86/vp9dsp_init.h
+6
-0
vp9dsp_init_16bpp_template.c
libavcodec/x86/vp9dsp_init_16bpp_template.c
+18
-3
vp9itxfm.asm
libavcodec/x86/vp9itxfm.asm
+0
-58
vp9itxfm_16bpp.asm
libavcodec/x86/vp9itxfm_16bpp.asm
+75
-21
vp9itxfm_template.asm
libavcodec/x86/vp9itxfm_template.asm
+58
-0
No files found.
libavcodec/x86/vp9dsp_init.h
View file @
f76423d0
...
...
@@ -62,6 +62,12 @@ void cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt)(uint8_t
int16_t *block, \
int eob)
/* Declare, for one transform size / bit depth / instruction set, the four
 * inverse-transform prototypes obtained by pairing idct and iadst in every
 * combination. Each line expands decl_itxfm_func once; the final line has no
 * trailing semicolon, so the invocation supplies it. */
#define decl_itxfm_funcs(size, bpp, opt) \
decl_itxfm_func(idct, idct, size, bpp, opt); \
decl_itxfm_func(iadst, idct, size, bpp, opt); \
decl_itxfm_func(idct, iadst, size, bpp, opt); \
decl_itxfm_func(iadst, iadst, size, bpp, opt)
#define mc_rep_func(avg, sz, hsz, hszb, dir, opt, type, f_sz, bpp) \
static av_always_inline void \
ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
...
...
libavcodec/x86/vp9dsp_init_16bpp_template.c
View file @
f76423d0
...
...
@@ -126,8 +126,11 @@ decl_ipred_fns(tm, BPC, mmxext, sse2);
/* Prototypes of the assembly inverse transforms for this bit depth. */
decl_itxfm_func(iwht, iwht, 4, BPC, mmxext);
#if BPC == 10
/* 4x4 transforms that only exist for 10 bits per component.
 * Each symbol is declared exactly once:
 *  - idct/idct has an mmxext version,
 *  - the three combinations involving iadst have sse2 versions,
 *  - all four combinations have ssse3 versions (decl_itxfm_funcs covers
 *    idct/idct too, so it is not declared separately). */
decl_itxfm_func(idct,  idct,  4, BPC, mmxext);
decl_itxfm_func(idct,  iadst, 4, BPC, sse2);
decl_itxfm_func(iadst, idct,  4, BPC, sse2);
decl_itxfm_func(iadst, iadst, 4, BPC, sse2);
decl_itxfm_funcs(4, BPC, ssse3);
#endif
#endif
/* HAVE_YASM */
...
...
@@ -169,6 +172,11 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact)
init_itx_func(idx, ADST_DCT, typea, typeb, size, bpp, opt); \
init_itx_func(idx, DCT_ADST, typea, typeb, size, bpp, opt); \
init_itx_func(idx, ADST_ADST, typea, typeb, size, bpp, opt)
/* Install the four idct/iadst combinations of one transform size by
 * expanding init_itx_func once per transform-type index (DCT_DCT, ADST_DCT,
 * DCT_ADST, ADST_ADST).
 *
 * The last line deliberately carries neither a trailing ';' nor a line
 * continuation: a dangling '\' would splice whatever line follows the
 * definition into the macro body, and the invocation already supplies the
 * terminating semicolon. */
#define init_itx_funcs(idx, size, bpp, opt) \
    init_itx_func(idx, DCT_DCT,   idct,  idct,  size, bpp, opt); \
    init_itx_func(idx, ADST_DCT,  idct,  iadst, size, bpp, opt); \
    init_itx_func(idx, DCT_ADST,  iadst, idct,  size, bpp, opt); \
    init_itx_func(idx, ADST_ADST, iadst, iadst, size, bpp, opt)
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
init_ipred_func
(
tm
,
TM_VP8
,
4
,
BPC
,
mmxext
);
...
...
@@ -185,13 +193,20 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact)
init_subpel3
(
1
,
avg
,
BPC
,
sse2
);
init_lpf_funcs
(
BPC
,
sse2
);
init_8_16_32_ipred_funcs
(
tm
,
TM_VP8
,
BPC
,
sse2
);
#if BPC == 10
if
(
!
bitexact
)
{
init_itx_func
(
TX_4X4
,
ADST_DCT
,
idct
,
iadst
,
4
,
10
,
sse2
);
init_itx_func
(
TX_4X4
,
DCT_ADST
,
iadst
,
idct
,
4
,
10
,
sse2
);
init_itx_func
(
TX_4X4
,
ADST_ADST
,
iadst
,
iadst
,
4
,
10
,
sse2
);
}
#endif
}
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
init_lpf_funcs
(
BPC
,
ssse3
);
#if BPC == 10
if
(
!
bitexact
)
{
init_itx_func
(
TX_4X4
,
DCT_DCT
,
idct
,
idct
,
4
,
10
,
ssse3
);
init_itx_func
s
(
TX_4X4
,
4
,
BPC
,
ssse3
);
}
#endif
}
...
...
libavcodec/x86/vp9itxfm.asm
View file @
f76423d0
...
...
@@ -289,64 +289,6 @@ IDCT_4x4_FN ssse3
; void vp9_iadst_iadst_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
;-------------------------------------------------------------------------------------------
;-----------------------------------------------------------------------
; VP9_IADST4_1D
; One 4-point inverse-ADST pass over four lanes of 16-bit words.
; In:    m0..m3 (MMX) = the four input vectors (in0..in3)
;        xmm5         = dword bias added before the >>14; must be
;                       preloaded by the caller
; Out:   m0..m3, intended to hold out0..out3 once the closing SWAP has
;        renamed the registers (see the out# notes below)
; Clobb: xmm0-xmm4, plus xmm6/xmm7 on the non-SSSE3 path
;-----------------------------------------------------------------------
%macro VP9_IADST4_1D 0
    ; Widen into XMM registers so the 32-bit products fit.
    movq2dq     xmm0, m0
    movq2dq     xmm1, m1
    movq2dq     xmm2, m2
    movq2dq     xmm3, m3
%if cpuflag(ssse3)
    paddw       m3, m0                          ; m3 = in0 + in3 (16-bit)
%endif
    punpcklwd   xmm0, xmm1                      ; interleave in0, in1
    punpcklwd   xmm2, xmm3                      ; interleave in2, in3
    pmaddwd     xmm1, xmm0, [pw_5283_13377]     ;  5283*in0 + 13377*in1
    pmaddwd     xmm4, xmm0, [pw_9929_13377]     ;  9929*in0 + 13377*in1
%if notcpuflag(ssse3)
    pmaddwd     xmm6, xmm0, [pw_13377_0]        ; 13377*in0
%endif
    pmaddwd     xmm0, [pw_15212_m13377]         ; 15212*in0 - 13377*in1
    pmaddwd     xmm3, xmm2, [pw_15212_9929]     ; 15212*in2 +  9929*in3
%if notcpuflag(ssse3)
    pmaddwd     xmm7, xmm2, [pw_m13377_13377]   ; 13377*(in3 - in2)
%endif
    pmaddwd     xmm2, [pw_m5283_m15212]         ; -5283*in2 - 15212*in3
%if cpuflag(ssse3)
    ; Only the xmm copy of in2 was overwritten; MMX m2 still holds in2.
    psubw       m3, m2                          ; m3 = in0 + in3 - in2
%else
    paddd       xmm6, xmm7                      ; 13377*(in0 - in2 + in3)
%endif
    paddd       xmm0, xmm2
    paddd       xmm3, xmm5                      ; fold the bias in once ...
    paddd       xmm2, xmm5
%if notcpuflag(ssse3)
    paddd       xmm6, xmm5
%endif
    paddd       xmm1, xmm3                      ; ... so every sum carries it
    paddd       xmm0, xmm3
    paddd       xmm4, xmm2
    psrad       xmm1, 14
    psrad       xmm0, 14
    psrad       xmm4, 14
%if cpuflag(ssse3)
    pmulhrsw    m3, [pw_13377x2]                ; out2
%else
    psrad       xmm6, 14
%endif
    ; Narrow the dword results back to saturated words.
    packssdw    xmm0, xmm0
    packssdw    xmm1, xmm1
    packssdw    xmm4, xmm4
%if notcpuflag(ssse3)
    packssdw    xmm6, xmm6
%endif
    movdq2q     m0, xmm0                        ; out3
    movdq2q     m1, xmm1                        ; out0
    movdq2q     m2, xmm4                        ; out1
%if notcpuflag(ssse3)
    movdq2q     m3, xmm6                        ; out2
%endif
    SWAP        0, 1, 2, 3
%endmacro
%macro
IADST4_FN
5
INIT_MMX
%5
cglobal
vp9_
%1
_
%3
_4x4_add
,
3
,
3
,
0
,
dst
,
stride
,
block
,
eob
...
...
libavcodec/x86/vp9itxfm_16bpp.asm
View file @
f76423d0
...
...
@@ -38,6 +38,15 @@ pw_m15137_6270: times 4 dw -15137, 6270
; Packed 16-bit multiplier tables. The "a_b" tables repeat the word pair
; (a, b) four times; the "x2" tables repeat a single doubled word eight
; times. An "m" prefix in a name marks a negated coefficient.
pw_6270_15137:      times 4 dw   6270,  15137
pw_11585x2:         times 8 dw  11585*2
pw_5283_13377:      times 4 dw   5283,  13377
pw_9929_13377:      times 4 dw   9929,  13377
pw_15212_m13377:    times 4 dw  15212, -13377
pw_15212_9929:      times 4 dw  15212,   9929
pw_m5283_m15212:    times 4 dw  -5283, -15212
pw_13377x2:         times 8 dw  13377*2
pw_m13377_13377:    times 4 dw -13377,  13377
pw_13377_0:         times 4 dw  13377,      0
SECTION
.
text
%macro
VP9_STORE_2X
6
-
7
dstq
; reg1, reg2, tmp1, tmp2, min, max, dst
...
...
@@ -129,6 +138,30 @@ IWHT4_FN 10, 1023
INIT_MMX
mmxext
IWHT4_FN
12
,
4095
;-----------------------------------------------------------------------
; VP9_IDCT4_WRITEOUT
; Final rounding of a 4x4 residual held in m0..m3 followed by two
; VP9_STORE_2X calls, two rows each, advancing dstq by 2*stride between.
; In:    m0..m3 = residual rows; m4 = lower clamp handed to VP9_STORE_2X
;        (the callers visible here zero it with pxor beforehand);
;        dstq, strideq
; Clobb: m0..m3, m5 (reloaded with pw_1023 as the upper clamp),
;        m6/m7 (passed to VP9_STORE_2X as temporaries), dstq
;-----------------------------------------------------------------------
%macro VP9_IDCT4_WRITEOUT 0
%if cpuflag(ssse3)
    ; pmulhrsw by 2048 is a rounded multiply by 2048/32768, i.e. the
    ; same (x + 8) >> 4 that the fallback spells out with paddw/psraw.
    mova        m5, [pw_2048]
    pmulhrsw    m0, m5
    pmulhrsw    m1, m5
    pmulhrsw    m2, m5
    pmulhrsw    m3, m5
%else
    mova        m5, [pw_8]
    paddw       m0, m5
    paddw       m1, m5
    paddw       m2, m5
    paddw       m3, m5
    psraw       m0, 4
    psraw       m1, 4
    psraw       m2, 4
    psraw       m3, 4
%endif
    mova        m5, [pw_1023]                   ; upper clamp
    VP9_STORE_2X 0, 1, 6, 7, 4, 5               ; rows 0 and 1
    lea         dstq, [dstq+2*strideq]
    VP9_STORE_2X 2, 3, 6, 7, 4, 5               ; rows 2 and 3
%endmacro
; 4x4 coefficients are 5+depth+sign bits, so for 10bpp, everything still fits
; in 15+1 words without additional effort, since the coefficients are 15bpp.
...
...
@@ -186,27 +219,7 @@ cglobal vp9_idct_idct_4x4_add_10, 4, 4, 8, dst, stride, block, eob
pxor
m4
,
m4
ZERO_BLOCK
blockq
,
16
,
4
,
m4
%if
cpuflag
(
ssse3
)
mova
m5
,
[
pw_2048
]
pmulhrsw
m0
,
m5
pmulhrsw
m1
,
m5
pmulhrsw
m2
,
m5
pmulhrsw
m3
,
m5
%else
mova
m5
,
[
pw_8
]
paddw
m0
,
m5
paddw
m1
,
m5
paddw
m2
,
m5
paddw
m3
,
m5
psraw
m0
,
4
psraw
m1
,
4
psraw
m2
,
4
psraw
m3
,
4
%endif
mova
m5
,
[
pw_1023
]
VP9_STORE_2X
0
,
1
,
6
,
7
,
4
,
5
lea
dstq
,
[
dstq
+
2
*
strideq
]
VP9_STORE_2X
2
,
3
,
6
,
7
,
4
,
5
VP9_IDCT4_WRITEOUT
RET
%endmacro
...
...
@@ -214,3 +227,44 @@ INIT_MMX mmxext
IDCT4_10_FN
INIT_MMX
ssse3
IDCT4_10_FN
;-----------------------------------------------------------------------
; IADST4_FN name1, MACRO1, name2, MACRO2
; Emits vp9_<name1>_<name2>_4x4_add_10(dst, stride, block, eob).
;   %1 / %3 : transform names used to build the symbol
;   %2 / %4 : select the 1D macros VP9_%2_1D (first pass) and
;             VP9_%4_1D (second pass, after the transpose)
; Coefficients are read as dwords, 16 bytes per row, and packed to words.
; xmm5 carries pd_8192 through both passes.
;-----------------------------------------------------------------------
%macro IADST4_FN 4
cglobal vp9_%1_%3_4x4_add_10, 3, 3, 0, dst, stride, block, eob
%if WIN64 && notcpuflag(ssse3)
    ; cglobal reserves no XMM registers, but the non-SSSE3 flavour of
    ; VP9_IADST4_1D writes xmm6/xmm7, so they are spilled here.
    WIN64_SPILL_XMM 8
%endif
    movdqa      xmm5, [pd_8192]

    ; Load each row's first half, then pack in its second half
    ; (dword -> saturated word).
    mova        m0, [blockq+0*16+0]
    mova        m1, [blockq+1*16+0]
    packssdw    m0, [blockq+0*16+8]
    packssdw    m1, [blockq+1*16+8]
    mova        m2, [blockq+2*16+0]
    mova        m3, [blockq+3*16+0]
    packssdw    m2, [blockq+2*16+8]
    packssdw    m3, [blockq+3*16+8]

%if cpuflag(ssse3)
    mova        m6, [pw_11585x2]
%endif
%ifnidn %1%3, iadstiadst
    ; At least one pass is not IADST4: give it an MMX copy of the
    ; constant held in xmm5 (presumably the rounding value VP9_IDCT4_1D
    ; expects in m7 -- confirm against that macro).
    movdq2q     m7, xmm5
%endif
    VP9_%2_1D
    TRANSPOSE4x4W 0, 1, 2, 3, 4
    VP9_%4_1D

    pxor        m4, m4
    ZERO_BLOCK  blockq, 16, 4, m4
    VP9_IDCT4_WRITEOUT
    RET
%endmacro
; Instantiate the three 4x4 10bpp transforms that involve an IADST pass,
; once per instruction set. Arguments: name1, 1D macro 1, name2, 1D macro 2.
INIT_MMX sse2
IADST4_FN idct,  IDCT4,  iadst, IADST4
IADST4_FN iadst, IADST4, idct,  IDCT4
IADST4_FN iadst, IADST4, iadst, IADST4

INIT_MMX ssse3
IADST4_FN idct,  IDCT4,  iadst, IADST4
IADST4_FN iadst, IADST4, idct,  IDCT4
IADST4_FN iadst, IADST4, iadst, IADST4
libavcodec/x86/vp9itxfm_template.asm
View file @
f76423d0
...
...
@@ -82,3 +82,61 @@
VP9_UNPACK_MULSUB_2W_4X
1
,
3
,
15137
,
6270
,
m7
,
4
,
5
; m1=t2, m3=t3
VP9_IDCT4_1D_FINALIZE
%endmacro
;-----------------------------------------------------------------------
; VP9_IADST4_1D
; One 4-point inverse-ADST pass over four lanes of 16-bit words.
; In:    m0..m3 (MMX) = the four input vectors (in0..in3)
;        xmm5         = dword bias added before the >>14; must be
;                       preloaded by the caller
; Out:   m0..m3, intended to hold out0..out3 once the closing SWAP has
;        renamed the registers (see the out# notes below)
; Clobb: xmm0-xmm4, plus xmm6/xmm7 on the non-SSSE3 path
;-----------------------------------------------------------------------
%macro VP9_IADST4_1D 0
    ; Widen into XMM registers so the 32-bit products fit.
    movq2dq     xmm0, m0
    movq2dq     xmm1, m1
    movq2dq     xmm2, m2
    movq2dq     xmm3, m3
%if cpuflag(ssse3)
    paddw       m3, m0                          ; m3 = in0 + in3 (16-bit)
%endif
    punpcklwd   xmm0, xmm1                      ; interleave in0, in1
    punpcklwd   xmm2, xmm3                      ; interleave in2, in3
    pmaddwd     xmm1, xmm0, [pw_5283_13377]     ;  5283*in0 + 13377*in1
    pmaddwd     xmm4, xmm0, [pw_9929_13377]     ;  9929*in0 + 13377*in1
%if notcpuflag(ssse3)
    pmaddwd     xmm6, xmm0, [pw_13377_0]        ; 13377*in0
%endif
    pmaddwd     xmm0, [pw_15212_m13377]         ; 15212*in0 - 13377*in1
    pmaddwd     xmm3, xmm2, [pw_15212_9929]     ; 15212*in2 +  9929*in3
%if notcpuflag(ssse3)
    pmaddwd     xmm7, xmm2, [pw_m13377_13377]   ; 13377*(in3 - in2)
%endif
    pmaddwd     xmm2, [pw_m5283_m15212]         ; -5283*in2 - 15212*in3
%if cpuflag(ssse3)
    ; Only the xmm copy of in2 was overwritten; MMX m2 still holds in2.
    psubw       m3, m2                          ; m3 = in0 + in3 - in2
%else
    paddd       xmm6, xmm7                      ; 13377*(in0 - in2 + in3)
%endif
    paddd       xmm0, xmm2
    paddd       xmm3, xmm5                      ; fold the bias in once ...
    paddd       xmm2, xmm5
%if notcpuflag(ssse3)
    paddd       xmm6, xmm5
%endif
    paddd       xmm1, xmm3                      ; ... so every sum carries it
    paddd       xmm0, xmm3
    paddd       xmm4, xmm2
    psrad       xmm1, 14
    psrad       xmm0, 14
    psrad       xmm4, 14
%if cpuflag(ssse3)
    pmulhrsw    m3, [pw_13377x2]                ; out2
%else
    psrad       xmm6, 14
%endif
    ; Narrow the dword results back to saturated words.
    packssdw    xmm0, xmm0
    packssdw    xmm1, xmm1
    packssdw    xmm4, xmm4
%if notcpuflag(ssse3)
    packssdw    xmm6, xmm6
%endif
    movdq2q     m0, xmm0                        ; out3
    movdq2q     m1, xmm1                        ; out0
    movdq2q     m2, xmm4                        ; out1
%if notcpuflag(ssse3)
    movdq2q     m3, xmm6                        ; out2
%endif
    SWAP        0, 1, 2, 3
%endmacro
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment