Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
1c3be325
Commit
1c3be325
authored
Oct 06, 2015
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9: add 10/12bpp mmxext-optimized iwht_iwht_4x4 function.
parent
b6594a96
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
187 additions
and
22 deletions
+187
-22
Makefile
libavcodec/x86/Makefile
+1
-0
vp9dsp_init.c
libavcodec/x86/vp9dsp_init.c
+2
-2
vp9dsp_init.h
libavcodec/x86/vp9dsp_init.h
+11
-4
vp9dsp_init_16bpp_template.c
libavcodec/x86/vp9dsp_init_16bpp_template.c
+15
-1
vp9itxfm.asm
libavcodec/x86/vp9itxfm.asm
+1
-15
vp9itxfm_16bpp.asm
libavcodec/x86/vp9itxfm_16bpp.asm
+120
-0
vp9itxfm_template.asm
libavcodec/x86/vp9itxfm_template.asm
+37
-0
No files found.
libavcodec/x86/Makefile
View file @
1c3be325
...
...
@@ -165,6 +165,7 @@ YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
YASM-OBJS-$(CONFIG_VP9_DECODER)
+=
x86/vp9intrapred.o
\
x86/vp9intrapred_16bpp.o
\
x86/vp9itxfm.o
\
x86/vp9itxfm_16bpp.o
\
x86/vp9lpf.o
\
x86/vp9lpf_16bpp.o
\
x86/vp9mc.o
\
...
...
libavcodec/x86/vp9dsp_init.c
View file @
1c3be325
...
...
@@ -216,10 +216,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
int
cpu_flags
;
if
(
bpp
==
10
)
{
ff_vp9dsp_init_10bpp_x86
(
dsp
);
ff_vp9dsp_init_10bpp_x86
(
dsp
,
bitexact
);
return
;
}
else
if
(
bpp
==
12
)
{
ff_vp9dsp_init_12bpp_x86
(
dsp
);
ff_vp9dsp_init_12bpp_x86
(
dsp
,
bitexact
);
return
;
}
...
...
libavcodec/x86/vp9dsp_init.h
View file @
1c3be325
...
...
@@ -25,6 +25,9 @@
#include "libavcodec/vp9dsp.h"
// hack to force-expand BPC: the macros in this header hand their bpp parameter
// to cat() as an ordinary argument instead of pasting it with ## themselves, so
// a macro such as BPC is replaced by its value before the three tokens are
// glued together here
#define cat(a, bpp, b) a##bpp##b
#define decl_fpel_func(avg, sz, bpp, opt) \
void ff_vp9_##avg##sz##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
...
...
@@ -53,6 +56,12 @@ decl_ipred_fn(type, 8, bpp, opt8_16_32); \
decl_ipred_fn(type, 16, bpp, opt8_16_32); \
decl_ipred_fn(type, 32, bpp, opt8_16_32)
// Declares the prototype of one inverse-transform-and-add function named
// ff_vp9_<typea>_<typeb>_<size>x<size>_add_<bpp>_<opt>. The name is assembled
// through cat() so that bpp may itself be a macro (e.g. BPC). The expansion has
// no trailing semicolon; the caller supplies it.
#define decl_itxfm_func(typea, typeb, size, bpp, opt) \
void cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt)(uint8_t *dst, \
ptrdiff_t stride, \
int16_t *block, \
int eob)
#define mc_rep_func(avg, sz, hsz, hszb, dir, opt, type, f_sz, bpp) \
static av_always_inline void \
ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
...
...
@@ -154,8 +163,6 @@ filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
init_subpel3_8to64(idx, type, bpp, opt); \
init_subpel2(4, idx, 4, type, bpp, opt)
#define cat(a, bpp, b) a##bpp##b
#define init_ipred_func(type, enum, sz, bpp, opt) \
dsp->intra_pred[TX_##sz##X##sz][enum##_PRED] = \
cat(ff_vp9_ipred_##type##_##sz##x##sz##_, bpp, _##opt)
...
...
@@ -169,8 +176,8 @@ filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
init_ipred_func(type, enum, 4, bpp, opt); \
init_8_16_32_ipred_funcs(type, enum, bpp, opt)
void
ff_vp9dsp_init_10bpp_x86
(
VP9DSPContext
*
dsp
);
void
ff_vp9dsp_init_12bpp_x86
(
VP9DSPContext
*
dsp
);
void
ff_vp9dsp_init_10bpp_x86
(
VP9DSPContext
*
dsp
,
int
bitexact
);
void
ff_vp9dsp_init_12bpp_x86
(
VP9DSPContext
*
dsp
,
int
bitexact
);
void
ff_vp9dsp_init_16bpp_x86
(
VP9DSPContext
*
dsp
);
#endif
/* AVCODEC_X86_VP9DSP_INIT_H */
libavcodec/x86/vp9dsp_init_16bpp_template.c
View file @
1c3be325
...
...
@@ -123,9 +123,11 @@ lpf_mix2_wrappers_set(BPC, ssse3);
lpf_mix2_wrappers_set
(
BPC
,
avx
);
decl_ipred_fns
(
tm
,
BPC
,
mmxext
,
sse2
);
decl_itxfm_func
(
iwht
,
iwht
,
4
,
BPC
,
mmxext
);
#endif
/* HAVE_YASM */
av_cold
void
INIT_FUNC
(
VP9DSPContext
*
dsp
)
av_cold
void
INIT_FUNC
(
VP9DSPContext
*
dsp
,
int
bitexact
)
{
#if HAVE_YASM
int
cpu_flags
=
av_get_cpu_flags
();
...
...
@@ -155,8 +157,20 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp)
init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)
/* Stores ff_vp9_<typea>_<typeb>_<size>x<size>_add_<bpp>_<opt> into
 * dsp->itxfm_add[idxa][idxb]. bpp is pasted with ## directly, so it has to be
 * a literal token by the time this macro is expanded; invoking it directly
 * with a macro such as BPC would paste the macro's name, not its value.
 * The expansion already ends in a semicolon. */
#define init_itx_func(idxa, idxb, typea, typeb, size, bpp, opt) \
dsp->itxfm_add[idxa][idxb] = \
ff_vp9_##typea##_##typeb##_##size##x##size##_add_##bpp##_##opt;
/* Installs one function into all four transform-type slots of row idx
 * (DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST). bpp is forwarded as a plain
 * argument, so a macro passed here is expanded before init_itx_func pastes it.
 * Expands to several statements without a do { } while (0) wrapper, so use it
 * only inside a braced block. */
#define init_itx_func_one(idx, typea, typeb, size, bpp, opt) \
init_itx_func(idx, DCT_DCT, typea, typeb, size, bpp, opt); \
init_itx_func(idx, ADST_DCT, typea, typeb, size, bpp, opt); \
init_itx_func(idx, DCT_ADST, typea, typeb, size, bpp, opt); \
init_itx_func(idx, ADST_ADST, typea, typeb, size, bpp, opt)
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
init_ipred_func
(
tm
,
TM_VP8
,
4
,
BPC
,
mmxext
);
if
(
!
bitexact
)
{
init_itx_func_one
(
4
/* lossless */
,
iwht
,
iwht
,
4
,
BPC
,
mmxext
);
}
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
...
...
libavcodec/x86/vp9itxfm.asm
View file @
1c3be325
...
...
@@ -22,6 +22,7 @@
;******************************************************************************
%include
"libavutil/x86/x86util.asm"
%include
"vp9itxfm_template.asm"
SECTION_RODATA
...
...
@@ -164,21 +165,6 @@ SECTION .text
; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
;-------------------------------------------------------------------------------------------
%macro
VP9_IWHT4_1D
0
SWAP
1
,
2
,
3
paddw
m0
,
m2
psubw
m3
,
m1
psubw
m4
,
m0
,
m3
psraw
m4
,
1
psubw
m5
,
m4
,
m1
SWAP
5
,
1
psubw
m4
,
m2
SWAP
4
,
2
psubw
m0
,
m1
paddw
m3
,
m2
SWAP
3
,
2
,
1
%endmacro
INIT_MMX
mmx
cglobal
vp9_iwht_iwht_4x4_add
,
3
,
3
,
0
,
dst
,
stride
,
block
,
eob
mova
m0
,
[
blockq
+
0
*
8
]
...
...
libavcodec/x86/vp9itxfm_16bpp.asm
0 → 100644
View file @
1c3be325
;******************************************************************************
;* VP9 inverse transform x86 SIMD optimizations
;*
;* Copyright (C) 2015 Ronald S. Bultje <rsbultje gmail com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"libavutil/x86/x86util.asm"
%include
"vp9itxfm_template.asm"
SECTION_RODATA
cextern
pw_1023
cextern
pw_4095
SECTION
.
text
; Adds two registers of word residuals to two consecutive destination rows,
; clamps each result to [min, max] and writes the rows back.
;   %1, %2 = registers holding the residuals for row 0 and row 1
;   %3, %4 = scratch registers (overwritten with the reconstructed rows)
;   %5     = register holding the lower clamp bound
;   %6     = register holding the upper clamp bound
;   %7     = address of row 0 (defaults to dstq); row 1 is at %7 + strideq
; strideq is taken from the enclosing function, not passed as an argument.
%macro
VP9_STORE_2X
6
-
7
dstq
; reg1, reg2, tmp1, tmp2, min, max, dst
; load the two destination rows
mova
m%3
,
[
%7
]
mova
m%4
,
[
%7
+
strideq
]
; add the residuals
paddw
m%3
,
m%1
paddw
m%4
,
m%2
; clamp from below with min, then from above with max (signed word compares)
pmaxsw
m%3
,
m%5
pmaxsw
m%4
,
m%5
pminsw
m%3
,
m%6
pminsw
m%4
,
m%6
; write both rows back
mova
[
%7
]
,
m%3
mova
[
%7
+
strideq
]
,
m%4
%endmacro
; Fully unrolled fill of a square coefficient block with the contents of %4.
;   %1 = base address of the block
;   %2 = distance in bytes between consecutive rows
;   %3 = number of rows; each row covers %3 * 4 bytes
;   %4 = register to store (callers are expected to pass a zeroed register)
; Emits %3 * (%3 * 4 / mmsize) stores of mmsize bytes each.
%macro
ZERO_BLOCK
4
; mem, stride, nnzcpl, zero_reg
%assign
%%
y
0
; outer loop: one iteration per row, %%y is the row's byte offset
%rep
%3
%assign
%%
x
0
; inner loop: as many full-register stores as it takes to cover one row
%rep
%3
*
4
/
mmsize
mova
[
%1
+
%%
y
+
%%
x
]
,
%4
%assign
%%
x
(
%%
x
+
mmsize
)
%endrep
%assign
%%
y
(
%%
y
+
%2
)
%endrep
%endmacro
; The input coefficients are scaled up by 2 bits (which we scale back down
; immediately in the iwht), and otherwise increase orthonormally by 1 bit per
; iwht_1d pass. Therefore, a diff of 10-12+sign bits will fit in 12-14+sign
; bits after scaling, i.e. everything can be done in 15+sign bit words. Since
; the quant fractional bits add 2 bits, in 12bpp we need to scale before
; converting to words, because the input will be 16+sign bits, which doesn't
; fit in 15+sign bit words; in 10bpp we can scale after converting to words
; (which takes half the instructions), since the input is only 14+sign bits,
; which fits in 15+sign bit words directly.
; Emits vp9_iwht_iwht_4x4_add_<bpp>(dst, stride, block, eob).
;   %1 = bit depth; selects where the >>2 downscale happens (see below)
;   %2 = maximum pixel value; selects the pw_<max> constant used as upper clamp
; Flow: load the 4x4 block of dword coefficients (16 bytes per row), narrow it
; to words with a >>2 downscale, run VP9_IWHT4_1D, transpose, run
; VP9_IWHT4_1D again, add the result to dst clamped to [0, pw_<max>], then
; clear the coefficient block.
; Register roles: m0-m3 = rows, m4/m5 = scratch, m6 = zero, m7 = max.
; NOTE(review): the +0 / +8 half-row loads assume 8-byte registers, which is
; the case for the INIT_MMX instantiations that follow this macro.
%macro
IWHT4_FN
2
; bpp, max
cglobal
vp9_iwht_iwht_4x4_add_
%1
,
3
,
3
,
8
,
dst
,
stride
,
block
,
eob
; m7 = upper clamp bound for the final store
mova
m7
,
[
pw_
%2
]
; rows 0 and 1: low halves (first two dword coefficients of each row)
mova
m0
,
[
blockq
+
0
*
16
+
0
]
mova
m1
,
[
blockq
+
1
*
16
+
0
]
%if
%1
>=
12
; 12bpp: shift as dwords first, then pack to words, so that the value
; already fits when packssdw saturates to signed 16 bit
mova
m4
,
[
blockq
+
0
*
16
+
8
]
mova
m5
,
[
blockq
+
1
*
16
+
8
]
psrad
m0
,
2
psrad
m1
,
2
psrad
m4
,
2
psrad
m5
,
2
packssdw
m0
,
m4
packssdw
m1
,
m5
%else
; 10bpp: pack straight from memory, then shift as words (fewer instructions)
packssdw
m0
,
[
blockq
+
0
*
16
+
8
]
packssdw
m1
,
[
blockq
+
1
*
16
+
8
]
psraw
m0
,
2
psraw
m1
,
2
%endif
; rows 2 and 3: same narrowing as above
mova
m2
,
[
blockq
+
2
*
16
+
0
]
mova
m3
,
[
blockq
+
3
*
16
+
0
]
%if
%1
>=
12
mova
m4
,
[
blockq
+
2
*
16
+
8
]
mova
m5
,
[
blockq
+
3
*
16
+
8
]
psrad
m2
,
2
psrad
m3
,
2
psrad
m4
,
2
psrad
m5
,
2
packssdw
m2
,
m4
packssdw
m3
,
m5
%else
packssdw
m2
,
[
blockq
+
2
*
16
+
8
]
packssdw
m3
,
[
blockq
+
3
*
16
+
8
]
psraw
m2
,
2
psraw
m3
,
2
%endif
; first 1-D pass, transpose (m4 as scratch), second 1-D pass
VP9_IWHT4_1D
TRANSPOSE4x4W
0
,
1
,
2
,
3
,
4
VP9_IWHT4_1D
; m6 = 0: lower clamp bound for the stores and fill value for ZERO_BLOCK
pxor
m6
,
m6
; reconstruct rows 0-1, advance dst by two rows, reconstruct rows 2-3
VP9_STORE_2X
0
,
1
,
4
,
5
,
6
,
7
lea
dstq
,
[
dstq
+
strideq
*
2
]
VP9_STORE_2X
2
,
3
,
4
,
5
,
6
,
7
; clear the 4 rows x 16 bytes of coefficients that were just consumed
ZERO_BLOCK
blockq
,
16
,
4
,
m6
RET
%endmacro
INIT_MMX
mmxext
IWHT4_FN
10
,
1023
INIT_MMX
mmxext
IWHT4_FN
12
,
4095
libavcodec/x86/vp9itxfm_template.asm
0 → 100644
View file @
1c3be325
;******************************************************************************
;* VP9 IDCT SIMD optimizations
;*
;* Copyright (C) 2013 Clément Bœsch <u pkh me>
;* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
; One 1-D pass of the 4-point IWHT on packed words.
; In/out: m0-m3 (one input/output vector each). Scratch: m4, m5.
; Takes no arguments; the register numbers are hard-coded.
; NOTE(review): SWAP and the three-operand psubw forms come from x86inc/x86util;
; SWAP is assumed to only rename registers at assembly time (no instruction is
; emitted), so every mN below refers to the mapping in effect at that point and
; the statement order must not be changed.
%macro
VP9_IWHT4_1D
0
SWAP
1
,
2
,
3
paddw
m0
,
m2
psubw
m3
,
m1
; m4 = (m0 - m3) >> 1, arithmetic shift on words
psubw
m4
,
m0
,
m3
psraw
m4
,
1
psubw
m5
,
m4
,
m1
SWAP
5
,
1
psubw
m4
,
m2
SWAP
4
,
2
psubw
m0
,
m1
paddw
m3
,
m2
SWAP
3
,
2
,
1
%endmacro
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment