Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
630967ef
Commit
630967ef
authored
Dec 02, 2017
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/utvideodec : add SIMD (SSSE3 and AVX2) for gradient_pred
parent
4353c350
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
101 additions
and
1 deletion
+101
-1
lossless_videodsp.c
libavcodec/lossless_videodsp.c
+11
-0
lossless_videodsp.h
libavcodec/lossless_videodsp.h
+1
-0
utvideodec.c
libavcodec/utvideodec.c
+4
-1
lossless_videodsp.asm
libavcodec/x86/lossless_videodsp.asm
+80
-0
lossless_videodsp_init.c
libavcodec/x86/lossless_videodsp_init.c
+5
-0
No files found.
libavcodec/lossless_videodsp.c
View file @
630967ef
...
...
@@ -98,6 +98,16 @@ static int add_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsigned ma
return
acc
;
}
/**
 * Reverse gradient prediction in place for one row of 8-bit samples.
 *
 * For every position x in [0, width) the stored value src[x] is treated as a
 * residual and replaced by
 *     (top - top_left + left + residual) & 0xFF
 * where top = src[x - stride], top_left = src[x - stride - 1] and
 * left = src[x - 1]. Because left is read after the previous position has
 * been rewritten, each output depends on the output just before it.
 *
 * @param src    first sample to rewrite; src[-1] and the samples at
 *               src[-stride - 1] .. src[width - 1 - stride] are read
 * @param stride offset subtracted from the index to reach the "top" sample
 * @param width  number of samples to rewrite
 */
static void add_gradient_pred_c(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width){
    int x = 0;

    while (x < width) {
        const int top      = src[x - stride];
        const int top_left = src[x - (stride + 1)];
        const int left     = src[x - 1];
        const int residual = src[x];

        /* Masking keeps the low 8 bits, i.e. the sum modulo 256. */
        src[x] = (top - top_left + left + residual) & 0xFF;
        x++;
    }
}
void
ff_llviddsp_init
(
LLVidDSPContext
*
c
)
{
...
...
@@ -106,6 +116,7 @@ void ff_llviddsp_init(LLVidDSPContext *c)
c
->
add_left_pred
=
add_left_pred_c
;
c
->
add_left_pred_int16
=
add_left_pred_int16_c
;
c
->
add_gradient_pred
=
add_gradient_pred_c
;
if
(
ARCH_PPC
)
ff_llviddsp_init_ppc
(
c
);
...
...
libavcodec/lossless_videodsp.h
View file @
630967ef
...
...
@@ -39,6 +39,7 @@ typedef struct LLVidDSPContext {
int
(
*
add_left_pred_int16
)(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
left
);
void
(
*
add_gradient_pred
)(
uint8_t
*
src
/* align 32 */
,
const
ptrdiff_t
stride
,
const
ptrdiff_t
width
);
}
LLVidDSPContext
;
void
ff_llviddsp_init
(
LLVidDSPContext
*
llviddsp
);
...
...
libavcodec/utvideodec.c
View file @
630967ef
...
...
@@ -460,6 +460,7 @@ static void restore_gradient_planar(UtvideoContext *c, uint8_t *src, ptrdiff_t s
uint8_t
*
bsrc
;
int
slice_start
,
slice_height
;
const
int
cmask
=
~
rmode
;
int
min_width
=
FFMIN
(
width
,
32
);
for
(
slice
=
0
;
slice
<
slices
;
slice
++
)
{
slice_start
=
((
slice
*
height
)
/
slices
)
&
cmask
;
...
...
@@ -479,12 +480,14 @@ static void restore_gradient_planar(UtvideoContext *c, uint8_t *src, ptrdiff_t s
for
(
j
=
1
;
j
<
slice_height
;
j
++
)
{
// second line - first element has top prediction, the rest uses gradient
bsrc
[
0
]
=
(
bsrc
[
0
]
+
bsrc
[
-
stride
])
&
0xFF
;
for
(
i
=
1
;
i
<
width
;
i
++
)
{
for
(
i
=
1
;
i
<
min_width
;
i
++
)
{
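/* the first 32 pixels are handled in C so that the DSP call below starts at bsrc + 32 (the DSP routine needs 32-byte alignment) */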
A
=
bsrc
[
i
-
stride
];
B
=
bsrc
[
i
-
(
stride
+
1
)];
C
=
bsrc
[
i
-
1
];
bsrc
[
i
]
=
(
A
-
B
+
C
+
bsrc
[
i
])
&
0xFF
;
}
if
(
width
>
32
)
c
->
llviddsp
.
add_gradient_pred
(
bsrc
+
32
,
stride
,
width
-
32
);
bsrc
+=
stride
;
}
}
...
...
libavcodec/x86/lossless_videodsp.asm
View file @
630967ef
...
...
@@ -2,6 +2,7 @@
;* SIMD lossless video DSP utils
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2014 Michael Niedermayer
;* Copyright (c) 2017 Jokyo Images
;*
;* This file is part of FFmpeg.
;*
...
...
@@ -325,3 +326,82 @@ cglobal add_left_pred_int16, 4,4,8, dst, src, mask, w, left
ADD_HFYU_LEFT_LOOP_INT16
u
,
a
.
src_unaligned
:
ADD_HFYU_LEFT_LOOP_INT16
u
,
u
;---------------------------------------------------------------------------------------------
; void add_gradient_pred(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width)
;---------------------------------------------------------------------------------------------
; ADD_GRADIENT_PRED: emits add_gradient_pred for the instruction set selected
; by the preceding INIT_XMM / INIT_YMM. It rewrites a row in place so that
;     src[x] = src[x] + src[x - stride] - src[x - stride - 1] + src[x - 1]
; (byte arithmetic, wrapping modulo 256). Since every output feeds the next
; one, the recurrence is unrolled as
;     out[x] = C + prefix_sum(A - B)[x] + prefix_sum(cur)[x]
; where C is the last already-reconstructed byte to the left of the vector.
; Register roles: xm0 = pshufb mask, xm1 = running C (carry), m2/m4 = sums,
; m3 = scratch.
; NOTE(review): the loop body runs before the first width test and advances
; by mmsize, so whole vectors are read and written even if width is 0 or not
; a multiple of mmsize; callers presumably guarantee padding -- confirm.
%macro
ADD_GRADIENT_PRED
0
; 3 arguments, 4 general-purpose registers (tmp is the extra one),
; 5 vector registers (m0..m4).
cglobal
add_gradient_pred
,
3
,
4
,
5
,
src
,
stride
,
width
,
tmp
; xm0 = shuffle mask used below to replicate one byte into all 16 lanes.
; pb_15 is defined outside this view; presumably 16 bytes of value 15, which
; would make pshufb broadcast byte 15 -- TODO confirm.
mova
xm0
,
[
pb_15
]
;load src - 1 in xm1
; movd reads 4 bytes starting at src - 1; only the lowest byte (src[-1]) is
; kept by the broadcast that follows.
movd
xm1
,
[
srcq
-
1
]
; Replicate byte 0 of xm1 into every byte: xm1 = initial C (left neighbour).
%if
cpuflag
(
avx2
)
vpbroadcastb
xm1
,
xm1
%else
; Without AVX2: pshufb with an all-zero mask selects byte 0 for every lane.
pxor
xm2
,
xm2
pshufb
xm1
,
xm2
%endif
; Point srcq at the end of the row and turn widthq into a negative index
; that counts up towards zero, so one add both advances and tests the loop.
add
srcq
,
widthq
neg
widthq
; Negate the stride so that srcq + strideq addresses the row at -stride.
neg
strideq
.
loop
:
; tmpq = end of the row located stride bytes before the current one.
lea
tmpq
,
[
srcq
+
strideq
]
; Aligned load (mova): this address must be mmsize-aligned.
mova
m2
,
[
tmpq
+
widthq
]
; A = src[x-stride]
; Unaligned load (movu): same row shifted one byte to the left.
movu
m3
,
[
tmpq
+
widthq
-
1
]
; B = src[x - (stride + 1)]
; Aligned load of the residuals stored in the current row.
mova
m4
,
[
srcq
+
widthq
]
; current val (src[x])
psubb
m2
,
m3
; A - B
; prefix sum A-B
; Inclusive byte-wise prefix sum in log2(16) = 4 steps: shift the vector left
; by 1, 2, 4, 8 bytes and add it to itself each time. pslldq works on each
; 128-bit lane separately, so with ymm registers each lane gets its own
; independent prefix sum.
pslldq
m3
,
m2
,
1
paddb
m2
,
m3
pslldq
m3
,
m2
,
2
paddb
m2
,
m3
pslldq
m3
,
m2
,
4
paddb
m2
,
m3
pslldq
m3
,
m2
,
8
paddb
m2
,
m3
; prefix sum current val
; Same 4-step prefix sum, applied to the residuals in m4.
pslldq
m3
,
m4
,
1
paddb
m4
,
m3
pslldq
m3
,
m4
,
2
paddb
m4
,
m3
pslldq
m3
,
m4
,
4
paddb
m4
,
m3
pslldq
m3
,
m4
,
8
paddb
m4
,
m3
; last sum
; m2 = prefix_sum(A - B) + prefix_sum(cur), still missing the carry C.
paddb
m2
,
m4
; current + (A - B)
; Add the broadcast carry to the low 16 bytes: xm1 now holds final outputs.
paddb
xm1
,
xm2
; += C
; Aligned 16-byte store of the reconstructed samples.
mova
[
srcq
+
widthq
]
,
xm1
; store
; Re-broadcast through the xm0 mask so xm1 becomes the carry for the next
; 16 bytes.
pshufb
xm1
,
xm0
; put last val in all val of xm1
; AVX2 build only: the upper 128-bit lane holds a prefix sum that started
; from zero, so it is finished here with the carry from the lower lane.
%if
mmsize
==
32
vextracti128
xm2
,
m2
,
1
; get second lane of the ymm
paddb
xm1
,
xm2
; += C
mova
[
srcq
+
widthq
+
16
]
,
xm1
; store
pshufb
xm1
,
xm0
; put last val in all val of m1
%endif
; Advance by one vector; keep looping while the negative index is below 0.
add
widthq
,
mmsize
jl
.
loop
RET
%endmacro
; Instantiate the SSSE3 (xmm, 16 bytes per iteration) version.
INIT_XMM
ssse3
ADD_GRADIENT_PRED
; Instantiate the AVX2 (ymm, 32 bytes per iteration) version when the build
; enables external AVX2 assembly.
%if
HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
ADD_GRADIENT_PRED
%endif
libavcodec/x86/lossless_videodsp_init.c
View file @
630967ef
...
...
@@ -44,6 +44,9 @@ int ff_add_left_pred_unaligned_avx2(uint8_t *dst, const uint8_t *src,
int
ff_add_left_pred_int16_ssse3
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
int
ff_add_left_pred_int16_sse4
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
void
ff_add_gradient_pred_ssse3
(
uint8_t
*
src
,
const
ptrdiff_t
stride
,
const
ptrdiff_t
width
);
void
ff_add_gradient_pred_avx2
(
uint8_t
*
src
,
const
ptrdiff_t
stride
,
const
ptrdiff_t
width
);
#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
static
void
add_median_pred_cmov
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
const
uint8_t
*
diff
,
ptrdiff_t
w
,
...
...
@@ -109,6 +112,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
c
->
add_left_pred
=
ff_add_left_pred_ssse3
;
c
->
add_left_pred_int16
=
ff_add_left_pred_int16_ssse3
;
c
->
add_gradient_pred
=
ff_add_gradient_pred_ssse3
;
}
if
(
EXTERNAL_SSSE3_FAST
(
cpu_flags
))
{
...
...
@@ -121,5 +125,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
c
->
add_bytes
=
ff_add_bytes_avx2
;
c
->
add_left_pred
=
ff_add_left_pred_unaligned_avx2
;
c
->
add_gradient_pred
=
ff_add_gradient_pred_avx2
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment