Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
4353c350
Commit
4353c350
authored
Dec 02, 2017
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/x86/lossless_videodsp : add avx2 version for add_left_pred
parent
cfbcea1c
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
44 additions
and
22 deletions
+44
-22
lossless_videodsp.asm
libavcodec/x86/lossless_videodsp.asm
+41
-22
lossless_videodsp_init.c
libavcodec/x86/lossless_videodsp_init.c
+3
-0
No files found.
libavcodec/x86/lossless_videodsp.asm
View file @
4353c350
...
@@ -114,40 +114,54 @@ MEDIAN_PRED
...
@@ -114,40 +114,54 @@ MEDIAN_PRED
add
dstq
,
wq
add
dstq
,
wq
neg
wq
neg
wq
%%
.
loop
:
%%
.
loop
:
pshufb
xm0
,
xm5
%if
%2
%if
%2
mova
m1
,
[
srcq
+
wq
]
mova
m1
,
[
srcq
+
wq
]
%else
%else
movu
m1
,
[
srcq
+
wq
]
movu
m1
,
[
srcq
+
wq
]
%endif
%endif
mova
m2
,
m1
psllw
m2
,
m1
,
8
psllw
m1
,
8
paddb
m1
,
m2
paddb
m1
,
m2
mova
m2
,
m1
pshufb
m2
,
m1
,
m3
pshufb
m1
,
m3
paddb
m1
,
m2
paddb
m1
,
m2
pshufb
m0
,
m5
pshufb
m2
,
m1
,
m4
mova
m2
,
m1
pshufb
m1
,
m4
paddb
m1
,
m2
paddb
m1
,
m2
%if
mmsize
==
16
%if
mmsize
>=
16
mova
m2
,
m1
pshufb
m2
,
m1
,
m6
pshufb
m1
,
m6
paddb
m1
,
m2
paddb
m1
,
m2
%endif
%endif
paddb
m0
,
m1
paddb
xm0
,
x
m1
%if
%1
%if
%1
mova
[
dstq
+
wq
]
,
m0
mova
[
dstq
+
wq
]
,
x
m0
%else
%else
movq
[
dstq
+
wq
]
,
m0
movq
[
dstq
+
wq
]
,
xm0
movhps
[
dstq
+
wq
+
8
]
,
m0
movhps
[
dstq
+
wq
+
8
]
,
xm0
%endif
%if
mmsize
==
32
vextracti128
xm2
,
m1
,
1
; get second lane of the ymm
pshufb
xm0
,
xm5
; set alls val to last val of the first lane
paddb
xm0
,
xm2
;store val
%if
%1
mova
[
dstq
+
wq
+
16
]
,
xm0
%else
;
movq
[
dstq
+
wq
+
16
]
,
xm0
movhps
[
dstq
+
wq
+
16
+
8
]
,
xm0
%endif
%endif
%endif
add
wq
,
mmsize
add
wq
,
mmsize
jl
%%
.
loop
jl
%%
.
loop
%if
mmsize
==
32
mov
eax
,
[
dstq
-
1
]
and
eax
,
0xff
%else
;
mov
eax
,
mmsize
-
1
mov
eax
,
mmsize
-
1
sub
eax
,
wd
sub
eax
,
wd
movd
m1
,
eax
movd
m1
,
eax
pshufb
m0
,
m1
pshufb
m0
,
m1
movd
eax
,
m0
movd
eax
,
m0
%endif
RET
RET
%endmacro
%endmacro
...
@@ -166,15 +180,15 @@ cglobal add_left_pred, 3,3,7, dst, src, w, left
...
@@ -166,15 +180,15 @@ cglobal add_left_pred, 3,3,7, dst, src, w, left
%macro
ADD_LEFT_PRED_UNALIGNED
0
%macro
ADD_LEFT_PRED_UNALIGNED
0
cglobal
add_left_pred_unaligned
,
3
,
3
,
7
,
dst
,
src
,
w
,
left
cglobal
add_left_pred_unaligned
,
3
,
3
,
7
,
dst
,
src
,
w
,
left
mova
m5
,
[
pb_15
]
mova
x
m5
,
[
pb_15
]
mova
m6
,
[
pb_zzzzzzzz77777777
]
VBROADCASTI128
m6
,
[
pb_zzzzzzzz77777777
]
mova
m4
,
[
pb_zzzz3333zzzzbbbb
]
VBROADCASTI128
m4
,
[
pb_zzzz3333zzzzbbbb
]
mova
m3
,
[
pb_zz11zz55zz99zzdd
]
VBROADCASTI128
m3
,
[
pb_zz11zz55zz99zzdd
]
movd
m0
,
leftm
movd
x
m0
,
leftm
pslldq
m0
,
15
pslldq
x
m0
,
15
test
srcq
,
15
test
srcq
,
mmsize
-
1
jnz
.
src_unaligned
jnz
.
src_unaligned
test
dstq
,
15
test
dstq
,
mmsize
-
1
jnz
.
dst_unaligned
jnz
.
dst_unaligned
ADD_LEFT_LOOP
1
,
1
ADD_LEFT_LOOP
1
,
1
.
dst_unaligned
:
.
dst_unaligned
:
...
@@ -186,6 +200,11 @@ cglobal add_left_pred_unaligned, 3,3,7, dst, src, w, left
...
@@ -186,6 +200,11 @@ cglobal add_left_pred_unaligned, 3,3,7, dst, src, w, left
INIT_XMM
ssse3
INIT_XMM
ssse3
ADD_LEFT_PRED_UNALIGNED
ADD_LEFT_PRED_UNALIGNED
%if
HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
ADD_LEFT_PRED_UNALIGNED
%endif
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
; void ff_add_bytes(uint8_t *dst, uint8_t *src, ptrdiff_t w);
; void ff_add_bytes(uint8_t *dst, uint8_t *src, ptrdiff_t w);
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
...
...
libavcodec/x86/lossless_videodsp_init.c
View file @
4353c350
...
@@ -38,6 +38,8 @@ int ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
...
@@ -38,6 +38,8 @@ int ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
ptrdiff_t
w
,
int
left
);
ptrdiff_t
w
,
int
left
);
int
ff_add_left_pred_unaligned_ssse3
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
ff_add_left_pred_unaligned_ssse3
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
ptrdiff_t
w
,
int
left
);
ptrdiff_t
w
,
int
left
);
int
ff_add_left_pred_unaligned_avx2
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
ptrdiff_t
w
,
int
left
);
int
ff_add_left_pred_int16_ssse3
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
int
ff_add_left_pred_int16_ssse3
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
int
ff_add_left_pred_int16_sse4
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
int
ff_add_left_pred_int16_sse4
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
...
@@ -118,5 +120,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
...
@@ -118,5 +120,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
}
}
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
c
->
add_bytes
=
ff_add_bytes_avx2
;
c
->
add_bytes
=
ff_add_bytes_avx2
;
c
->
add_left_pred
=
ff_add_left_pred_unaligned_avx2
;
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment