Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
e9930883
Commit
e9930883
authored
Oct 22, 2017
by
Martin Vignali
Committed by
James Darnley
Oct 29, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
libavcodec/bswapdsp : add AVX2 func for bswap_buf (swap uint32_t)
parent
9b0510a8
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
38 additions
and
13 deletions
+38
-13
bswapdsp.asm
libavcodec/x86/bswapdsp.asm
+35
-13
bswapdsp_init.c
libavcodec/x86/bswapdsp_init.c
+3
-0
No files found.
libavcodec/x86/bswapdsp.asm
View file @
e9930883
...
@@ -35,14 +35,18 @@ SECTION .text
...
@@ -35,14 +35,18 @@ SECTION .text
mov
r3d
,
r2d
mov
r3d
,
r2d
sar
r2d
,
3
sar
r2d
,
3
jz
.
left4_
%1
jz
.
left4_
%1
%if
cpuflag
(
avx2
)
sar
r2d
,
1
jz
.
left8_
%1
%endif
.
loop8_
%1
:
.
loop8_
%1
:
mov%1
m0
,
[
r1
+
0
]
mov%1
m0
,
[
r1
+
0
]
mov%1
m1
,
[
r1
+
16
]
mov%1
m1
,
[
r1
+
mmsize
]
%if
cpuflag
(
ssse3
)
%if
cpuflag
(
ssse3
)
||
cpuflag
(
avx2
)
pshufb
m0
,
m2
pshufb
m0
,
m2
pshufb
m1
,
m2
pshufb
m1
,
m2
mov%1
[
r0
+
0
]
,
m0
mov%1
[
r0
+
0
]
,
m0
mov%1
[
r0
+
16
]
,
m1
mov%1
[
r0
+
mmsize
]
,
m1
%else
%else
pshuflw
m0
,
m0
,
10110001
b
pshuflw
m0
,
m0
,
10110001
b
pshuflw
m1
,
m1
,
10110001
b
pshuflw
m1
,
m1
,
10110001
b
...
@@ -59,18 +63,29 @@ SECTION .text
...
@@ -59,18 +63,29 @@ SECTION .text
mov%1
[
r0
+
0
]
,
m2
mov%1
[
r0
+
0
]
,
m2
mov%1
[
r0
+
16
]
,
m3
mov%1
[
r0
+
16
]
,
m3
%endif
%endif
add
r0
,
3
2
add
r0
,
mmsize
*
2
add
r1
,
3
2
add
r1
,
mmsize
*
2
dec
r2d
dec
r2d
jnz
.
loop8_
%1
jnz
.
loop8_
%1
%if
cpuflag
(
avx2
)
.
left8_
%1
:
mov
r2d
,
r3d
test
r3d
,
8
jz
.
left4_
%1
mov%1
m0
,
[r1]
pshufb
m0
,
m2
mov%1
[
r0
+
0
]
,
m0
add
r1
,
mmsize
add
r0
,
mmsize
%endif
.
left4_
%1
:
.
left4_
%1
:
mov
r2d
,
r3d
mov
r2d
,
r3d
test
r3d
,
4
test
r3d
,
4
jz
.
left
jz
.
left
mov%1
m0
,
[r1]
mov%1
x
m0
,
[r1]
%if
cpuflag
(
ssse3
)
%if
cpuflag
(
ssse3
)
pshufb
m0
,
m2
pshufb
xm0
,
x
m2
mov%1
[r0],
m0
mov%1
[r0],
x
m0
%else
%else
pshuflw
m0
,
m0
,
10110001
b
pshuflw
m0
,
m0
,
10110001
b
pshufhw
m0
,
m0
,
10110001
b
pshufhw
m0
,
m0
,
10110001
b
...
@@ -86,16 +101,20 @@ SECTION .text
...
@@ -86,16 +101,20 @@ SECTION .text
; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w);
; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w);
%macro
BSWAP32_BUF
0
%macro
BSWAP32_BUF
0
%if
cpuflag
(
ssse3
)
%if
cpuflag
(
ssse3
)
||
cpuflag
(
avx2
)
cglobal
bswap32_buf
,
3
,
4
,
3
cglobal
bswap32_buf
,
3
,
4
,
3
mov
r3
,
r1
mov
r3
,
r1
%if
cpuflag
(
avx2
)
vbroadcasti128
m2
,
[
pb_bswap32
]
%else
mova
m2
,
[
pb_bswap32
]
mova
m2
,
[
pb_bswap32
]
%endif
%else
%else
cglobal
bswap32_buf
,
3
,
4
,
5
cglobal
bswap32_buf
,
3
,
4
,
5
mov
r3
,
r1
mov
r3
,
r1
%endif
%endif
or
r3
,
r0
or
r3
,
r0
test
r3
,
15
test
r3
,
mmsize
-
1
jz
.
start_align
jz
.
start_align
BSWAP_LOOPS
u
BSWAP_LOOPS
u
jmp
.
left
jmp
.
left
...
@@ -105,9 +124,9 @@ cglobal bswap32_buf, 3,4,5
...
@@ -105,9 +124,9 @@ cglobal bswap32_buf, 3,4,5
%if
cpuflag
(
ssse3
)
%if
cpuflag
(
ssse3
)
test
r2d
,
2
test
r2d
,
2
jz
.
left1
jz
.
left1
movq
m0
,
[r1]
movq
x
m0
,
[r1]
pshufb
m0
,
m2
pshufb
xm0
,
x
m2
movq
[r0],
m0
movq
[r0],
x
m0
add
r1
,
8
add
r1
,
8
add
r0
,
8
add
r0
,
8
.
left1
:
.
left1
:
...
@@ -137,3 +156,6 @@ BSWAP32_BUF
...
@@ -137,3 +156,6 @@ BSWAP32_BUF
INIT_XMM
ssse3
INIT_XMM
ssse3
BSWAP32_BUF
BSWAP32_BUF
INIT_YMM
avx2
BSWAP32_BUF
libavcodec/x86/bswapdsp_init.c
View file @
e9930883
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
void
ff_bswap32_buf_sse2
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
void
ff_bswap32_buf_sse2
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
void
ff_bswap32_buf_ssse3
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
void
ff_bswap32_buf_ssse3
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
void
ff_bswap32_buf_avx2
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
av_cold
void
ff_bswapdsp_init_x86
(
BswapDSPContext
*
c
)
av_cold
void
ff_bswapdsp_init_x86
(
BswapDSPContext
*
c
)
{
{
...
@@ -34,4 +35,6 @@ av_cold void ff_bswapdsp_init_x86(BswapDSPContext *c)
...
@@ -34,4 +35,6 @@ av_cold void ff_bswapdsp_init_x86(BswapDSPContext *c)
c
->
bswap_buf
=
ff_bswap32_buf_sse2
;
c
->
bswap_buf
=
ff_bswap32_buf_sse2
;
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
c
->
bswap_buf
=
ff_bswap32_buf_ssse3
;
c
->
bswap_buf
=
ff_bswap32_buf_ssse3
;
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
c
->
bswap_buf
=
ff_bswap32_buf_avx2
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment