Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
296609f8
Commit
296609f8
authored
Oct 11, 2018
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale/x86/rgb2rgb : port shuffle 2103 mmxext to external asm and remove inline asm version
parent
04afdbb5
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
67 additions
and
48 deletions
+67
-48
rgb2rgb.c
libswscale/x86/rgb2rgb.c
+4
-0
rgb2rgb_template.c
libswscale/x86/rgb2rgb_template.c
+0
-48
rgb_2_rgb.asm
libswscale/x86/rgb_2_rgb.asm
+63
-0
No files found.
libswscale/x86/rgb2rgb.c
View file @
296609f8
...
...
@@ -144,6 +144,7 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
#endif
/* HAVE_INLINE_ASM */
void
ff_shuffle_bytes_2103_mmxext
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
);
void
ff_shuffle_bytes_2103_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
);
void
ff_shuffle_bytes_0321_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
);
void
ff_shuffle_bytes_1230_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
);
...
...
@@ -176,6 +177,9 @@ av_cold void rgb2rgb_init_x86(void)
rgb2rgb_init_avx
();
#endif
/* HAVE_INLINE_ASM */
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
shuffle_bytes_2103
=
ff_shuffle_bytes_2103_mmxext
;
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
#if ARCH_X86_64
uyvytoyuv422
=
ff_uyvytoyuv422_sse2
;
...
...
libswscale/x86/rgb2rgb_template.c
View file @
296609f8
...
...
@@ -1034,51 +1034,6 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
}
}
#if COMPILE_TEMPLATE_MMXEXT
/**
 * Swap bytes 0 and 2 of every 32-bit group while keeping bytes 1 and 3
 * (byte order 2,1,0,3).
 *
 * The bulk of the buffer is handled 16 bytes per iteration by the MMXEXT
 * inline-asm loop; whatever is left is finished by the C loop, 4 bytes at
 * a time.
 *
 * @param src      input bytes
 * @param dst      output bytes
 * @param src_size number of bytes to convert
 *                 (NOTE(review): presumably a multiple of 4 - the tail loop
 *                 always touches whole 32-bit words; confirm with callers)
 */
static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size)
{
    /* Biased index: negative while at least 16 bytes remain, reaches 15
     * once everything has been consumed. */
    x86_reg pos = 15 - src_size;
    /* Base pointers biased so that base[pos] addresses the current byte. */
    const uint8_t *src_base = src - pos;
    uint8_t       *dst_base = dst - pos;

    __asm__ volatile (
        /* Fewer than 16 bytes in total: skip the SIMD part entirely. */
        "test %0, %0 \n\t"
        "jns 2f \n\t"
        PREFETCH" (%1, %0) \n\t"
        /* Build the two byte-select masks in mm7 / mm6 from the constants
         * mask32b, mask32r and mmx_one (declared elsewhere in the project). */
        "movq %3, %%mm7 \n\t"
        "pxor %4, %%mm7 \n\t"
        "movq %%mm7, %%mm6 \n\t"
        "pxor %5, %%mm7 \n\t"
        ".p2align 4 \n\t"
        "1: \n\t"
        PREFETCH" 32(%1, %0) \n\t"
        "movq (%1, %0), %%mm0 \n\t"
        "movq 8(%1, %0), %%mm1 \n\t"
        /* 177 = 0b10110001: swap the two 16-bit words of each dword. */
        "pshufw $177, %%mm0, %%mm3 \n\t"
        "pshufw $177, %%mm1, %%mm5 \n\t"
        "pand %%mm7, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pand %%mm7, %%mm1 \n\t"
        "pand %%mm6, %%mm5 \n\t"
        "por %%mm3, %%mm0 \n\t"
        "por %%mm5, %%mm1 \n\t"
        MOVNTQ" %%mm0, (%2, %0) \n\t"
        MOVNTQ" %%mm1, 8(%2, %0) \n\t"
        "add $16, %0 \n\t"
        "js 1b \n\t"
        SFENCE" \n\t"
        EMMS" \n\t"
        "2: \n\t"
        : "+&r"(pos)
        : "r"(src_base), "r"(dst_base), "m"(mask32b), "m"(mask32r), "m"(mmx_one)
        : "memory"
    );

    /* Scalar tail: one 32-bit group per iteration. */
    while (pos < 15) {
        const unsigned px   = *(const uint32_t *)&src_base[pos];
        const unsigned keep = px & 0xff00ff00;   /* bytes 1 and 3 stay put   */
        const unsigned swap = px & 0xff00ff;     /* bytes 0 and 2 trade place */

        *(uint32_t *)&dst_base[pos] = (swap >> 16) + keep + (swap << 16);
        pos += 4;
    }
}
#endif
static
inline
void
RENAME
(
rgb24tobgr24
)(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
)
{
unsigned
i
;
...
...
@@ -2555,9 +2510,6 @@ static av_cold void RENAME(rgb2rgb_init)(void)
rgb24to15
=
RENAME
(
rgb24to15
);
rgb24to16
=
RENAME
(
rgb24to16
);
rgb24tobgr24
=
RENAME
(
rgb24tobgr24
);
#if COMPILE_TEMPLATE_MMXEXT
shuffle_bytes_2103
=
RENAME
(
shuffle_bytes_2103
);
#endif
rgb32tobgr16
=
RENAME
(
rgb32tobgr16
);
rgb32tobgr15
=
RENAME
(
rgb32tobgr15
);
yv12toyuy2
=
RENAME
(
yv12toyuy2
);
...
...
libswscale/x86/rgb_2_rgb.asm
View file @
296609f8
...
...
@@ -24,6 +24,7 @@
SECTION_RODATA

; 0x00ff in every 16-bit word. Loaded with mova by the mmxext
; shuffle_bytes_2103 routine as its byte-select mask.
; NOTE(review): 8 words (16 bytes) are emitted although an MMX load reads only
; 8; presumably this keeps the tables below 16-byte aligned - confirm before
; shrinking it.
pb_mask_shuffle2103_mmx: times 8 dw 0x00ff

; Byte-index tables, one entry per output byte of a 16-byte vector.
; The name gives the source byte order inside each 4-byte group.
pb_shuffle2103: db  2,  1,  0,  3,  6,  5,  4,  7, 10,  9,  8, 11, 14, 13, 12, 15
pb_shuffle0321: db  0,  3,  2,  1,  4,  7,  6,  5,  8, 11, 10,  9, 12, 15, 14, 13
pb_shuffle1230: db  1,  2,  3,  0,  5,  6,  7,  4,  9, 10, 11,  8, 13, 14, 15, 12
...
...
@@ -42,6 +43,68 @@ SECTION .text
%endif
%endmacro
;------------------------------------------------------------------------------
; shuffle_bytes_2103_mmxext (const uint8_t *src, uint8_t *dst, int src_size)
;
; Copies src to dst, swapping bytes 0 and 2 of every 4-byte group and keeping
; bytes 1 and 3 in place (byte order 2,1,0,3).
;
; In:    src      = input bytes
;        dst      = output bytes
;        src_size = byte count; src_size <= 0 is a no-op.
;                   NOTE(review): only whole 4-byte groups are handled, a size
;                   that is not a multiple of 4 is not supported - confirm
;                   that callers guarantee this.
; Regs:  srcq/dstq point one past the end of the buffers,
;        wq   = negative byte offset from the end, counts up to 0,
;        xq   = bytes left for the scalar head (src_size % 16),
;        tmpb = scalar byte temporary,
;        m6   = 0x00ff word mask (selects even bytes),
;        m7   = m6 << 8          (selects odd bytes).
; Exit:  MMX state is cleared with emms on every path.
;------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal shuffle_bytes_2103, 3, 5, 8, src, dst, w, tmp, x
    movsxdifnidn wq, wd
    test         wq, wq
    jle .end                            ; empty buffer: the SIMD loop below
                                        ; would otherwise touch 16 bytes
                                        ; past the end of src and dst

    mova         m6, [pb_mask_shuffle2103_mmx]
    mova         m7, m6
    psllq        m7, 8

    mov          xq, wq

    add        srcq, wq                 ; address both buffers from their end
    add        dstq, wq
    neg          wq

    ; scalar head: src_size % (mmsize * 2) bytes, one 4-byte group at a time
    and          xq, mmsize * 2 - 4
    jz .loop_simd

.loop_scalar:
    mov        tmpb, [srcq + wq + 2]
    mov [dstq + wq + 0], tmpb
    mov        tmpb, [srcq + wq + 1]
    mov [dstq + wq + 1], tmpb
    mov        tmpb, [srcq + wq + 0]
    mov [dstq + wq + 2], tmpb
    mov        tmpb, [srcq + wq + 3]
    mov [dstq + wq + 3], tmpb
    add          wq, 4
    sub          xq, 4
    jg .loop_scalar

    ; src_size was smaller than mmsize * 2: nothing left for the SIMD loop
    test         wq, wq
    jge .end

.loop_simd:
    movu         m0, [srcq + wq]
    movu         m1, [srcq + wq + 8]

    pshufw       m3, m0, 177            ; 0b10110001: swap the 16-bit words
    pshufw       m5, m1, 177            ; of each dword

    pand         m0, m7                 ; odd bytes stay where they are
    pand         m3, m6                 ; even bytes come from the swapped copy

    pand         m1, m7
    pand         m5, m6

    por          m0, m3
    por          m1, m5

    movu  [dstq + wq], m0
    movu  [dstq + wq + 8], m1

    add          wq, mmsize * 2
    jl .loop_simd

.end:
    emms                                ; the inline-asm version this replaces
                                        ; ended with EMMS; keep x87 usable for
                                        ; the caller
    RET
;------------------------------------------------------------------------------
; shuffle_bytes_## (const uint8_t *src, uint8_t *dst, int src_size)
;------------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment