Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
aeae5d53
Commit
aeae5d53
authored
Feb 18, 2003
by
Michael Niedermayer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimize
Originally committed as revision 9455 to
svn://svn.mplayerhq.hu/mplayer/trunk/postproc
parent
64094f37
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
77 additions
and
2 deletions
+77
-2
rgb2rgb.c
postproc/rgb2rgb.c
+5
-0
rgb2rgb_template.c
postproc/rgb2rgb_template.c
+72
-2
No files found.
postproc/rgb2rgb.c
View file @
aeae5d53
...
...
@@ -28,6 +28,11 @@ static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFU
/*
 * 64-bit constants used as MMX memory operands; each is 8-byte aligned.
 * Every constant holds a bit pattern that repeats across the quadword so
 * that one MMX operation treats several packed pixels at once.
 */

/* 32-bit pixel layout: byte 1, byte 2, and the low 24 bits of each dword. */
static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL;
static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL;
static const uint64_t mask32  __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;

/*
 * Operands of the multiply-based rgb32to16 / rgb32to15 MMX loops.
 *   mask3216br: 0x00F8 in every 16-bit word, i.e. the top five bits of
 *               bytes 0 and 2 of each dword (used by both loops).
 *   mask3216g:  top six bits of byte 1 of each dword (rgb32to16).
 *   mask3215g:  top five bits of byte 1 of each dword (rgb32to15).
 *   mul3216:    pmaddwd multiplier, words 0x0004 and 0x2000 per dword.
 *   mul3215:    pmaddwd multiplier, words 0x0008 and 0x2000 per dword.
 */
static const uint64_t mask3216br __attribute__((aligned(8))) = 0x00F800F800F800F8ULL;
static const uint64_t mask3216g  __attribute__((aligned(8))) = 0x0000FC000000FC00ULL;
static const uint64_t mask3215g  __attribute__((aligned(8))) = 0x0000F8000000F800ULL;
static const uint64_t mul3216    __attribute__((aligned(8))) = 0x2000000420000004ULL;
static const uint64_t mul3215    __attribute__((aligned(8))) = 0x2000000820000008ULL;

/*
 * 24-bit pixel layout: every third byte of the quadword, starting at
 * byte 0 (mask24b), byte 1 (mask24g) and byte 2 (mask24r).
 */
static const uint64_t mask24b __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL;
static const uint64_t mask24g __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL;
static const uint64_t mask24r __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL;
...
...
postproc/rgb2rgb_template.c
View file @
aeae5d53
...
...
@@ -318,12 +318,46 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned
uint16_t
*
d
=
(
uint16_t
*
)
dst
;
end
=
s
+
src_size
;
#ifdef HAVE_MMX
mm_end
=
end
-
15
;
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster; on Athlon it is slightly faster)
asm
volatile
(
"movq %3, %%mm5
\n\t
"
"movq %4, %%mm6
\n\t
"
"movq %5, %%mm7
\n\t
"
".balign 16
\n\t
"
"1:
\n\t
"
PREFETCH
" 32(%1)
\n\t
"
"movd (%1), %%mm0
\n\t
"
"movd 4(%1), %%mm3
\n\t
"
"punpckldq 8(%1), %%mm0
\n\t
"
"punpckldq 12(%1), %%mm3
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm3, %%mm4
\n\t
"
"pand %%mm6, %%mm0
\n\t
"
"pand %%mm6, %%mm3
\n\t
"
"pmaddwd %%mm7, %%mm0
\n\t
"
"pmaddwd %%mm7, %%mm3
\n\t
"
"pand %%mm5, %%mm1
\n\t
"
"pand %%mm5, %%mm4
\n\t
"
"por %%mm1, %%mm0
\n\t
"
"por %%mm4, %%mm3
\n\t
"
"psrld $5, %%mm0
\n\t
"
"pslld $11, %%mm3
\n\t
"
"por %%mm3, %%mm0
\n\t
"
MOVNTQ
" %%mm0, (%0)
\n\t
"
"addl $16, %1
\n\t
"
"addl $8, %0
\n\t
"
"cmpl %2, %1
\n\t
"
" jb 1b
\n\t
"
:
"+r"
(
d
),
"+r"
(
s
)
:
"r"
(
mm_end
),
"m"
(
mask3216g
),
"m"
(
mask3216br
),
"m"
(
mul3216
)
);
#else
__asm
__volatile
(
PREFETCH
" %0"
::
"m"
(
*
src
)
:
"memory"
);
__asm
__volatile
(
"movq %0, %%mm7
\n\t
"
"movq %1, %%mm6
\n\t
"
::
"m"
(
red_16mask
),
"m"
(
green_16mask
));
mm_end
=
end
-
15
;
while
(
s
<
mm_end
)
{
__asm
__volatile
(
...
...
@@ -359,6 +393,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned
d
+=
4
;
s
+=
16
;
}
#endif
__asm
__volatile
(
SFENCE
:::
"memory"
);
__asm
__volatile
(
EMMS
:::
"memory"
);
#endif
...
...
@@ -441,12 +476,46 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned
uint16_t
*
d
=
(
uint16_t
*
)
dst
;
end
=
s
+
src_size
;
#ifdef HAVE_MMX
mm_end
=
end
-
15
;
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster; on Athlon it is slightly faster)
asm
volatile
(
"movq %3, %%mm5
\n\t
"
"movq %4, %%mm6
\n\t
"
"movq %5, %%mm7
\n\t
"
".balign 16
\n\t
"
"1:
\n\t
"
PREFETCH
" 32(%1)
\n\t
"
"movd (%1), %%mm0
\n\t
"
"movd 4(%1), %%mm3
\n\t
"
"punpckldq 8(%1), %%mm0
\n\t
"
"punpckldq 12(%1), %%mm3
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm3, %%mm4
\n\t
"
"pand %%mm6, %%mm0
\n\t
"
"pand %%mm6, %%mm3
\n\t
"
"pmaddwd %%mm7, %%mm0
\n\t
"
"pmaddwd %%mm7, %%mm3
\n\t
"
"pand %%mm5, %%mm1
\n\t
"
"pand %%mm5, %%mm4
\n\t
"
"por %%mm1, %%mm0
\n\t
"
"por %%mm4, %%mm3
\n\t
"
"psrld $6, %%mm0
\n\t
"
"pslld $10, %%mm3
\n\t
"
"por %%mm3, %%mm0
\n\t
"
MOVNTQ
" %%mm0, (%0)
\n\t
"
"addl $16, %1
\n\t
"
"addl $8, %0
\n\t
"
"cmpl %2, %1
\n\t
"
" jb 1b
\n\t
"
:
"+r"
(
d
),
"+r"
(
s
)
:
"r"
(
mm_end
),
"m"
(
mask3215g
),
"m"
(
mask3216br
),
"m"
(
mul3215
)
);
#else
__asm
__volatile
(
PREFETCH
" %0"
::
"m"
(
*
src
)
:
"memory"
);
__asm
__volatile
(
"movq %0, %%mm7
\n\t
"
"movq %1, %%mm6
\n\t
"
::
"m"
(
red_15mask
),
"m"
(
green_15mask
));
mm_end
=
end
-
15
;
while
(
s
<
mm_end
)
{
__asm
__volatile
(
...
...
@@ -482,6 +551,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned
d
+=
4
;
s
+=
16
;
}
#endif
__asm
__volatile
(
SFENCE
:::
"memory"
);
__asm
__volatile
(
EMMS
:::
"memory"
);
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment