Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
b5d08c27
Commit
b5d08c27
authored
Jan 27, 2012
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale: convert rgb/bgr24ToY/UV_mmx functions from inline asm to yasm.
Also implement sse2/ssse3/avx versions.
parent
3b15a6d7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
300 additions
and
178 deletions
+300
-178
input.asm
libswscale/x86/input.asm
+271
-0
swscale_mmx.c
libswscale/x86/swscale_mmx.c
+28
-20
swscale_template.c
libswscale/x86/swscale_template.c
+1
-158
No files found.
libswscale/x86/input.asm
View file @
b5d08c27
This diff is collapsed.
Click to expand it.
libswscale/x86/swscale_mmx.c
View file @
b5d08c27
...
...
@@ -31,10 +31,6 @@ DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
/*
 * 64-bit constants holding one value replicated across every byte or every
 * 16-bit word. The first DECLARE_ASM_CONST argument (8) is presumably the
 * requested alignment in bytes -- the macro definition is not visible here.
 */
/* 0xFC in each of the eight bytes. */
DECLARE_ASM_CONST(8, uint64_t, bFC) = 0xFCFCFCFCFCFCFCFCLL;
/* 0x0010 in each of the four 16-bit words. */
DECLARE_ASM_CONST(8, uint64_t, w10) = 0x0010001000100010LL;
/* 0x0002 in each of the four 16-bit words. */
DECLARE_ASM_CONST(8, uint64_t, w02) = 0x0002000200020002LL;
/*
 * Byte masks. Each digit of the bmXXXXXXXX name describes one byte of the
 * value, most significant byte first: 1 means 0xFF, 0 means 0x00.
 */
DECLARE_ASM_CONST(8, uint64_t, bm00001111) = 0x00000000FFFFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm00000111) = 0x0000000000FFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm11111000) = 0xFFFFFFFFFF000000LL;
DECLARE_ASM_CONST(8, uint64_t, bm01010101) = 0x00FF00FF00FF00FFLL;
const
DECLARE_ALIGNED
(
8
,
uint64_t
,
ff_dither4
)[
2
]
=
{
0x0103010301030103LL
,
...
...
@@ -68,19 +64,6 @@ DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
/* 0x80 in each of the eight bytes. */
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
/* 0x0001 in each of the four 16-bit words. */
DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;

/*
 * Luma coefficient vectors consumed by bgr24ToY_mmx via pmaddwd.
 * Each holds four 16-bit multipliers. "Y1" is applied to the bytes loaded at
 * offsets 0 and 6 of a 12-byte group, "Y2" to the bytes loaded at offsets 2
 * and 8; the two products are then added. The three distinct non-zero
 * multipliers are 0x0C88, 0x4087 and 0x20DE, and results are shifted right
 * by 15, so the values are 15-bit fixed point.
 * The bgr24 pair is used when the source format is PIX_FMT_BGR24, the rgb24
 * pair otherwise.
 */
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
/*
 * Added to each 32-bit luma sum before the >>15 in bgr24ToY_mmx.
 * 0x00084000 per dword equals 16.5 in 15-bit fixed point.
 */
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
/*
 * Chroma coefficient vectors consumed by bgr24ToUV_mmx.
 * The row is selected by (srcFormat == PIX_FMT_RGB24): row 0 for any other
 * format (BGR24 in the visible callers), row 1 for RGB24.
 * Within a row, entries 0 and 1 produce the value stored to dstU and
 * entries 2 and 3 the value stored to dstV.
 */
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
};
/*
 * Added to each 32-bit chroma sum before the >>15 in bgr24ToUV_mmx.
 * 0x00404000 per dword equals 128.5 in 15-bit fixed point.
 */
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset) = 0x0040400000404000ULL;
//MMX versions
#if HAVE_MMX
#undef RENAME
...
...
@@ -244,24 +227,29 @@ VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNC
(
16
,
sse4
);
VSCALE_FUNCS
(
avx
,
avx
);
/*
 * Prototype generators for the externally implemented input converters.
 *
 * INPUT_Y_FUNC(fmt, opt)  declares ff_<fmt>ToY_<opt>  (luma).
 * INPUT_UV_FUNC(fmt, opt) declares ff_<fmt>ToUV_<opt> (chroma).
 * INPUT_FUNC(fmt, opt)    declares both for one format.
 * INPUT_FUNCS(opt)        declares every converter for one instruction-set
 *                         suffix.
 *
 * None of the macros ends in a semicolon; the user supplies it.
 *
 * Every line of a multi-line macro except the last must end in a backslash.
 * Previously the list in INPUT_FUNCS stopped after the first nv21 entry, so
 * the rgb24/bgr24 prototypes were never part of the macro, and INPUT_FUNC
 * repeated the ToY prototype that INPUT_Y_FUNC already provides.
 */
#define INPUT_Y_FUNC(fmt, opt) \
extern void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
                                      int w, uint32_t *unused)
#define INPUT_UV_FUNC(fmt, opt) \
extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
                                       const uint8_t *src, const uint8_t *unused1, \
                                       int w, uint32_t *unused2)
#define INPUT_FUNC(fmt, opt) \
    INPUT_Y_FUNC(fmt, opt); \
    INPUT_UV_FUNC(fmt, opt)
#define INPUT_FUNCS(opt) \
    INPUT_FUNC(uyvy, opt); \
    INPUT_FUNC(yuyv, opt); \
    INPUT_UV_FUNC(nv12, opt); \
    INPUT_UV_FUNC(nv21, opt); \
    INPUT_FUNC(rgb24, opt); \
    INPUT_FUNC(bgr24, opt)
#if ARCH_X86_32
INPUT_FUNCS
(
mmx
);
#endif
INPUT_FUNCS
(
sse2
);
INPUT_FUNCS
(
ssse3
);
INPUT_FUNCS
(
avx
);
void
ff_sws_init_swScale_mmx
(
SwsContext
*
c
)
...
...
@@ -311,6 +299,12 @@ switch(c->dstBpc){ \
case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
default: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
}
/*
 * Expands to one complete `case` of a switch over a pixel format.
 *   x   - lower-case format name used in the function names (e.g. rgb24)
 *   X   - upper-case suffix of the PIX_FMT_ enumerator (e.g. RGB24)
 *   opt - instruction-set suffix of the functions (e.g. sse2)
 * The luma converter is always installed. The chroma converter is installed
 * only when c->chrSrcHSubSample is zero. The expansion ends in `break`
 * without a semicolon, and it expects a variable named `c` in scope.
 */
#define case_rgb(x, X, opt) \
        case PIX_FMT_ ## X: \
            c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
            if (!c->chrSrcHSubSample) \
                c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
            break
#if ARCH_X86_32
if
(
cpu_flags
&
AV_CPU_FLAG_MMX
)
{
ASSIGN_MMX_SCALE_FUNC
(
c
->
hyScale
,
c
->
hLumFilterSize
,
mmx
,
mmx
);
...
...
@@ -337,6 +331,8 @@ switch(c->dstBpc){ \
case
PIX_FMT_NV21
:
c
->
chrToYV12
=
ff_nv21ToUV_mmx
;
break
;
case_rgb
(
rgb24
,
RGB24
,
mmx
);
case_rgb
(
bgr24
,
BGR24
,
mmx
);
default:
break
;
}
...
...
@@ -379,11 +375,21 @@ switch(c->dstBpc){ \
case
PIX_FMT_NV21
:
c
->
chrToYV12
=
ff_nv21ToUV_sse2
;
break
;
case_rgb
(
rgb24
,
RGB24
,
sse2
);
case_rgb
(
bgr24
,
BGR24
,
sse2
);
default:
break
;
}
}
if
(
cpu_flags
&
AV_CPU_FLAG_SSSE3
)
{
ASSIGN_SSE_SCALE_FUNC
(
c
->
hyScale
,
c
->
hLumFilterSize
,
ssse3
,
ssse3
);
ASSIGN_SSE_SCALE_FUNC
(
c
->
hcScale
,
c
->
hChrFilterSize
,
ssse3
,
ssse3
);
switch
(
c
->
srcFormat
)
{
case_rgb
(
rgb24
,
RGB24
,
ssse3
);
case_rgb
(
bgr24
,
BGR24
,
ssse3
);
default:
break
;
}
}
if
(
cpu_flags
&
AV_CPU_FLAG_SSE4
)
{
/* Xto15 don't need special sse4 functions */
...
...
@@ -412,6 +418,8 @@ switch(c->dstBpc){ \
case
PIX_FMT_NV21
:
c
->
chrToYV12
=
ff_nv21ToUV_avx
;
break
;
case_rgb
(
rgb24
,
RGB24
,
avx
);
case_rgb
(
bgr24
,
BGR24
,
avx
);
default:
break
;
}
...
...
libswscale/x86/swscale_template.c
View file @
b5d08c27
...
...
@@ -1361,148 +1361,6 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
}
}
/**
 * Convert packed 24-bit BGR or RGB pixels to 8-bit luma using MMX.
 *
 * @param dst       output buffer; 4 bytes are stored per loop iteration
 * @param src       packed 3-byte-per-pixel input; 12 bytes are consumed per
 *                  iteration
 * @param width     number of output samples; the loop counter starts at
 *                  -width and advances by 4 until it is no longer negative
 * @param srcFormat PIX_FMT_BGR24 selects the bgr24 coefficient pair, any
 *                  other value selects the rgb24 pair
 *
 * The loop body always runs at least once and always handles 4 pixels, so
 * callers are presumably expected to provide buffers padded to a multiple
 * of 4 samples -- confirm against the callers.
 *
 * The coefficient registers mm5/mm6 are loaded in a separate asm statement
 * and are assumed to survive until the main loop.
 */
static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src,
                                                  int width, enum PixelFormat srcFormat)
{
    if (srcFormat == PIX_FMT_BGR24) {
        __asm__ volatile(
            "movq " MANGLE(ff_bgr24toY1Coeff) ", %%mm5 \n\t"
            "movq " MANGLE(ff_bgr24toY2Coeff) ", %%mm6 \n\t"
            :
        );
    } else {
        __asm__ volatile(
            "movq " MANGLE(ff_rgb24toY1Coeff) ", %%mm5 \n\t"
            "movq " MANGLE(ff_rgb24toY2Coeff) ", %%mm6 \n\t"
            :
        );
    }

    __asm__ volatile(
        "movq " MANGLE(ff_bgr24toYOffset) ", %%mm4 \n\t"
        "mov %2, %%" REG_a " \n\t"          /* counter = -width */
        "pxor %%mm7, %%mm7 \n\t"            /* mm7 = 0, used to widen bytes */
        "1: \n\t"
        PREFETCH " 64(%0) \n\t"
        /* Four overlapping 4-byte loads cover the 12 source bytes. */
        "movd (%0), %%mm0 \n\t"
        "movd 2(%0), %%mm1 \n\t"
        "movd 6(%0), %%mm2 \n\t"
        "movd 8(%0), %%mm3 \n\t"
        "add $12, %0 \n\t"
        /* Zero-extend bytes to 16-bit words. */
        "punpcklbw %%mm7, %%mm0 \n\t"
        "punpcklbw %%mm7, %%mm1 \n\t"
        "punpcklbw %%mm7, %%mm2 \n\t"
        "punpcklbw %%mm7, %%mm3 \n\t"
        /* Multiply-accumulate with the two coefficient vectors. */
        "pmaddwd %%mm5, %%mm0 \n\t"
        "pmaddwd %%mm6, %%mm1 \n\t"
        "pmaddwd %%mm5, %%mm2 \n\t"
        "pmaddwd %%mm6, %%mm3 \n\t"
        "paddd %%mm1, %%mm0 \n\t"
        "paddd %%mm3, %%mm2 \n\t"
        /* Add the offset, then drop the 15 fractional bits. */
        "paddd %%mm4, %%mm0 \n\t"
        "paddd %%mm4, %%mm2 \n\t"
        "psrad $15, %%mm0 \n\t"
        "psrad $15, %%mm2 \n\t"
        /* Narrow 4 dwords to 4 bytes and store them. */
        "packssdw %%mm2, %%mm0 \n\t"
        "packuswb %%mm0, %%mm0 \n\t"
        "movd %%mm0, (%1, %%" REG_a ") \n\t"
        "add $4, %%" REG_a " \n\t"
        " js 1b \n\t"
        : "+r" (src)
        : "r" (dst + width), "g" ((x86_reg) -width)
        /* "memory": the asm stores through dst, which is only an input
         * register operand, so the compiler must be told that memory is
         * modified. */
        : "%" REG_a, "memory"
    );
}
static
void
RENAME
(
bgr24ToY
)(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
width
,
uint32_t
*
unused
)
{
RENAME
(
bgr24ToY_mmx
)(
dst
,
src
,
width
,
PIX_FMT_BGR24
);
}
static
void
RENAME
(
rgb24ToY
)(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
width
,
uint32_t
*
unused
)
{
RENAME
(
bgr24ToY_mmx
)(
dst
,
src
,
width
,
PIX_FMT_RGB24
);
}
static
av_always_inline
void
RENAME
(
bgr24ToUV_mmx
)(
uint8_t
*
dstU
,
uint8_t
*
dstV
,
const
uint8_t
*
src
,
int
width
,
enum
PixelFormat
srcFormat
)
{
__asm__
volatile
(
"movq 24(%4), %%mm6
\n\t
"
"mov %3, %%"
REG_a
"
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
"1:
\n\t
"
PREFETCH
" 64(%0)
\n\t
"
"movd (%0), %%mm0
\n\t
"
"movd 2(%0), %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"movq %%mm0, %%mm2
\n\t
"
"movq %%mm1, %%mm3
\n\t
"
"pmaddwd (%4), %%mm0
\n\t
"
"pmaddwd 8(%4), %%mm1
\n\t
"
"pmaddwd 16(%4), %%mm2
\n\t
"
"pmaddwd %%mm6, %%mm3
\n\t
"
"paddd %%mm1, %%mm0
\n\t
"
"paddd %%mm3, %%mm2
\n\t
"
"movd 6(%0), %%mm1
\n\t
"
"movd 8(%0), %%mm3
\n\t
"
"add $12, %0
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm3
\n\t
"
"movq %%mm1, %%mm4
\n\t
"
"movq %%mm3, %%mm5
\n\t
"
"pmaddwd (%4), %%mm1
\n\t
"
"pmaddwd 8(%4), %%mm3
\n\t
"
"pmaddwd 16(%4), %%mm4
\n\t
"
"pmaddwd %%mm6, %%mm5
\n\t
"
"paddd %%mm3, %%mm1
\n\t
"
"paddd %%mm5, %%mm4
\n\t
"
"movq "
MANGLE
(
ff_bgr24toUVOffset
)
", %%mm3
\n\t
"
"paddd %%mm3, %%mm0
\n\t
"
"paddd %%mm3, %%mm2
\n\t
"
"paddd %%mm3, %%mm1
\n\t
"
"paddd %%mm3, %%mm4
\n\t
"
"psrad $15, %%mm0
\n\t
"
"psrad $15, %%mm2
\n\t
"
"psrad $15, %%mm1
\n\t
"
"psrad $15, %%mm4
\n\t
"
"packssdw %%mm1, %%mm0
\n\t
"
"packssdw %%mm4, %%mm2
\n\t
"
"packuswb %%mm0, %%mm0
\n\t
"
"packuswb %%mm2, %%mm2
\n\t
"
"movd %%mm0, (%1, %%"
REG_a
")
\n\t
"
"movd %%mm2, (%2, %%"
REG_a
")
\n\t
"
"add $4, %%"
REG_a
"
\n\t
"
" js 1b
\n\t
"
:
"+r"
(
src
)
:
"r"
(
dstU
+
width
),
"r"
(
dstV
+
width
),
"g"
((
x86_reg
)
-
width
),
"r"
(
ff_bgr24toUV
[
srcFormat
==
PIX_FMT_RGB24
])
:
"%"
REG_a
);
}
/**
 * Chroma converter entry point for packed BGR24 input.
 *
 * @param dstU   output buffer for U
 * @param dstV   output buffer for V
 * @param src1   packed BGR24 input
 * @param src2   must be the same pointer as src1 (checked with assert)
 * @param width  number of output samples
 * @param unused not used
 *
 * The precondition is checked before any output is written, matching
 * rgb24ToUV.
 */
static void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
                              int width, uint32_t *unused)
{
    assert(src1 == src2);
    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
}
static
void
RENAME
(
rgb24ToUV
)(
uint8_t
*
dstU
,
uint8_t
*
dstV
,
const
uint8_t
*
src1
,
const
uint8_t
*
src2
,
int
width
,
uint32_t
*
unused
)
{
assert
(
src1
==
src2
);
RENAME
(
bgr24ToUV_mmx
)(
dstU
,
dstV
,
src1
,
width
,
PIX_FMT_RGB24
);
}
#if COMPILE_TEMPLATE_MMX2
static
void
RENAME
(
hyscale_fast
)(
SwsContext
*
c
,
int16_t
*
dst
,
int
dstWidth
,
const
uint8_t
*
src
,
...
...
@@ -1689,8 +1547,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
static
av_cold
void
RENAME
(
sws_init_swScale
)(
SwsContext
*
c
)
{
enum
PixelFormat
srcFormat
=
c
->
srcFormat
,
dstFormat
=
c
->
dstFormat
;
enum
PixelFormat
dstFormat
=
c
->
dstFormat
;
if
(
!
is16BPS
(
dstFormat
)
&&
!
is9_OR_10BPS
(
dstFormat
)
&&
dstFormat
!=
PIX_FMT_NV12
&&
dstFormat
!=
PIX_FMT_NV21
)
{
...
...
@@ -1762,18 +1619,4 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
#endif
/* COMPILE_TEMPLATE_MMX2 */
}
if
(
!
c
->
chrSrcHSubSample
)
{
switch
(
srcFormat
)
{
case
PIX_FMT_BGR24
:
c
->
chrToYV12
=
RENAME
(
bgr24ToUV
);
break
;
case
PIX_FMT_RGB24
:
c
->
chrToYV12
=
RENAME
(
rgb24ToUV
);
break
;
default:
break
;
}
}
switch
(
srcFormat
)
{
case
PIX_FMT_BGR24
:
c
->
lumToYV12
=
RENAME
(
bgr24ToY
);
break
;
case
PIX_FMT_RGB24
:
c
->
lumToYV12
=
RENAME
(
rgb24ToY
);
break
;
default:
break
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment