Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
07a566e7
Commit
07a566e7
authored
Apr 22, 2018
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale/swscale_unscaled: add x86_64 (SSE2 and AVX) versions of uyvytoyuv422
and a checkasm test
parent
e6e46258
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
219 additions
and
0 deletions
+219
-0
rgb2rgb.c
libswscale/x86/rgb2rgb.c
+19
-0
rgb_2_rgb.asm
libswscale/x86/rgb_2_rgb.asm
+150
-0
sw_rgb.c
tests/checkasm/sw_rgb.c
+50
-0
No files found.
libswscale/x86/rgb2rgb.c
View file @
07a566e7
...
...
@@ -150,6 +150,15 @@ void ff_shuffle_bytes_1230_ssse3(const uint8_t *src, uint8_t *dst, int src_size)
/* SSSE3 variants of shuffle_bytes; rgb2rgb_init_x86() installs them into the
 * shuffle_bytes_3012 / shuffle_bytes_3210 pointers when
 * EXTERNAL_SSSE3(cpu_flags) is true. */
void ff_shuffle_bytes_3012_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_3210_ssse3(const uint8_t *src, uint8_t *dst, int src_size);

#if ARCH_X86_64
/* SSE2 and AVX variants of uyvytoyuv422, declared only for x86-64 builds.
 * rgb2rgb_init_x86() assigns them to the uyvytoyuv422 pointer under
 * EXTERNAL_SSE2(cpu_flags) and EXTERNAL_AVX(cpu_flags) respectively; the AVX
 * assignment comes later in that function, so it wins when both are set. */
void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                          const uint8_t *src, int width, int height,
                          int lumStride, int chromStride, int srcStride);
void ff_uyvytoyuv422_avx(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                         const uint8_t *src, int width, int height,
                         int lumStride, int chromStride, int srcStride);
#endif
av_cold
void
rgb2rgb_init_x86
(
void
)
{
int
cpu_flags
=
av_get_cpu_flags
();
...
...
@@ -167,6 +176,11 @@ av_cold void rgb2rgb_init_x86(void)
rgb2rgb_init_avx
();
#endif
/* HAVE_INLINE_ASM */
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
#if ARCH_X86_64
uyvytoyuv422
=
ff_uyvytoyuv422_sse2
;
#endif
}
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
shuffle_bytes_0321
=
ff_shuffle_bytes_0321_ssse3
;
shuffle_bytes_2103
=
ff_shuffle_bytes_2103_ssse3
;
...
...
@@ -174,4 +188,9 @@ av_cold void rgb2rgb_init_x86(void)
shuffle_bytes_3012
=
ff_shuffle_bytes_3012_ssse3
;
shuffle_bytes_3210
=
ff_shuffle_bytes_3210_ssse3
;
}
if
(
EXTERNAL_AVX
(
cpu_flags
))
{
#if ARCH_X86_64
uyvytoyuv422
=
ff_uyvytoyuv422_avx
;
#endif
}
}
libswscale/x86/rgb_2_rgb.asm
View file @
07a566e7
...
...
@@ -32,6 +32,16 @@ pb_shuffle3210: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
SECTION
.
text
; RSHIFT_COPY dst, src, shift
;   Leaves in dst a copy of src shifted right by `shift`, without modifying src.
;   When the AVX cpuflag is active, a single three-operand psrldq does both the
;   copy and the shift; otherwise the register is copied first and shifted in
;   place through the RSHIFT helper (defined outside this view).
%macro RSHIFT_COPY 3 ; %1 dst ; %2 src ; %3 shift
%if notcpuflag(avx)
    mova           %1, %2
    RSHIFT         %1, %3
%else
    psrldq         %1, %2, %3
%endif
%endmacro
;------------------------------------------------------------------------------
; shuffle_bytes_## (const uint8_t *src, uint8_t *dst, int src_size)
;------------------------------------------------------------------------------
...
...
@@ -84,3 +94,143 @@ SHUFFLE_BYTES 0, 3, 2, 1
; Instantiations of SHUFFLE_BYTES for three more byte orders.
; NOTE(review): the macro body is outside this view; presumably each line emits
; the matching shuffle_bytes_XXXX function -- confirm against the macro.
SHUFFLE_BYTES 1, 2, 3, 0
SHUFFLE_BYTES 3, 0, 1, 2
SHUFFLE_BYTES 3, 2, 1, 0
;-----------------------------------------------------------------------------------------------
; uyvytoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
;              const uint8_t *src, int width, int height,
;              int lumStride, int chromStride, int srcStride)
;
; Splits packed source bytes laid out as U Y V Y (4 bytes per pixel pair) into
; three separate planes: every Y byte goes to ydst, every U byte to udst and
; every V byte to vdst. One chroma sample is written per two luma samples.
;
; Register roles (named by cglobal):
;   w       negative luma index, counts up towards 0 within a line
;   wtwo    negative source byte index (2 source bytes per pixel)
;   whalf   negative chroma index (1 chroma byte per 2 pixels)
;   x       pixels left for the scalar loop = width mod (mmsize * 2)
;   back_w  saved width, used to reset w / whalf for the next line
;   tmp     byte scratch for the scalar loop
;   m1      0x00FF in every word (low-byte mask)
;
; All four pointers are advanced to the END of the current line once, so the
; loops can index with negative offsets and terminate on a sign/zero flag.
;
; Per line, the first (width mod (mmsize * 2)) pixels are handled two at a time
; by the scalar loop; the rest is handled by the SIMD loop, which consumes
; mmsize * 4 source bytes (mmsize * 2 pixels) per iteration.
;
; NOTE(review): the scalar loop works on pixel pairs and whalf = width >> 1, so
; an odd width makes the scalar loop store one luma byte past `width` -- confirm
; callers only pass even widths.
; NOTE(review): width == 0 still runs one SIMD iteration and height <= 0 still
; processes one line (both loops test after the first pass) -- callers are
; assumed to pass positive sizes; confirm.
;-----------------------------------------------------------------------------------------------
%macro UYVY_TO_YUV422 0
cglobal uyvytoyuv422, 9, 14, 8, ydst, udst, vdst, src, w, h, lum_stride, chrom_stride, src_stride, wtwo, whalf, tmp, x, back_w
    pcmpeqw      m1, m1
    psrlw        m1, 8                  ; m1 = 0x00FF per word

    ; the int arguments are sign-extended to 64 bit before being used in addressing
    movsxdifnidn            wq, wd
    movsxdifnidn   lum_strideq, lum_strided
    movsxdifnidn chrom_strideq, chrom_strided
    movsxdifnidn   src_strideq, src_strided

    mov     back_wq, wq
    mov      whalfq, wq
    shr      whalfq, 1                  ; whalf = width / 2

    ; move every pointer to the end of the first line
    lea        srcq, [srcq + wq * 2]
    add       ydstq, wq
    add       udstq, whalfq
    add       vdstq, whalfq

.loop_line:
    mov          xq, wq
    mov       wtwoq, wq
    add       wtwoq, wtwoq              ; wtwo = width * 2

    ; switch to negative indices relative to the end-of-line pointers
    neg          wq
    neg       wtwoq
    neg      whalfq

    ; calc scalar loop count
    and          xq, mmsize * 2 - 1
    je .loop_simd                       ; width is a multiple of the SIMD step

    .loop_scalar:
        mov             tmpb, [srcq + wtwoq + 0]
        mov [udstq + whalfq], tmpb      ; U

        mov             tmpb, [srcq + wtwoq + 1]
        mov     [ydstq + wq], tmpb      ; Y0

        mov             tmpb, [srcq + wtwoq + 2]
        mov [vdstq + whalfq], tmpb      ; V

        mov             tmpb, [srcq + wtwoq + 3]
        mov [ydstq + wq + 1], tmpb      ; Y1

        add          wq, 2
        add       wtwoq, 4
        add      whalfq, 1
        sub          xq, 2
        jg .loop_scalar

    ; check if the simd loop is needed: w >= 0 means the line is already done
    test         wq, wq
    jge .end_line

    .loop_simd:
        movu         m2, [srcq + wtwoq             ]
        movu         m3, [srcq + wtwoq + mmsize    ]
        movu         m4, [srcq + wtwoq + mmsize * 2]
        movu         m5, [srcq + wtwoq + mmsize * 3]

        ; extract y part 1
        RSHIFT_COPY  m6, m2, 1          ; UYVY UYVY -> YVYU YVY...
        pand         m6, m1             ; YxYx YxYx...

        RSHIFT_COPY  m7, m3, 1          ; UYVY UYVY -> YVYU YVY...
        pand         m7, m1             ; YxYx YxYx...

        packuswb     m6, m7             ; YYYY YYYY...
        movu [ydstq + wq], m6

        ; extract y part 2
        RSHIFT_COPY  m6, m4, 1          ; UYVY UYVY -> YVYU YVY...
        pand         m6, m1             ; YxYx YxYx...

        RSHIFT_COPY  m7, m5, 1          ; UYVY UYVY -> YVYU YVY...
        pand         m7, m1             ; YxYx YxYx...

        packuswb     m6, m7             ; YYYY YYYY...
        movu [ydstq + wq + mmsize], m6

        ; extract uv
        pand         m2, m1             ; UxVx...
        pand         m3, m1             ; UxVx...
        pand         m4, m1             ; UxVx...
        pand         m5, m1             ; UxVx...

        packuswb     m2, m3             ; UVUV...
        packuswb     m4, m5             ; UVUV...

        ; U
        pand         m6, m2, m1         ; UxUx...
        pand         m7, m4, m1         ; UxUx...

        packuswb     m6, m7             ; UUUU
        movu [udstq + whalfq], m6

        ; V
        psrlw        m2, 8              ; VxVx...
        psrlw        m4, 8              ; VxVx...
        packuswb     m2, m4             ; VVVV
        movu [vdstq + whalfq], m2

        add      whalfq, mmsize
        add       wtwoq, mmsize * 4
        add          wq, mmsize * 2
        jl .loop_simd                   ; loop while the luma index is still negative

    .end_line:
        add        srcq, src_strideq
        add       ydstq, lum_strideq
        add       udstq, chrom_strideq
        add       vdstq, chrom_strideq

        ; restore initial state of the line variables
        ; (x and wtwo are recomputed at the top of .loop_line)
        mov          wq, back_wq
        mov      whalfq, wq
        shr      whalfq, 1              ; whalf = width / 2

        sub          hd, 1
        jg .loop_line

    RET
%endmacro

%if ARCH_X86_64
INIT_XMM sse2
UYVY_TO_YUV422

INIT_XMM avx
UYVY_TO_YUV422
%endif
tests/checkasm/sw_rgb.c
View file @
07a566e7
...
...
@@ -35,8 +35,12 @@
} while (0)
/* Table of sizes; its users are outside this view. */
static const uint8_t width[] = {
    12, 16, 20, 32, 36, 128
};

/* Geometries fed to the uyvytoyuv422 check: w and h are passed as width and
 * height, s is passed as srcStride. */
static const struct {
    uint8_t w;
    uint8_t h;
    uint8_t s;
} planes[] = {
    { .w =  12, .h =  16, .s =  12 },
    { .w =  16, .h =  16, .s =  16 },
    { .w =  20, .h =  23, .s =  25 },
    { .w =  32, .h =  18, .s =  48 },
    { .w =   8, .h = 128, .s =  16 },
    { .w = 128, .h = 128, .s = 128 },
};

/* Upper bounds used to size the test buffers. */
#define MAX_STRIDE 128
#define MAX_HEIGHT 128
static
void
check_shuffle_bytes
(
void
*
func
,
const
char
*
report
)
{
...
...
@@ -64,6 +68,49 @@ static void check_shuffle_bytes(void * func, const char * report)
}
}
static
void
check_uyvy_to_422p
()
{
int
i
;
LOCAL_ALIGNED_32
(
uint8_t
,
src0
,
[
MAX_STRIDE
*
MAX_HEIGHT
*
2
]);
LOCAL_ALIGNED_32
(
uint8_t
,
src1
,
[
MAX_STRIDE
*
MAX_HEIGHT
*
2
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_y_0
,
[
MAX_STRIDE
*
MAX_HEIGHT
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_y_1
,
[
MAX_STRIDE
*
MAX_HEIGHT
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_u_0
,
[(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_u_1
,
[(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_v_0
,
[(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
]);
LOCAL_ALIGNED_32
(
uint8_t
,
dst_v_1
,
[(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
]);
declare_func_emms
(
AV_CPU_FLAG_MMX
,
void
,
uint8_t
*
ydst
,
uint8_t
*
udst
,
uint8_t
*
vdst
,
const
uint8_t
*
src
,
int
width
,
int
height
,
int
lumStride
,
int
chromStride
,
int
srcStride
);
randomize_buffers
(
src0
,
MAX_STRIDE
*
MAX_HEIGHT
*
2
);
memcpy
(
src1
,
src0
,
MAX_STRIDE
*
MAX_HEIGHT
*
2
);
if
(
check_func
(
uyvytoyuv422
,
"uyvytoyuv422"
))
{
for
(
i
=
0
;
i
<
6
;
i
++
)
{
memset
(
dst_y_0
,
0
,
MAX_STRIDE
*
MAX_HEIGHT
);
memset
(
dst_y_1
,
0
,
MAX_STRIDE
*
MAX_HEIGHT
);
memset
(
dst_u_0
,
0
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
);
memset
(
dst_u_1
,
0
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
);
memset
(
dst_v_0
,
0
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
);
memset
(
dst_v_1
,
0
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
);
call_ref
(
dst_y_0
,
dst_u_0
,
dst_v_0
,
src0
,
planes
[
i
].
w
,
planes
[
i
].
h
,
MAX_STRIDE
,
MAX_STRIDE
/
2
,
planes
[
i
].
s
);
call_new
(
dst_y_1
,
dst_u_1
,
dst_v_1
,
src1
,
planes
[
i
].
w
,
planes
[
i
].
h
,
MAX_STRIDE
,
MAX_STRIDE
/
2
,
planes
[
i
].
s
);
if
(
memcmp
(
dst_y_0
,
dst_y_1
,
MAX_STRIDE
*
MAX_HEIGHT
)
||
memcmp
(
dst_u_0
,
dst_u_1
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
)
||
memcmp
(
dst_v_0
,
dst_v_1
,
(
MAX_STRIDE
/
2
)
*
MAX_HEIGHT
))
fail
();
}
bench_new
(
dst_y_1
,
dst_u_1
,
dst_v_1
,
src1
,
planes
[
5
].
w
,
planes
[
5
].
h
,
MAX_STRIDE
,
MAX_STRIDE
/
2
,
planes
[
5
].
s
);
}
}
void
checkasm_check_sw_rgb
(
void
)
{
ff_sws_rgb2rgb_init
();
...
...
@@ -82,4 +129,7 @@ void checkasm_check_sw_rgb(void)
check_shuffle_bytes
(
shuffle_bytes_3210
,
"shuffle_bytes_3210"
);
report
(
"shuffle_bytes_3210"
);
check_uyvy_to_422p
();
report
(
"uyvytoyuv422"
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment