Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
bd61f3c6
Commit
bd61f3c6
authored
Jun 23, 2016
by
Rostislav Pehlivanov
Committed by
Rostislav Pehlivanov
Jul 11, 2016
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
diracdsp: add SIMD for the 10 bit version of put_signed_rect_clamped
Signed-off-by:
Rostislav Pehlivanov
<
rpehlivanov@obe.tv
>
parent
80721cc1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
46 additions
and
0 deletions
+46
-0
diracdsp.asm
libavcodec/x86/diracdsp.asm
+42
-0
diracdsp_init.c
libavcodec/x86/diracdsp_init.c
+4
-0
No files found.
libavcodec/x86/diracdsp.asm
View file @
bd61f3c6
...
...
@@ -22,6 +22,8 @@
SECTION_RODATA

; Read-only vector constants used by the routines in this file.
pw_7:                      times 8 dw 7      ; eight 16-bit words, each 7
; Bias added to every 32-bit sample by put_signed_rect_clamped_10 before
; narrowing (four dwords, each 0x200).
convert_to_unsigned_10bit: times 4 dd 0x200
; Upper clamp bound used by put_signed_rect_clamped_10 (eight words, each
; 0x3ff).
clip_10bit:                times 8 dw 0x3ff

; Constants defined outside this file.
cextern pw_3
cextern pw_16
...
...
@@ -300,3 +302,43 @@ cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
jg
.
loop_v
RET
%if ARCH_X86_64 == 1
;-----------------------------------------------------------------------------
; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride,
;                                 const uint8_t *src, int src_stride,
;                                 int width, int height)
;
; For each of `height` rows: loads 32-bit samples from src, adds 0x200 to
; each, narrows them to 16-bit words clamped to [0, 0x3ff] and stores the
; words to dst. Strides are byte offsets added to the row pointers.
;
; In:     dst, dst_stride, src, src_stride, w (width), h (height)
; Temps:  src_row / dst_row = start of the current row in src / dst
;         w_row             = saved width, reloaded at the start of each row
;         m2 = clip_10bit, m3 = convert_to_unsigned_10bit
;
; Each inner iteration consumes 2*mmsize source bytes, writes mmsize
; destination bytes and decrements the width counter by 8. Both loops test
; their counter after the body, so at least one iteration always runs.
; NOTE(review): width is presumably a positive multiple of 8 and height > 0;
; confirm against the callers.
;-----------------------------------------------------------------------------
cglobal put_signed_rect_clamped_10, 6, 9, 4, dst, dst_stride, src, src_stride, w, h, src_row, dst_row, w_row
    ; The strides are C ints: the calling convention does not guarantee the
    ; upper 32 bits of their registers, so sign-extend before using them in
    ; 64-bit pointer arithmetic.
    movsxd      dst_strideq, dst_strided
    movsxd      src_strideq, src_strided

    mov         src_rowq, srcq
    mov         dst_rowq, dstq
    mov         w_rowd, wd                  ; only the low 32 bits are ever used
    mova        m2, [clip_10bit]
    mova        m3, [convert_to_unsigned_10bit]

.loop_h:
    mov         srcq, src_rowq
    mov         dstq, dst_rowq
    mov         wd, w_rowd

.loop_w:
    movu        m0, [srcq+0*mmsize]
    movu        m1, [srcq+1*mmsize]
    paddd       m0, m3
    paddd       m1, m3
    ; packusdw saturates to unsigned words, so negatives are already 0 and
    ; large values may be >= 0x8000. Clamp the top with an *unsigned* min:
    ; a signed min/max would treat such words as negative and turn them
    ; into 0 instead of 0x3ff.
    packusdw    m0, m0, m1
    pminuw      m0, m2
    movu        [dstq], m0

    add         srcq, 2*mmsize
    add         dstq, 1*mmsize
    sub         wd, 8
    jg          .loop_w

    add         src_rowq, src_strideq       ; advance both row pointers
    add         dst_rowq, dst_strideq
    sub         hd, 1
    jg          .loop_h
    RET
%endif
libavcodec/x86/diracdsp_init.c
View file @
bd61f3c6
...
...
@@ -45,6 +45,9 @@ void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, i
/* Prototypes for routines whose bodies are not in this file. */
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);

/* 10-bit variant; only declared when building for x86-64. Unlike the
 * prototypes above, src is passed as const uint8_t * rather than
 * const int16_t *. */
#if ARCH_X86_64
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height);
#endif

void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
...
...
@@ -189,5 +192,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
if
(
EXTERNAL_SSE4
(
mm_flags
))
{
c
->
dequant_subband
[
1
]
=
ff_dequant_subband_32_sse4
;
c
->
put_signed_rect_clamped
[
1
]
=
ff_put_signed_rect_clamped_10_sse4
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment