Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
dd927e2e
Commit
dd927e2e
authored
Jan 30, 2009
by
Måns Rullgård
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ARM: NEON optimised vector_fmul
Originally committed as revision 16867 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
89150098
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
44 additions
and
0 deletions
+44
-0
dsputil_neon.c
libavcodec/arm/dsputil_neon.c
+4
-0
dsputil_neon_s.S
libavcodec/arm/dsputil_neon_s.S
+40
-0
No files found.
libavcodec/arm/dsputil_neon.c
View file @
dd927e2e
...
...
@@ -146,6 +146,8 @@ void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
DCTELEM
*
block
,
int
stride
,
const
uint8_t
nnzc
[
6
*
8
]);
void
ff_vector_fmul_neon
(
float
*
dst
,
const
float
*
src
,
int
len
);
void
ff_float_to_int16_neon
(
int16_t
*
,
const
float
*
,
long
);
void
ff_float_to_int16_interleave_neon
(
int16_t
*
,
const
float
**
,
long
,
int
);
...
...
@@ -242,6 +244,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_neon
;
c
->
h264_idct_add8
=
ff_h264_idct_add8_neon
;
c
->
vector_fmul
=
ff_vector_fmul_neon
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
float_to_int16
=
ff_float_to_int16_neon
;
c
->
float_to_int16_interleave
=
ff_float_to_int16_interleave_neon
;
...
...
libavcodec/arm/dsputil_neon_s.S
View file @
dd927e2e
...
...
@@ -609,3 +609,43 @@ function ff_float_to_int16_interleave_neon, export=1
vcvt.s32.f32 q1, q1, #16
b 6b
.endfunc
/*
 * ff_vector_fmul_neon: in-place element-wise multiply of two float arrays.
 *
 * C prototype (declared in dsputil_neon.c):
 *     void ff_vector_fmul_neon(float *dst, const float *src, int len);
 * Effect: dst[i] = dst[i] * src[i] for every i in [0, len).
 *
 * Register use (arguments assumed to arrive in r0-r2 per the standard ARM
 * procedure call convention):
 *   r0      load pointer into dst, post-incremented by every vld1
 *   r1      load pointer into src, post-incremented by every vld1
 *   r2      len on entry, then len - 8, finally (len - 8) & 15
 *   r3      store pointer into dst; lags behind r0 because stores are delayed
 *   ip      number of elements still to be consumed by the 16-element loop
 *   q0-q3   operands just loaded from dst (q0, q1) and src (q2, q3)
 *   q8-q11  products waiting to be written back
 *
 * The routine is software pipelined: products are stored only after the
 * loads for the following block have been issued, so 8 products (q8, q9)
 * are always pending when control reaches label 3.
 *
 * Every load and store carries the :128 address qualifier, i.e. both dst
 * and src are treated as 128-bit (16-byte) aligned.
 *
 * NOTE(review): the code appears to require len to be a positive multiple
 * of 8. The first 8 elements are processed unconditionally, and after the
 * 16-element loop only a leftover of zero or "one more block of 8" is
 * handled. Confirm against the callers of vector_fmul.
 */
function ff_vector_fmul_neon, export=1
/* Keep a separate store pointer; r0 runs ahead as the load pointer. */
mov r3, r0
/*
 * r2 = len - 8. The flags set here are tested by the first beq below; the
 * NEON loads and multiplies in between do not modify the condition flags.
 */
subs r2, r2, #8
/* Prologue: load the first 8 floats of dst and src and multiply them. */
vld1.64 {d0-d3}, [r0,:128]!
vld1.64 {d4-d7}, [r1,:128]!
vmul.f32 q8, q0, q2
vmul.f32 q9, q1, q3
/* len == 8: nothing more to load, just store the pending products. */
beq 3f
/* ip = (len - 8) with the low four bits cleared; zero skips the main loop. */
bics ip, r2, #15
beq 2f
/*
 * Main loop, 16 elements per iteration.
 * First half: compute 8 new products into q10/q11, then store the 8
 * products that were pending in q8/q9.
 * Second half: compute 8 new products into q8/q9, then store q10/q11.
 * On exit q8/q9 again hold 8 products that have not been stored yet.
 * The bne at the bottom uses the flags from the subs at the top of the loop.
 */
1: subs ip, ip, #16
vld1.64 {d0-d1}, [r0,:128]!
vld1.64 {d4-d5}, [r1,:128]!
vmul.f32 q10, q0, q2
vld1.64 {d2-d3}, [r0,:128]!
vld1.64 {d6-d7}, [r1,:128]!
vmul.f32 q11, q1, q3
vst1.64 {d16-d19},[r3,:128]!
vld1.64 {d0-d1}, [r0,:128]!
vld1.64 {d4-d5}, [r1,:128]!
vmul.f32 q8, q0, q2
vld1.64 {d2-d3}, [r0,:128]!
vld1.64 {d6-d7}, [r1,:128]!
vmul.f32 q9, q1, q3
vst1.64 {d20-d23},[r3,:128]!
bne 1b
/* r2 = elements left over after the 16-element loop; zero means done. */
ands r2, r2, #15
beq 3f
/*
 * Tail: one more block of 8 elements. Each half of the pending result
 * (q8, then q9) is stored right before that register is overwritten with
 * a new product, so q8/q9 hold the final 8 products afterwards.
 */
2: vld1.64 {d0-d1}, [r0,:128]!
vld1.64 {d4-d5}, [r1,:128]!
vst1.64 {d16-d17},[r3,:128]!
vmul.f32 q8, q0, q2
vld1.64 {d2-d3}, [r0,:128]!
vld1.64 {d6-d7}, [r1,:128]!
vst1.64 {d18-d19},[r3,:128]!
vmul.f32 q9, q1, q3
/* Epilogue: write back the last 8 pending products and return. */
3: vst1.64 {d16-d19},[r3,:128]!
bx lr
.endfunc
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment