Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
6c889888
Commit
6c889888
authored
Dec 05, 2011
by
Janne Grunau
Committed by
Mans Rullgard
Dec 06, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
rv40: NEON optimised weighted prediction
Signed-off-by:
Mans Rullgard
<
mans@mansr.com
>
parent
f5c05b9a
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
92 additions
and
0 deletions
+92
-0
Makefile
libavcodec/arm/Makefile
+1
-0
rv40dsp_init_neon.c
libavcodec/arm/rv40dsp_init_neon.c
+6
-0
rv40dsp_neon.S
libavcodec/arm/rv40dsp_neon.S
+85
-0
No files found.
libavcodec/arm/Makefile
View file @
6c889888
...
...
@@ -69,6 +69,7 @@ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_neon.o \
NEON-OBJS-$(CONFIG_RV40_DECODER)
+=
arm/rv34dsp_init_neon.o
\
arm/rv34dsp_neon.o
\
arm/rv40dsp_init_neon.o
\
arm/rv40dsp_neon.o
\
arm/h264cmc_neon.o
\
NEON-OBJS-$(CONFIG_VP3_DECODER)
+=
arm/vp3dsp_neon.o
...
...
libavcodec/arm/rv40dsp_init_neon.c
View file @
6c889888
...
...
@@ -29,10 +29,16 @@ void ff_put_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
/* Chroma motion-compensation routines (defined outside this file). */
void ff_avg_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);

/* Weighted-prediction routines implemented in rv40dsp_neon.S. */
void ff_rv40_weight_func_16_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                                 int w1, int w2, int stride);
void ff_rv40_weight_func_8_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                                int w1, int w2, int stride);
/**
 * Point the RV40 entries of the DSP context at their NEON implementations.
 *
 * @param c    context whose put/avg chroma tables and weight table are filled
 * @param dsp  not referenced by this function
 */
void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext *dsp)
{
    /* Slot 0 of each table: the mc8 chroma routines and the 16 weight routine. */
    c->put_chroma_pixels_tab[0]  = ff_put_rv40_chroma_mc8_neon;
    c->avg_chroma_pixels_tab[0]  = ff_avg_rv40_chroma_mc8_neon;
    c->rv40_weight_pixels_tab[0] = ff_rv40_weight_func_16_neon;

    /* Slot 1 of each table: the mc4 chroma routines and the 8 weight routine. */
    c->put_chroma_pixels_tab[1]  = ff_put_rv40_chroma_mc4_neon;
    c->avg_chroma_pixels_tab[1]  = ff_avg_rv40_chroma_mc4_neon;
    c->rv40_weight_pixels_tab[1] = ff_rv40_weight_func_8_neon;
}
libavcodec/arm/rv40dsp_neon.S
0 → 100644
View file @
6c889888
/*
* Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "asm.S"
/*
 * rv40_weight: weighted blend of 16 bytes from each of two inputs.
 *
 * In:       d2, d3 (q1)  16 unsigned bytes "a"
 *           d4, d5 (q2)  16 unsigned bytes "b"
 *           d0[2]        16-bit multiplier applied to the "a" bytes
 *           d0[0]        16-bit multiplier applied to the "b" bytes
 * Out:      d2, d3 (q1)  per byte:
 *                        (((a * d0[2]) >> 9) + ((b * d0[0]) >> 9) + 16) >> 5,
 *                        narrowed to 8 bits without saturation
 * Clobbers: q2, q3, q8-q15 (d0 is only read)
 */
.macro rv40_weight
/* Widen all 32 input bytes to unsigned 16-bit: q8/q9 = a, q10/q11 = b. */
vmovl.u8 q8, d2
vmovl.u8 q9, d3
vmovl.u8 q10, d4
vmovl.u8 q11, d5
/* 32-bit products a * d0[2]. q8 and q9 are reused as destinations once
   their halves (d16..d19) have been consumed as sources. */
vmull.u16 q2, d16, d0[2]
vmull.u16 q3, d17, d0[2]
vmull.u16 q8, d18, d0[2]
vmull.u16 q9, d19, d0[2]
/* 32-bit products b * d0[0]. */
vmull.u16 q12, d20, d0[0]
vmull.u16 q13, d21, d0[0]
vmull.u16 q14, d22, d0[0]
vmull.u16 q15, d23, d0[0]
/* (a * d0[2]) >> 9, narrowed to 16 bits and packed into q2/q3 (d4..d7). */
vshrn.i32 d4, q2, #9
vshrn.i32 d5, q3, #9
vshrn.i32 d6, q8, #9
vshrn.i32 d7, q9, #9
/* (b * d0[0]) >> 9, narrowed to 16 bits and packed into q8/q9 (d16..d19). */
vshrn.i32 d16, q12, #9
vshrn.i32 d17, q13, #9
vshrn.i32 d18, q14, #9
vshrn.i32 d19, q15, #9
/* Sum the two weighted terms lane by lane. */
vadd.u16 q2, q2, q8
vadd.u16 q3, q3, q9
/* Rounding shift right by 5 (adds 1 << 4 before shifting) and narrow to
   bytes; the 16 results land in d2/d3. */
vrshrn.i16 d2, q2, #5
vrshrn.i16 d3, q3, #5
.endm
/* void ff_rv40_weight_func_16_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int w1, int w2, int stride)

Weighted blend of two 16-byte-wide sources over 16 rows. Per byte:
dst = (((src1 * w2) >> 9) + ((src2 * w1) >> 9) + 16) >> 5
(see rv40_weight: src1 is paired with d0[2] = w2, src2 with d0[0] = w1).

In:       r0 = dst, r1 = src1, r2 = src2, r3 = w1,
          [sp] = w2, [sp, #4] = stride
Clobbers: r0-r3, r12, flags, q0-q3, q8-q15
The :128 qualifiers require every row address of dst, src1 and src2 to be
16-byte aligned. */
function ff_rv40_weight_func_16_neon, export=1
/* Fetch w2 from the stack and pack both weights into d0:
   low word = w1 (r3), high word = w2 (r12). */
ldr r12, [sp]
vmov d0, r3, r12
/* r12 is now free: reuse it for stride; r3 becomes the row counter. */
ldr r12, [sp, #4]
mov r3, #16
1:
/* Load one 16-byte row from each source, post-incrementing by stride. */
vld1.8 {q1}, [r1,:128], r12
vld1.8 {q2}, [r2,:128], r12
rv40_weight
/* Store the blended row and advance dst by stride. */
vst1.8 {q1}, [r0,:128], r12
subs r3, r3, #1
bne 1b
bx lr
endfunc
/* void ff_rv40_weight_func_8_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int w1, int w2, int stride)

Weighted blend of two 8-byte-wide sources over 8 rows, two rows per loop
iteration. Per byte:
dst = (((src1 * w2) >> 9) + ((src2 * w1) >> 9) + 16) >> 5
(see rv40_weight: src1 is paired with d0[2] = w2, src2 with d0[0] = w1).

In:       r0 = dst, r1 = src1, r2 = src2, r3 = w1,
          [sp] = w2, [sp, #4] = stride
Clobbers: r0-r3, r12, flags, q0-q3, q8-q15
The :64 qualifiers require every row address of dst, src1 and src2 to be
8-byte aligned. */
function ff_rv40_weight_func_8_neon, export=1
/* Fetch w2 from the stack and pack both weights into d0:
   low word = w1 (r3), high word = w2 (r12). */
ldr r12, [sp]
vmov d0, r3, r12
/* r12 is now free: reuse it for stride; r3 becomes the row counter. */
ldr r12, [sp, #4]
mov r3, #8
1:
/* Two consecutive src1 rows fill d2/d3 and two src2 rows fill d4/d5, so the
   16-byte macro processes a pair of 8-byte rows at once. */
vld1.8 {d2}, [r1,:64], r12
vld1.8 {d3}, [r1,:64], r12
vld1.8 {d4}, [r2,:64], r12
vld1.8 {d5}, [r2,:64], r12
rv40_weight
/* Write the two blended rows back, advancing dst by stride after each. */
vst1.8 {d2}, [r0,:64], r12
vst1.8 {d3}, [r0,:64], r12
/* Two rows were consumed this iteration. */
subs r3, r3, #2
bne 1b
bx lr
endfunc
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment