Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
f5c05b9a
Commit
f5c05b9a
authored
Dec 05, 2011
by
Janne Grunau
Committed by
Mans Rullgard
Dec 06, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
rv40: NEON optimised chroma MC
Signed-off-by:
Mans Rullgard
<
mans@mansr.com
>
parent
f054a827
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
118 additions
and
5 deletions
+118
-5
Makefile
libavcodec/arm/Makefile
+2
-0
h264cmc_neon.S
libavcodec/arm/h264cmc_neon.S
+75
-5
rv40dsp_init_neon.c
libavcodec/arm/rv40dsp_init_neon.c
+38
-0
rv34dsp.h
libavcodec/rv34dsp.h
+1
-0
rv40dsp.c
libavcodec/rv40dsp.c
+2
-0
No files found.
libavcodec/arm/Makefile
View file @
f5c05b9a
...
@@ -68,6 +68,8 @@ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_neon.o \
...
@@ -68,6 +68,8 @@ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_neon.o \
NEON-OBJS-$(CONFIG_RV40_DECODER)
+=
arm/rv34dsp_init_neon.o
\
NEON-OBJS-$(CONFIG_RV40_DECODER)
+=
arm/rv34dsp_init_neon.o
\
arm/rv34dsp_neon.o
\
arm/rv34dsp_neon.o
\
arm/rv40dsp_init_neon.o
\
arm/h264cmc_neon.o
\
NEON-OBJS-$(CONFIG_VP3_DECODER)
+=
arm/vp3dsp_neon.o
NEON-OBJS-$(CONFIG_VP3_DECODER)
+=
arm/vp3dsp_neon.o
...
...
libavcodec/arm/h264cmc_neon.S
View file @
f5c05b9a
...
@@ -21,8 +21,8 @@
...
@@ -21,8 +21,8 @@
#include "asm.S"
#include "asm.S"
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
.macro h264_chroma_mc8 type
.macro h264_chroma_mc8 type
, codec=h264
function ff_\type\()_
h264
_chroma_mc8_neon, export=1
function ff_\type\()_
\codec\()
_chroma_mc8_neon, export=1
push {r4-r7, lr}
push {r4-r7, lr}
ldrd r4, [sp, #20]
ldrd r4, [sp, #20]
.ifc \type,avg
.ifc \type,avg
...
@@ -31,6 +31,15 @@ function ff_\type\()_h264_chroma_mc8_neon, export=1
...
@@ -31,6 +31,15 @@ function ff_\type\()_h264_chroma_mc8_neon, export=1
pld [r1]
pld [r1]
pld [r1, r2]
pld [r1, r2]
.ifc \codec,rv40
movrel r6, rv40bias
lsr r7, r5, #1
add r6, r6, r7, lsl #3
lsr r7, r4, #1
add r6, r6, r7, lsl #1
vld1.16 {d22[],d23[]}, [r6,:16]
.endif
A muls r7, r4, r5
A muls r7, r4, r5
T mul r7, r4, r5
T mul r7, r4, r5
T cmp r7, #0
T cmp r7, #0
...
@@ -67,10 +76,17 @@ T cmp r7, #0
...
@@ -67,10 +76,17 @@ T cmp r7, #0
vmlal.u8 q9, d7, d1
vmlal.u8 q9, d7, d1
vmlal.u8 q9, d4, d2
vmlal.u8 q9, d4, d2
vmlal.u8 q9, d5, d3
vmlal.u8 q9, d5, d3
vrshrn.u16 d16, q8, #6
vld1.8 {d6, d7}, [r5], r4
vld1.8 {d6, d7}, [r5], r4
pld [r1]
pld [r1]
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d17, q9, #6
vrshrn.u16 d17, q9, #6
.else
vadd.u16 q8, q8, q11
vadd.u16 q9, q9, q11
vshrn.u16 d16, q8, #6
vshrn.u16 d17, q9, #6
.endif
.ifc \type,avg
.ifc \type,avg
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
...
@@ -102,8 +118,15 @@ T cmp r7, #0
...
@@ -102,8 +118,15 @@ T cmp r7, #0
vmull.u8 q9, d6, d0
vmull.u8 q9, d6, d0
vmlal.u8 q9, d4, d1
vmlal.u8 q9, d4, d1
vld1.8 {d6}, [r5], r4
vld1.8 {d6}, [r5], r4
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d16, q8, #6
vrshrn.u16 d17, q9, #6
vrshrn.u16 d17, q9, #6
.else
vadd.u16 q8, q8, q11
vadd.u16 q9, q9, q11
vshrn.u16 d16, q8, #6
vshrn.u16 d17, q9, #6
.endif
.ifc \type,avg
.ifc \type,avg
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
...
@@ -131,8 +154,15 @@ T cmp r7, #0
...
@@ -131,8 +154,15 @@ T cmp r7, #0
vmlal.u8 q9, d7, d1
vmlal.u8 q9, d7, d1
pld [r1]
pld [r1]
vext.8 d5, d4, d5, #1
vext.8 d5, d4, d5, #1
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d16, q8, #6
vrshrn.u16 d17, q9, #6
vrshrn.u16 d17, q9, #6
.else
vadd.u16 q8, q8, q11
vadd.u16 q9, q9, q11
vshrn.u16 d16, q8, #6
vshrn.u16 d17, q9, #6
.endif
.ifc \type,avg
.ifc \type,avg
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
...
@@ -149,8 +179,8 @@ endfunc
...
@@ -149,8 +179,8 @@ endfunc
.endm
.endm
/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
.macro h264_chroma_mc4 type
.macro h264_chroma_mc4 type
, codec=h264
function ff_\type\()_
h264
_chroma_mc4_neon, export=1
function ff_\type\()_
\codec\()
_chroma_mc4_neon, export=1
push {r4-r7, lr}
push {r4-r7, lr}
ldrd r4, [sp, #20]
ldrd r4, [sp, #20]
.ifc \type,avg
.ifc \type,avg
...
@@ -159,6 +189,15 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
...
@@ -159,6 +189,15 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
pld [r1]
pld [r1]
pld [r1, r2]
pld [r1, r2]
.ifc \codec,rv40
movrel r6, rv40bias
lsr r7, r5, #1
add r6, r6, r7, lsl #3
lsr r7, r4, #1
add r6, r6, r7, lsl #1
vld1.16 {d22[],d23[]}, [r6,:16]
.endif
A muls r7, r4, r5
A muls r7, r4, r5
T mul r7, r4, r5
T mul r7, r4, r5
T cmp r7, #0
T cmp r7, #0
...
@@ -199,7 +238,12 @@ T cmp r7, #0
...
@@ -199,7 +238,12 @@ T cmp r7, #0
vld1.8 {d6}, [r5], r4
vld1.8 {d6}, [r5], r4
vadd.i16 d16, d16, d17
vadd.i16 d16, d16, d17
vadd.i16 d17, d18, d19
vadd.i16 d17, d18, d19
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d16, q8, #6
.else
vadd.u16 q8, q8, q11
vshrn.u16 d16, q8, #6
.endif
subs r3, r3, #2
subs r3, r3, #2
pld [r1]
pld [r1]
.ifc \type,avg
.ifc \type,avg
...
@@ -236,7 +280,12 @@ T cmp r7, #0
...
@@ -236,7 +280,12 @@ T cmp r7, #0
vld1.32 {d4[1]}, [r5], r4
vld1.32 {d4[1]}, [r5], r4
vadd.i16 d16, d16, d17
vadd.i16 d16, d16, d17
vadd.i16 d17, d18, d19
vadd.i16 d17, d18, d19
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d16, q8, #6
.else
vadd.u16 q8, q8, q11
vshrn.u16 d16, q8, #6
.endif
.ifc \type,avg
.ifc \type,avg
vld1.32 {d20[0]}, [lr,:32], r2
vld1.32 {d20[0]}, [lr,:32], r2
vld1.32 {d20[1]}, [lr,:32], r2
vld1.32 {d20[1]}, [lr,:32], r2
...
@@ -266,7 +315,12 @@ T cmp r7, #0
...
@@ -266,7 +315,12 @@ T cmp r7, #0
vadd.i16 d16, d16, d17
vadd.i16 d16, d16, d17
vadd.i16 d17, d18, d19
vadd.i16 d17, d18, d19
pld [r1]
pld [r1]
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d16, q8, #6
.else
vadd.u16 q8, q8, q11
vshrn.u16 d16, q8, #6
.endif
.ifc \type,avg
.ifc \type,avg
vld1.32 {d20[0]}, [lr,:32], r2
vld1.32 {d20[0]}, [lr,:32], r2
vld1.32 {d20[1]}, [lr,:32], r2
vld1.32 {d20[1]}, [lr,:32], r2
...
@@ -352,9 +406,25 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
...
@@ -352,9 +406,25 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
endfunc
endfunc
.endm
.endm
#if CONFIG_H264_DECODER
h264_chroma_mc8 put
h264_chroma_mc8 put
h264_chroma_mc8 avg
h264_chroma_mc8 avg
h264_chroma_mc4 put
h264_chroma_mc4 put
h264_chroma_mc4 avg
h264_chroma_mc4 avg
h264_chroma_mc2 put
h264_chroma_mc2 put
h264_chroma_mc2 avg
h264_chroma_mc2 avg
#endif
#if CONFIG_RV40_DECODER
/*
 * Bias table used by the rv40 variants of the chroma MC macros in place of
 * the h264 rounding shift: 4 rows of 4 halfwords (8 bytes per row).
 *
 * The macros form the entry address as
 *     rv40bias + (r5 >> 1) * 8 + (r4 >> 1) * 2
 * where r4/r5 are fetched with ldrd from the first two stack arguments
 * (x and y according to the chroma_mc signature comment), i.e. the row is
 * selected by y >> 1 and the column by x >> 1.  The selected halfword is
 * broadcast to every lane of q11, added to the 16-bit filter sums and the
 * result is narrowed with a truncating shift right by 6 (vshrn), whereas
 * the h264 variants use a rounding shift (vrshrn) with no table.
 */
const rv40bias
.short 0, 16, 32, 16
.short 32, 28, 32, 28
.short 0, 32, 16, 32
.short 32, 28, 32, 28
endconst
/*
 * Instantiate the macros with codec=rv40, which names the generated
 * functions ff_{put,avg}_rv40_chroma_mc{8,4}_neon.  Only the mc8 and mc4
 * macros are instantiated for rv40 here; mc2 is not.
 */
h264_chroma_mc8 put, rv40
h264_chroma_mc8 avg, rv40
h264_chroma_mc4 put, rv40
h264_chroma_mc4 avg, rv40
#endif
libavcodec/arm/rv40dsp_init_neon.c
0 → 100644
View file @
f5c05b9a
/*
* Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavcodec/avcodec.h"
#include "libavcodec/rv34dsp.h"
/*
 * NEON chroma motion-compensation routines generated for RV40 by the macros
 * in h264cmc_neon.S.  Parameter names follow the signature comment there:
 * chroma_mcN(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y).
 */
void ff_put_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src,
                                 int stride, int h, int x, int y);
void ff_put_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src,
                                 int stride, int h, int x, int y);

void ff_avg_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src,
                                 int stride, int h, int x, int y);
void ff_avg_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src,
                                 int stride, int h, int x, int y);
/*
 * Point the chroma put/avg function tables of the RV34 DSP context at the
 * NEON RV40 implementations.  Slot 0 receives the mc8 routines and slot 1
 * the mc4 routines.  The dsp argument is not used by this function.
 */
void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext *dsp)
{
    /* Slot 0: mc8 variants. */
    c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_neon;
    c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_neon;

    /* Slot 1: mc4 variants. */
    c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_neon;
    c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_neon;
}
libavcodec/rv34dsp.h
View file @
f5c05b9a
...
@@ -59,5 +59,6 @@ void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp);
...
@@ -59,5 +59,6 @@ void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp);
void
ff_rv34dsp_init_neon
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
);
void
ff_rv34dsp_init_neon
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
);
void
ff_rv40dsp_init_x86
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
);
void
ff_rv40dsp_init_x86
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
);
void
ff_rv40dsp_init_neon
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
);
#endif
/* AVCODEC_RV34DSP_H */
#endif
/* AVCODEC_RV34DSP_H */
libavcodec/rv40dsp.c
View file @
f5c05b9a
...
@@ -534,4 +534,6 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
...
@@ -534,4 +534,6 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
if
(
HAVE_MMX
)
if
(
HAVE_MMX
)
ff_rv40dsp_init_x86
(
c
,
dsp
);
ff_rv40dsp_init_x86
(
c
,
dsp
);
if
(
HAVE_NEON
)
ff_rv40dsp_init_neon
(
c
,
dsp
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment