Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
716f1705
Commit
716f1705
authored
Dec 01, 2011
by
Mans Rullgard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ARM: add remaining NEON avg_pixels8/16 functions
parent
94267ddf
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
75 additions
and
0 deletions
+75
-0
dsputil_init_neon.c
libavcodec/arm/dsputil_init_neon.c
+27
-0
dsputil_neon.S
libavcodec/arm/dsputil_neon.S
+48
-0
No files found.
libavcodec/arm/dsputil_init_neon.c
View file @
716f1705
...
@@ -53,7 +53,19 @@ void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
...
@@ -53,7 +53,19 @@ void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
void
ff_put_pixels8_xy2_no_rnd_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_put_pixels8_xy2_no_rnd_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels16_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels16_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels16_x2_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels16_y2_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels16_xy2_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels8_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels8_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels8_x2_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels8_y2_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels8_xy2_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels16_x2_no_rnd_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels16_y2_no_rnd_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels16_xy2_no_rnd_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels8_x2_no_rnd_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels8_y2_no_rnd_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_avg_pixels8_xy2_no_rnd_neon
(
uint8_t
*
,
const
uint8_t
*
,
int
,
int
);
void
ff_add_pixels_clamped_neon
(
const
DCTELEM
*
,
uint8_t
*
,
int
);
void
ff_add_pixels_clamped_neon
(
const
DCTELEM
*
,
uint8_t
*
,
int
);
void
ff_put_pixels_clamped_neon
(
const
DCTELEM
*
,
uint8_t
*
,
int
);
void
ff_put_pixels_clamped_neon
(
const
DCTELEM
*
,
uint8_t
*
,
int
);
...
@@ -211,7 +223,22 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
...
@@ -211,7 +223,22 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c
->
put_no_rnd_pixels_tab
[
1
][
3
]
=
ff_put_pixels8_xy2_no_rnd_neon
;
c
->
put_no_rnd_pixels_tab
[
1
][
3
]
=
ff_put_pixels8_xy2_no_rnd_neon
;
c
->
avg_pixels_tab
[
0
][
0
]
=
ff_avg_pixels16_neon
;
c
->
avg_pixels_tab
[
0
][
0
]
=
ff_avg_pixels16_neon
;
c
->
avg_pixels_tab
[
0
][
1
]
=
ff_avg_pixels16_x2_neon
;
c
->
avg_pixels_tab
[
0
][
2
]
=
ff_avg_pixels16_y2_neon
;
c
->
avg_pixels_tab
[
0
][
3
]
=
ff_avg_pixels16_xy2_neon
;
c
->
avg_pixels_tab
[
1
][
0
]
=
ff_avg_pixels8_neon
;
c
->
avg_pixels_tab
[
1
][
0
]
=
ff_avg_pixels8_neon
;
c
->
avg_pixels_tab
[
1
][
1
]
=
ff_avg_pixels8_x2_neon
;
c
->
avg_pixels_tab
[
1
][
2
]
=
ff_avg_pixels8_y2_neon
;
c
->
avg_pixels_tab
[
1
][
3
]
=
ff_avg_pixels8_xy2_neon
;
c
->
avg_no_rnd_pixels_tab
[
0
][
0
]
=
ff_avg_pixels16_neon
;
c
->
avg_no_rnd_pixels_tab
[
0
][
1
]
=
ff_avg_pixels16_x2_no_rnd_neon
;
c
->
avg_no_rnd_pixels_tab
[
0
][
2
]
=
ff_avg_pixels16_y2_no_rnd_neon
;
c
->
avg_no_rnd_pixels_tab
[
0
][
3
]
=
ff_avg_pixels16_xy2_no_rnd_neon
;
c
->
avg_no_rnd_pixels_tab
[
1
][
0
]
=
ff_avg_pixels8_neon
;
c
->
avg_no_rnd_pixels_tab
[
1
][
1
]
=
ff_avg_pixels8_x2_no_rnd_neon
;
c
->
avg_no_rnd_pixels_tab
[
1
][
2
]
=
ff_avg_pixels8_y2_no_rnd_neon
;
c
->
avg_no_rnd_pixels_tab
[
1
][
3
]
=
ff_avg_pixels8_xy2_no_rnd_neon
;
}
}
c
->
add_pixels_clamped
=
ff_add_pixels_clamped_neon
;
c
->
add_pixels_clamped
=
ff_add_pixels_clamped_neon
;
...
...
libavcodec/arm/dsputil_neon.S
View file @
716f1705
...
@@ -81,6 +81,13 @@ endfunc
...
@@ -81,6 +81,13 @@ endfunc
avg q0, q0, q1
avg q0, q0, q1
vext.8 q3, q2, q3, #1
vext.8 q3, q2, q3, #1
avg q2, q2, q3
avg q2, q2, q3
.if \avg
vld1.8 {q1}, [r0,:128], r2
vld1.8 {q3}, [r0,:128]
vrhadd.u8 q0, q0, q1
vrhadd.u8 q2, q2, q3
sub r0, r0, r2
.endif
vst1.64 {d0, d1}, [r0,:128], r2
vst1.64 {d0, d1}, [r0,:128], r2
vst1.64 {d4, d5}, [r0,:128], r2
vst1.64 {d4, d5}, [r0,:128], r2
bne 1b
bne 1b
...
@@ -97,6 +104,13 @@ endfunc
...
@@ -97,6 +104,13 @@ endfunc
vld1.64 {d2, d3}, [r1], r2
vld1.64 {d2, d3}, [r1], r2
pld [r1]
pld [r1]
pld [r1, r2]
pld [r1, r2]
.if \avg
vld1.8 {q8}, [r0,:128], r2
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q2, q2, q8
vrhadd.u8 q3, q3, q9
sub r0, r0, r2
.endif
vst1.64 {d4, d5}, [r0,:128], r2
vst1.64 {d4, d5}, [r0,:128], r2
vst1.64 {d6, d7}, [r0,:128], r2
vst1.64 {d6, d7}, [r0,:128], r2
bne 1b
bne 1b
...
@@ -131,6 +145,10 @@ endfunc
...
@@ -131,6 +145,10 @@ endfunc
vadd.u16 q1, q1, q13
vadd.u16 q1, q1, q13
.endif
.endif
shrn d29, q1, #2
shrn d29, q1, #2
.if \avg
vld1.8 {q8}, [r0,:128]
vrhadd.u8 q14, q14, q8
.endif
vaddl.u8 q8, d0, d30
vaddl.u8 q8, d0, d30
vld1.64 {d2-d4}, [r1], r2
vld1.64 {d2-d4}, [r1], r2
vaddl.u8 q10, d1, d31
vaddl.u8 q10, d1, d31
...
@@ -147,6 +165,10 @@ endfunc
...
@@ -147,6 +165,10 @@ endfunc
vadd.u16 q0, q0, q13
vadd.u16 q0, q0, q13
.endif
.endif
shrn d31, q0, #2
shrn d31, q0, #2
.if \avg
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q15, q15, q9
.endif
vaddl.u8 q9, d2, d4
vaddl.u8 q9, d2, d4
vaddl.u8 q11, d3, d5
vaddl.u8 q11, d3, d5
vst1.64 {d30,d31}, [r0,:128], r2
vst1.64 {d30,d31}, [r0,:128], r2
...
@@ -193,6 +215,12 @@ endfunc
...
@@ -193,6 +215,12 @@ endfunc
subs r3, r3, #2
subs r3, r3, #2
vswp d1, d2
vswp d1, d2
avg q0, q0, q1
avg q0, q0, q1
.if \avg
vld1.8 {d4}, [r0,:64], r2
vld1.8 {d5}, [r0,:64]
vrhadd.u8 q0, q0, q2
sub r0, r0, r2
.endif
vst1.64 {d0}, [r0,:64], r2
vst1.64 {d0}, [r0,:64], r2
vst1.64 {d1}, [r0,:64], r2
vst1.64 {d1}, [r0,:64], r2
bne 1b
bne 1b
...
@@ -209,6 +237,12 @@ endfunc
...
@@ -209,6 +237,12 @@ endfunc
vld1.64 {d1}, [r1], r2
vld1.64 {d1}, [r1], r2
pld [r1]
pld [r1]
pld [r1, r2]
pld [r1, r2]
.if \avg
vld1.8 {d2}, [r0,:64], r2
vld1.8 {d3}, [r0,:64]
vrhadd.u8 q2, q2, q1
sub r0, r0, r2
.endif
vst1.64 {d4}, [r0,:64], r2
vst1.64 {d4}, [r0,:64], r2
vst1.64 {d5}, [r0,:64], r2
vst1.64 {d5}, [r0,:64], r2
bne 1b
bne 1b
...
@@ -240,11 +274,19 @@ endfunc
...
@@ -240,11 +274,19 @@ endfunc
vld1.64 {d2, d3}, [r1], r2
vld1.64 {d2, d3}, [r1], r2
vadd.u16 q10, q8, q9
vadd.u16 q10, q8, q9
pld [r1, r2]
pld [r1, r2]
.if \avg
vld1.8 {d7}, [r0,:64]
vrhadd.u8 d5, d5, d7
.endif
.ifeq \rnd
.ifeq \rnd
vadd.u16 q10, q10, q11
vadd.u16 q10, q10, q11
.endif
.endif
vst1.64 {d5}, [r0,:64], r2
vst1.64 {d5}, [r0,:64], r2
shrn d7, q10, #2
shrn d7, q10, #2
.if \avg
vld1.8 {d5}, [r0,:64]
vrhadd.u8 d7, d7, d5
.endif
vext.8 d6, d2, d3, #1
vext.8 d6, d2, d3, #1
vaddl.u8 q9, d2, d6
vaddl.u8 q9, d2, d6
vst1.64 {d7}, [r0,:64], r2
vst1.64 {d7}, [r0,:64], r2
...
@@ -294,6 +336,9 @@ function ff_avg_h264_qpel16_mc00_neon, export=1
...
@@ -294,6 +336,9 @@ function ff_avg_h264_qpel16_mc00_neon, export=1
endfunc
endfunc
pixfunc avg_, pixels16, avg=1
pixfunc avg_, pixels16, avg=1
pixfunc2 avg_, pixels16_x2, avg=1
pixfunc2 avg_, pixels16_y2, avg=1
pixfunc2 avg_, pixels16_xy2, avg=1
function ff_put_h264_qpel8_mc00_neon, export=1
function ff_put_h264_qpel8_mc00_neon, export=1
mov r3, #8
mov r3, #8
...
@@ -309,6 +354,9 @@ function ff_avg_h264_qpel8_mc00_neon, export=1
...
@@ -309,6 +354,9 @@ function ff_avg_h264_qpel8_mc00_neon, export=1
endfunc
endfunc
pixfunc avg_, pixels8, avg=1
pixfunc avg_, pixels8, avg=1
pixfunc2 avg_, pixels8_x2, avg=1
pixfunc2 avg_, pixels8_y2, avg=1
pixfunc2 avg_, pixels8_xy2, avg=1
function ff_put_pixels_clamped_neon, export=1
function ff_put_pixels_clamped_neon, export=1
vld1.64 {d16-d19}, [r0,:128]!
vld1.64 {d16-d19}, [r0,:128]!
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment