Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
c65d67ef
Commit
c65d67ef
authored
Dec 20, 2013
by
Janne Grunau
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
aarch64: hpeldsp NEON optimizations
Ported from ARMv7 NEON.
parent
d5dd8c7b
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
531 additions
and
5 deletions
+531
-5
Makefile
libavcodec/aarch64/Makefile
+4
-1
h264qpel_init_aarch64.c
libavcodec/aarch64/h264qpel_init_aarch64.c
+4
-4
hpeldsp_init_aarch64.c
libavcodec/aarch64/hpeldsp_init_aarch64.c
+123
-0
hpeldsp_neon.S
libavcodec/aarch64/hpeldsp_neon.S
+397
-0
hpeldsp.c
libavcodec/hpeldsp.c
+2
-0
hpeldsp.h
libavcodec/hpeldsp.h
+1
-0
No files found.
libavcodec/aarch64/Makefile
View file @
c65d67ef
OBJS-$(CONFIG_H264CHROMA)
+=
aarch64/h264chroma_init_aarch64.o
OBJS-$(CONFIG_H264CHROMA)
+=
aarch64/h264chroma_init_aarch64.o
OBJS-$(CONFIG_H264DSP)
+=
aarch64/h264dsp_init_aarch64.o
OBJS-$(CONFIG_H264DSP)
+=
aarch64/h264dsp_init_aarch64.o
OBJS-$(CONFIG_H264QPEL)
+=
aarch64/h264qpel_init_aarch64.o
OBJS-$(CONFIG_H264QPEL)
+=
aarch64/h264qpel_init_aarch64.o
OBJS-$(CONFIG_HPELDSP)
+=
aarch64/hpeldsp_init_aarch64.o
OBJS-$(CONFIG_RV40_DECODER)
+=
aarch64/rv40dsp_init_aarch64.o
OBJS-$(CONFIG_RV40_DECODER)
+=
aarch64/rv40dsp_init_aarch64.o
OBJS-$(CONFIG_VC1_DECODER)
+=
aarch64/vc1dsp_init_aarch64.o
OBJS-$(CONFIG_VC1_DECODER)
+=
aarch64/vc1dsp_init_aarch64.o
NEON-OBJS-$(CONFIG_H264CHROMA)
+=
aarch64/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264CHROMA)
+=
aarch64/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264DSP)
+=
aarch64/h264idct_neon.o
NEON-OBJS-$(CONFIG_H264DSP)
+=
aarch64/h264idct_neon.o
NEON-OBJS-$(CONFIG_H264QPEL)
+=
aarch64/h264qpel_neon.o
NEON-OBJS-$(CONFIG_H264QPEL)
+=
aarch64/h264qpel_neon.o
\
aarch64/hpeldsp_neon.o
NEON-OBJS-$(CONFIG_HPELDSP)
+=
aarch64/hpeldsp_neon.o
libavcodec/aarch64/h264qpel_init_aarch64.c
View file @
c65d67ef
...
@@ -101,7 +101,7 @@ av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
...
@@ -101,7 +101,7 @@ av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
int
cpu_flags
=
av_get_cpu_flags
();
int
cpu_flags
=
av_get_cpu_flags
();
if
(
have_neon
(
cpu_flags
)
&&
!
high_bit_depth
)
{
if
(
have_neon
(
cpu_flags
)
&&
!
high_bit_depth
)
{
/* c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon; */
c
->
put_h264_qpel_pixels_tab
[
0
][
0
]
=
ff_put_h264_qpel16_mc00_neon
;
c
->
put_h264_qpel_pixels_tab
[
0
][
1
]
=
ff_put_h264_qpel16_mc10_neon
;
c
->
put_h264_qpel_pixels_tab
[
0
][
1
]
=
ff_put_h264_qpel16_mc10_neon
;
c
->
put_h264_qpel_pixels_tab
[
0
][
2
]
=
ff_put_h264_qpel16_mc20_neon
;
c
->
put_h264_qpel_pixels_tab
[
0
][
2
]
=
ff_put_h264_qpel16_mc20_neon
;
c
->
put_h264_qpel_pixels_tab
[
0
][
3
]
=
ff_put_h264_qpel16_mc30_neon
;
c
->
put_h264_qpel_pixels_tab
[
0
][
3
]
=
ff_put_h264_qpel16_mc30_neon
;
...
@@ -118,7 +118,7 @@ av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
...
@@ -118,7 +118,7 @@ av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
c
->
put_h264_qpel_pixels_tab
[
0
][
14
]
=
ff_put_h264_qpel16_mc23_neon
;
c
->
put_h264_qpel_pixels_tab
[
0
][
14
]
=
ff_put_h264_qpel16_mc23_neon
;
c
->
put_h264_qpel_pixels_tab
[
0
][
15
]
=
ff_put_h264_qpel16_mc33_neon
;
c
->
put_h264_qpel_pixels_tab
[
0
][
15
]
=
ff_put_h264_qpel16_mc33_neon
;
/* c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon; */
c
->
put_h264_qpel_pixels_tab
[
1
][
0
]
=
ff_put_h264_qpel8_mc00_neon
;
c
->
put_h264_qpel_pixels_tab
[
1
][
1
]
=
ff_put_h264_qpel8_mc10_neon
;
c
->
put_h264_qpel_pixels_tab
[
1
][
1
]
=
ff_put_h264_qpel8_mc10_neon
;
c
->
put_h264_qpel_pixels_tab
[
1
][
2
]
=
ff_put_h264_qpel8_mc20_neon
;
c
->
put_h264_qpel_pixels_tab
[
1
][
2
]
=
ff_put_h264_qpel8_mc20_neon
;
c
->
put_h264_qpel_pixels_tab
[
1
][
3
]
=
ff_put_h264_qpel8_mc30_neon
;
c
->
put_h264_qpel_pixels_tab
[
1
][
3
]
=
ff_put_h264_qpel8_mc30_neon
;
...
@@ -135,7 +135,7 @@ av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
...
@@ -135,7 +135,7 @@ av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
c
->
put_h264_qpel_pixels_tab
[
1
][
14
]
=
ff_put_h264_qpel8_mc23_neon
;
c
->
put_h264_qpel_pixels_tab
[
1
][
14
]
=
ff_put_h264_qpel8_mc23_neon
;
c
->
put_h264_qpel_pixels_tab
[
1
][
15
]
=
ff_put_h264_qpel8_mc33_neon
;
c
->
put_h264_qpel_pixels_tab
[
1
][
15
]
=
ff_put_h264_qpel8_mc33_neon
;
/* c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon; */
c
->
avg_h264_qpel_pixels_tab
[
0
][
0
]
=
ff_avg_h264_qpel16_mc00_neon
;
c
->
avg_h264_qpel_pixels_tab
[
0
][
1
]
=
ff_avg_h264_qpel16_mc10_neon
;
c
->
avg_h264_qpel_pixels_tab
[
0
][
1
]
=
ff_avg_h264_qpel16_mc10_neon
;
c
->
avg_h264_qpel_pixels_tab
[
0
][
2
]
=
ff_avg_h264_qpel16_mc20_neon
;
c
->
avg_h264_qpel_pixels_tab
[
0
][
2
]
=
ff_avg_h264_qpel16_mc20_neon
;
c
->
avg_h264_qpel_pixels_tab
[
0
][
3
]
=
ff_avg_h264_qpel16_mc30_neon
;
c
->
avg_h264_qpel_pixels_tab
[
0
][
3
]
=
ff_avg_h264_qpel16_mc30_neon
;
...
@@ -152,7 +152,7 @@ av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
...
@@ -152,7 +152,7 @@ av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
c
->
avg_h264_qpel_pixels_tab
[
0
][
14
]
=
ff_avg_h264_qpel16_mc23_neon
;
c
->
avg_h264_qpel_pixels_tab
[
0
][
14
]
=
ff_avg_h264_qpel16_mc23_neon
;
c
->
avg_h264_qpel_pixels_tab
[
0
][
15
]
=
ff_avg_h264_qpel16_mc33_neon
;
c
->
avg_h264_qpel_pixels_tab
[
0
][
15
]
=
ff_avg_h264_qpel16_mc33_neon
;
/* c->avg_h264_qpel_pixels_tab[1][ 0] = ff_avg_h264_qpel8_mc00_neon; */
c
->
avg_h264_qpel_pixels_tab
[
1
][
0
]
=
ff_avg_h264_qpel8_mc00_neon
;
c
->
avg_h264_qpel_pixels_tab
[
1
][
1
]
=
ff_avg_h264_qpel8_mc10_neon
;
c
->
avg_h264_qpel_pixels_tab
[
1
][
1
]
=
ff_avg_h264_qpel8_mc10_neon
;
c
->
avg_h264_qpel_pixels_tab
[
1
][
2
]
=
ff_avg_h264_qpel8_mc20_neon
;
c
->
avg_h264_qpel_pixels_tab
[
1
][
2
]
=
ff_avg_h264_qpel8_mc20_neon
;
c
->
avg_h264_qpel_pixels_tab
[
1
][
3
]
=
ff_avg_h264_qpel8_mc30_neon
;
c
->
avg_h264_qpel_pixels_tab
[
1
][
3
]
=
ff_avg_h264_qpel8_mc30_neon
;
...
...
libavcodec/aarch64/hpeldsp_init_aarch64.c
0 → 100644
View file @
c65d67ef
/*
* ARM NEON optimised DSP functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stddef.h>
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/hpeldsp.h"
/*
 * Prototypes of the NEON routines that ff_hpeldsp_init_aarch64() stores in
 * the HpelDSPContext function tables. They are only declared here; the
 * definitions are presumably provided by hpeldsp_neon.S (TODO confirm).
 * Every routine shares the same signature.
 */

/* put variants */
void ff_put_pixels16_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);

/* put variants carrying the "no_rnd" suffix */
void ff_put_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);

/* avg variants */
void ff_avg_pixels16_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_avg_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_avg_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_avg_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_avg_pixels8_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_avg_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_avg_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_avg_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);

/* avg variants carrying the "no_rnd" suffix */
void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
/**
 * Install the NEON half-pel routines into an HpelDSPContext.
 *
 * The function tables are only written when have_neon() accepts the value
 * returned by av_get_cpu_flags(); otherwise the context is left untouched.
 *
 * @param c     context whose function tables are filled in
 * @param flags not read by this function
 */
av_cold void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags)
{
    const int detected = av_get_cpu_flags();

    if (!have_neon(detected)) {
        return;
    }

    c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
    c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
    c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
    c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
    c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
    c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
    c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
    c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;

    /* Slot 0 of each no_rnd row reuses the plain put routine; no
     * *_no_rnd counterpart is declared for it in this file. */
    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
    c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
    c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;

    c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
    c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_neon;
    c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_neon;
    c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_neon;
    c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
    c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_neon;
    c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_neon;
    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_neon;

    /* avg_no_rnd_pixels_tab is indexed with a single subscript here and
     * only receives the pixels16 routines; slot 0 reuses the plain avg
     * routine. */
    c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_neon;
    c->avg_no_rnd_pixels_tab[1] = ff_avg_pixels16_x2_no_rnd_neon;
    c->avg_no_rnd_pixels_tab[2] = ff_avg_pixels16_y2_no_rnd_neon;
    c->avg_no_rnd_pixels_tab[3] = ff_avg_pixels16_xy2_no_rnd_neon;
}
libavcodec/aarch64/hpeldsp_neon.S
0 → 100644
View file @
c65d67ef
This diff is collapsed.
Click to expand it.
libavcodec/hpeldsp.c
View file @
c65d67ef
...
@@ -54,6 +54,8 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
...
@@ -54,6 +54,8 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
hpel_funcs
(
avg
,
[
3
],
2
);
hpel_funcs
(
avg
,
[
3
],
2
);
hpel_funcs
(
avg_no_rnd
,,
16
);
hpel_funcs
(
avg_no_rnd
,,
16
);
if
(
ARCH_AARCH64
)
ff_hpeldsp_init_aarch64
(
c
,
flags
);
if
(
ARCH_ARM
)
if
(
ARCH_ARM
)
ff_hpeldsp_init_arm
(
c
,
flags
);
ff_hpeldsp_init_arm
(
c
,
flags
);
if
(
ARCH_BFIN
)
if
(
ARCH_BFIN
)
...
...
libavcodec/hpeldsp.h
View file @
c65d67ef
...
@@ -94,6 +94,7 @@ typedef struct HpelDSPContext {
...
@@ -94,6 +94,7 @@ typedef struct HpelDSPContext {
void
ff_hpeldsp_init
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init_aarch64
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init_arm
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init_arm
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init_bfin
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init_bfin
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init_ppc
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init_ppc
(
HpelDSPContext
*
c
,
int
flags
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment