Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
1a094af6
Commit
1a094af6
authored
Jan 30, 2016
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fft: Split MDCT bits off from FFT
parent
4d13bcce
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
463 additions
and
243 deletions
+463
-243
Makefile
libavcodec/aarch64/Makefile
+1
-0
fft_init_aarch64.c
libavcodec/aarch64/fft_init_aarch64.c
+0
-12
mdct_init.c
libavcodec/aarch64/mdct_init.c
+39
-0
Makefile
libavcodec/arm/Makefile
+2
-0
fft_fixed_init_arm.c
libavcodec/arm/fft_fixed_init_arm.c
+0
-10
fft_init_arm.c
libavcodec/arm/fft_init_arm.c
+0
-15
mdct_fixed_init_arm.c
libavcodec/arm/mdct_fixed_init_arm.c
+40
-0
mdct_init_arm.c
libavcodec/arm/mdct_init_arm.c
+47
-0
fft.h
libavcodec/fft.h
+7
-0
fft_template.c
libavcodec/fft_template.c
+0
-7
mdct_template.c
libavcodec/mdct_template.c
+20
-0
Makefile
libavcodec/ppc/Makefile
+1
-0
fft_init.c
libavcodec/ppc/fft_init.c
+2
-122
mdct_init.c
libavcodec/ppc/mdct_init.c
+154
-0
Makefile
libavcodec/x86/Makefile
+1
-0
fft.asm
libavcodec/x86/fft.asm
+66
-62
fft.h
libavcodec/x86/fft.h
+0
-8
fft_init.c
libavcodec/x86/fft_init.c
+0
-7
mdct.h
libavcodec/x86/mdct.h
+32
-0
mdct_init.c
libavcodec/x86/mdct_init.c
+51
-0
No files found.
libavcodec/aarch64/Makefile
View file @
1a094af6
...
...
@@ -7,6 +7,7 @@ OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
OBJS-$(CONFIG_H264QPEL)
+=
aarch64/h264qpel_init_aarch64.o
OBJS-$(CONFIG_HPELDSP)
+=
aarch64/hpeldsp_init_aarch64.o
OBJS-$(CONFIG_IMDCT15)
+=
aarch64/imdct15_init.o
OBJS-$(CONFIG_MDCT)
+=
aarch64/mdct_init.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
aarch64/mpegaudiodsp_init.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST)
+=
aarch64/neontest.o
OBJS-$(CONFIG_VIDEODSP)
+=
aarch64/videodsp_init.o
...
...
libavcodec/aarch64/fft_init_aarch64.c
View file @
1a094af6
...
...
@@ -18,8 +18,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
...
...
@@ -29,10 +27,6 @@
void
ff_fft_permute_neon
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_neon
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_imdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_mdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
av_cold
void
ff_fft_init_aarch64
(
FFTContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
...
...
@@ -40,11 +34,5 @@ av_cold void ff_fft_init_aarch64(FFTContext *s)
if
(
have_neon
(
cpu_flags
))
{
s
->
fft_permute
=
ff_fft_permute_neon
;
s
->
fft_calc
=
ff_fft_calc_neon
;
#if CONFIG_MDCT
s
->
imdct_calc
=
ff_imdct_calc_neon
;
s
->
imdct_half
=
ff_imdct_half_neon
;
s
->
mdct_calc
=
ff_mdct_calc_neon
;
s
->
mdct_permutation
=
FF_MDCT_PERM_INTERLEAVE
;
#endif
}
}
libavcodec/aarch64/mdct_init.c
0 → 100644
View file @
1a094af6
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/fft.h"
/* NEON MDCT entry points installed by ff_mdct_init_aarch64() below; their
 * definitions are not part of this file. */
void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
/**
 * Install the NEON MDCT routines into an FFTContext.
 *
 * Leaves the context untouched unless have_neon() accepts the flags returned
 * by av_get_cpu_flags().
 *
 * @param s context whose imdct_calc, imdct_half, mdct_calc and
 *          mdct_permutation members are overwritten
 */
av_cold void ff_mdct_init_aarch64(FFTContext *s)
{
    int flags = av_get_cpu_flags();

    if (have_neon(flags)) {
        /* The NEON routines are paired with the interleaved permutation. */
        s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
        s->mdct_calc        = ff_mdct_calc_neon;
        s->imdct_half       = ff_imdct_half_neon;
        s->imdct_calc       = ff_imdct_calc_neon;
    }
}
libavcodec/arm/Makefile
View file @
1a094af6
...
...
@@ -21,6 +21,8 @@ OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_arm.o \
OBJS-$(CONFIG_FLACDSP)
+=
arm/flacdsp_init_arm.o
\
arm/flacdsp_arm.o
OBJS-$(CONFIG_G722DSP)
+=
arm/g722dsp_init_arm.o
OBJS-$(CONFIG_MDCT)
+=
arm/mdct_init_arm.o
\
arm/mdct_fixed_init_arm.o
OBJS-$(CONFIG_ME_CMP)
+=
arm/me_cmp_init_arm.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
arm/mpegaudiodsp_init_arm.o
OBJS-$(CONFIG_MPEGVIDEO)
+=
arm/mpegvideo_arm.o
...
...
libavcodec/arm/fft_fixed_init_arm.c
View file @
1a094af6
...
...
@@ -26,8 +26,6 @@
#include "libavcodec/fft.h"
void
ff_fft_fixed_calc_neon
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_mdct_fixed_calc_neon
(
FFTContext
*
s
,
FFTSample
*
o
,
const
FFTSample
*
i
);
void
ff_mdct_fixed_calcw_neon
(
FFTContext
*
s
,
FFTDouble
*
o
,
const
FFTSample
*
i
);
av_cold
void
ff_fft_fixed_init_arm
(
FFTContext
*
s
)
{
...
...
@@ -36,13 +34,5 @@ av_cold void ff_fft_fixed_init_arm(FFTContext *s)
if
(
have_neon
(
cpu_flags
))
{
s
->
fft_permutation
=
FF_FFT_PERM_SWAP_LSBS
;
s
->
fft_calc
=
ff_fft_fixed_calc_neon
;
#if CONFIG_MDCT
if
(
!
s
->
inverse
&&
s
->
nbits
>=
3
)
{
s
->
mdct_permutation
=
FF_MDCT_PERM_INTERLEAVE
;
s
->
mdct_calc
=
ff_mdct_fixed_calc_neon
;
s
->
mdct_calcw
=
ff_mdct_fixed_calcw_neon
;
}
#endif
}
}
libavcodec/arm/fft_init_arm.c
View file @
1a094af6
...
...
@@ -29,31 +29,16 @@ void ff_fft_calc_vfp(FFTContext *s, FFTComplex *z);
void
ff_fft_permute_neon
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_neon
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_imdct_half_vfp
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_mdct_calc_neon
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
av_cold
void
ff_fft_init_arm
(
FFTContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
if
(
have_vfp_vm
(
cpu_flags
))
{
s
->
fft_calc
=
ff_fft_calc_vfp
;
#if CONFIG_MDCT
s
->
imdct_half
=
ff_imdct_half_vfp
;
#endif
}
if
(
have_neon
(
cpu_flags
))
{
s
->
fft_permute
=
ff_fft_permute_neon
;
s
->
fft_calc
=
ff_fft_calc_neon
;
#if CONFIG_MDCT
s
->
imdct_calc
=
ff_imdct_calc_neon
;
s
->
imdct_half
=
ff_imdct_half_neon
;
s
->
mdct_calc
=
ff_mdct_calc_neon
;
s
->
mdct_permutation
=
FF_MDCT_PERM_INTERLEAVE
;
#endif
}
}
libavcodec/arm/mdct_fixed_init_arm.c
0 → 100644
View file @
1a094af6
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/arm/cpu.h"
#define FFT_FLOAT 0
#include "libavcodec/fft.h"
/* Fixed-point NEON forward MDCT entry points installed by
 * ff_mdct_fixed_init_arm() below; their definitions are not part of this
 * file. The "w" variant writes FFTDouble output. */
void ff_mdct_fixed_calc_neon(FFTContext *s, FFTSample *o, const FFTSample *i);
void ff_mdct_fixed_calcw_neon(FFTContext *s, FFTDouble *o, const FFTSample *i);
/**
 * Install the fixed-point NEON forward MDCT routines into an FFTContext.
 *
 * Nothing is changed unless NEON is reported by av_get_cpu_flags(), the
 * context is not an inverse transform (s->inverse == 0) and s->nbits >= 3.
 *
 * @param s context whose mdct_permutation, mdct_calc and mdct_calcw members
 *          may be overwritten
 */
av_cold void ff_mdct_fixed_init_arm(FFTContext *s)
{
    int flags = av_get_cpu_flags();

    if (have_neon(flags) && !s->inverse && s->nbits >= 3) {
        s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
        s->mdct_calc        = ff_mdct_fixed_calc_neon;
        s->mdct_calcw       = ff_mdct_fixed_calcw_neon;
    }
}
libavcodec/arm/mdct_init_arm.c
0 → 100644
View file @
1a094af6
/*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/arm/cpu.h"
#include "libavcodec/fft.h"
/* VFP and NEON MDCT entry points installed by ff_mdct_init_arm() below;
 * their definitions are not part of this file. */
void ff_imdct_half_vfp(FFTContext *s, FFTSample *output, const FFTSample *input);

void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
/**
 * Install the ARM (VFP / NEON) MDCT routines into an FFTContext.
 *
 * The two capability checks run in order, so when both match the NEON
 * assignment of imdct_half replaces the VFP one.
 *
 * @param s context whose imdct_half, imdct_calc, mdct_calc and
 *          mdct_permutation members may be overwritten
 */
av_cold void ff_mdct_init_arm(FFTContext *s)
{
    int flags = av_get_cpu_flags();

    /* Only imdct_half has a VFP replacement here. */
    if (have_vfp_vm(flags)) {
        s->imdct_half = ff_imdct_half_vfp;
    }

    if (have_neon(flags)) {
        s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
        s->mdct_calc        = ff_mdct_calc_neon;
        s->imdct_half       = ff_imdct_half_neon;
        s->imdct_calc       = ff_imdct_calc_neon;
    }
}
libavcodec/fft.h
View file @
1a094af6
...
...
@@ -154,4 +154,11 @@ void ff_fft_end(FFTContext *s);
int
ff_mdct_init
(
FFTContext
*
s
,
int
nbits
,
int
inverse
,
double
scale
);
void
ff_mdct_end
(
FFTContext
*
s
);
void
ff_mdct_init_aarch64
(
FFTContext
*
s
);
void
ff_mdct_init_arm
(
FFTContext
*
s
);
void
ff_mdct_init_ppc
(
FFTContext
*
s
);
void
ff_mdct_init_x86
(
FFTContext
*
s
);
void
ff_mdct_fixed_init_arm
(
FFTContext
*
s
);
#endif
/* AVCODEC_FFT_H */
libavcodec/fft_template.c
View file @
1a094af6
...
...
@@ -151,20 +151,13 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
s
->
fft_permute
=
fft_permute_c
;
s
->
fft_calc
=
fft_calc_c
;
#if CONFIG_MDCT
s
->
imdct_calc
=
ff_imdct_calc_c
;
s
->
imdct_half
=
ff_imdct_half_c
;
s
->
mdct_calc
=
ff_mdct_calc_c
;
#endif
#if FFT_FLOAT
if
(
ARCH_AARCH64
)
ff_fft_init_aarch64
(
s
);
if
(
ARCH_ARM
)
ff_fft_init_arm
(
s
);
if
(
ARCH_PPC
)
ff_fft_init_ppc
(
s
);
if
(
ARCH_X86
)
ff_fft_init_x86
(
s
);
if
(
CONFIG_MDCT
)
s
->
mdct_calcw
=
s
->
mdct_calc
;
#else
if
(
CONFIG_MDCT
)
s
->
mdct_calcw
=
ff_mdct_calcw_c
;
if
(
ARCH_ARM
)
ff_fft_fixed_init_arm
(
s
);
#endif
...
...
libavcodec/mdct_template.c
View file @
1a094af6
...
...
@@ -56,6 +56,26 @@ av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
if
(
ff_fft_init
(
s
,
s
->
mdct_bits
-
2
,
inverse
)
<
0
)
goto
fail
;
s
->
imdct_calc
=
ff_imdct_calc_c
;
s
->
imdct_half
=
ff_imdct_half_c
;
s
->
mdct_calc
=
ff_mdct_calc_c
;
#if FFT_FLOAT
if
(
ARCH_AARCH64
)
ff_mdct_init_aarch64
(
s
);
if
(
ARCH_ARM
)
ff_mdct_init_arm
(
s
);
if
(
ARCH_PPC
)
ff_mdct_init_ppc
(
s
);
if
(
ARCH_X86
)
ff_mdct_init_x86
(
s
);
s
->
mdct_calcw
=
s
->
mdct_calc
;
#else
s
->
mdct_calcw
=
ff_mdct_calcw_c
;
if
(
ARCH_ARM
)
ff_mdct_fixed_init_arm
(
s
);
#endif
s
->
tcos
=
av_malloc
(
n
/
2
*
sizeof
(
FFTSample
));
if
(
!
s
->
tcos
)
goto
fail
;
...
...
libavcodec/ppc/Makefile
View file @
1a094af6
...
...
@@ -11,6 +11,7 @@ OBJS-$(CONFIG_HPELDSP) += ppc/hpeldsp_altivec.o
OBJS-$(CONFIG_HUFFYUVDSP)
+=
ppc/huffyuvdsp_altivec.o
OBJS-$(CONFIG_FDCTDSP)
+=
ppc/fdctdsp.o
OBJS-$(CONFIG_IDCTDSP)
+=
ppc/idctdsp.o
OBJS-$(CONFIG_MDCT)
+=
ppc/mdct_init.o
OBJS-$(CONFIG_ME_CMP)
+=
ppc/me_cmp.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
ppc/mpegaudiodsp_altivec.o
OBJS-$(CONFIG_MPEGVIDEO)
+=
ppc/mpegvideo_altivec.o
\
...
...
libavcodec/ppc/fft_init.c
View file @
1a094af6
/*
* FFT/IFFT transforms
* AltiVec-enabled
* Copyright (c) 2009 Loren Merritt
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
...
...
@@ -21,126 +17,14 @@
*/
#include "config.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/fft.h"
/**
* Do a complex FFT with the parameters defined in ff_fft_init().
* The input data must be permuted before with s->revtab table.
* No 1.0 / sqrt(n) normalization is done.
* AltiVec-enabled:
* This code assumes that the 'z' pointer is 16 bytes-aligned.
* It also assumes all FFTComplex are 8 bytes-aligned pairs of floats.
*/
#include "libavcodec/fft.h"
void
ff_fft_calc_altivec
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_interleave_altivec
(
FFTContext
*
s
,
FFTComplex
*
z
);
#if HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN
static
void
imdct_half_altivec
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
{
int
j
,
k
;
int
n
=
1
<<
s
->
mdct_bits
;
int
n4
=
n
>>
2
;
int
n8
=
n
>>
3
;
int
n32
=
n
>>
5
;
const
uint16_t
*
revtabj
=
s
->
revtab
;
const
uint16_t
*
revtabk
=
s
->
revtab
+
n4
;
const
vec_f
*
tcos
=
(
const
vec_f
*
)(
s
->
tcos
+
n8
);
const
vec_f
*
tsin
=
(
const
vec_f
*
)(
s
->
tsin
+
n8
);
const
vec_f
*
pin
=
(
const
vec_f
*
)(
input
+
n4
);
vec_f
*
pout
=
(
vec_f
*
)(
output
+
n4
);
/* pre rotation */
k
=
n32
-
1
;
do
{
vec_f
cos
,
sin
,
cos0
,
sin0
,
cos1
,
sin1
,
re
,
im
,
r0
,
i0
,
r1
,
i1
,
a
,
b
,
c
,
d
;
#define CMULA(p,o0,o1,o2,o3)\
a = pin[ k*2+p];
/* { z[k].re, z[k].im, z[k+1].re, z[k+1].im } */
\
b = pin[-k*2-p-1];
/* { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } */
\
re = vec_perm(a, b, vcprm(0,2,s0,s2));
/* { z[k].re, z[k+1].re, z[-k-2].re, z[-k-1].re } */
\
im = vec_perm(a, b, vcprm(s3,s1,3,1));
/* { z[-k-1].im, z[-k-2].im, z[k+1].im, z[k].im } */
\
cos = vec_perm(cos0, cos1, vcprm(o0,o1,s##o2,s##o3));
/* { cos[k], cos[k+1], cos[-k-2], cos[-k-1] } */
\
sin = vec_perm(sin0, sin1, vcprm(o0,o1,s##o2,s##o3));\
r##p = im*cos - re*sin;\
i##p = re*cos + im*sin;
#define STORE2(v,dst)\
j = dst;\
vec_ste(v, 0, output+j*2);\
vec_ste(v, 4, output+j*2);
#define STORE8(p)\
a = vec_perm(r##p, i##p, vcprm(0,s0,0,s0));\
b = vec_perm(r##p, i##p, vcprm(1,s1,1,s1));\
c = vec_perm(r##p, i##p, vcprm(2,s2,2,s2));\
d = vec_perm(r##p, i##p, vcprm(3,s3,3,s3));\
STORE2(a, revtabk[ p*2-4]);\
STORE2(b, revtabk[ p*2-3]);\
STORE2(c, revtabj[-p*2+2]);\
STORE2(d, revtabj[-p*2+3]);
cos0
=
tcos
[
k
];
sin0
=
tsin
[
k
];
cos1
=
tcos
[
-
k
-
1
];
sin1
=
tsin
[
-
k
-
1
];
CMULA
(
0
,
0
,
1
,
2
,
3
);
CMULA
(
1
,
2
,
3
,
0
,
1
);
STORE8
(
0
);
STORE8
(
1
);
revtabj
+=
4
;
revtabk
-=
4
;
k
--
;
}
while
(
k
>=
0
);
ff_fft_calc_altivec
(
s
,
(
FFTComplex
*
)
output
);
/* post rotation + reordering */
j
=
-
n32
;
k
=
n32
-
1
;
do
{
vec_f
cos
,
sin
,
re
,
im
,
a
,
b
,
c
,
d
;
#define CMULB(d0,d1,o)\
re = pout[o*2];\
im = pout[o*2+1];\
cos = tcos[o];\
sin = tsin[o];\
d0 = im*sin - re*cos;\
d1 = re*sin + im*cos;
CMULB
(
a
,
b
,
j
);
CMULB
(
c
,
d
,
k
);
pout
[
2
*
j
]
=
vec_perm
(
a
,
d
,
vcprm
(
0
,
s3
,
1
,
s2
));
pout
[
2
*
j
+
1
]
=
vec_perm
(
a
,
d
,
vcprm
(
2
,
s1
,
3
,
s0
));
pout
[
2
*
k
]
=
vec_perm
(
c
,
b
,
vcprm
(
0
,
s3
,
1
,
s2
));
pout
[
2
*
k
+
1
]
=
vec_perm
(
c
,
b
,
vcprm
(
2
,
s1
,
3
,
s0
));
j
++
;
k
--
;
}
while
(
k
>=
0
);
}
static
void
imdct_calc_altivec
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
)
{
int
k
;
int
n
=
1
<<
s
->
mdct_bits
;
int
n4
=
n
>>
2
;
int
n16
=
n
>>
4
;
vec_u32
sign
=
{
1U
<<
31
,
1U
<<
31
,
1U
<<
31
,
1U
<<
31
};
vec_u32
*
p0
=
(
vec_u32
*
)(
output
+
n4
);
vec_u32
*
p1
=
(
vec_u32
*
)(
output
+
n4
*
3
);
imdct_half_altivec
(
s
,
output
+
n4
,
input
);
for
(
k
=
0
;
k
<
n16
;
k
++
)
{
vec_u32
a
=
p0
[
k
]
^
sign
;
vec_u32
b
=
p1
[
-
k
-
1
];
p0
[
-
k
-
1
]
=
vec_perm
(
a
,
a
,
vcprm
(
3
,
2
,
1
,
0
));
p1
[
k
]
=
vec_perm
(
b
,
b
,
vcprm
(
3
,
2
,
1
,
0
));
}
}
#endif
/* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
av_cold
void
ff_fft_init_ppc
(
FFTContext
*
s
)
{
#if HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN
...
...
@@ -148,9 +32,5 @@ av_cold void ff_fft_init_ppc(FFTContext *s)
return
;
s
->
fft_calc
=
ff_fft_calc_interleave_altivec
;
if
(
s
->
mdct_bits
>=
5
)
{
s
->
imdct_calc
=
imdct_calc_altivec
;
s
->
imdct_half
=
imdct_half_altivec
;
}
#endif
/* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
}
libavcodec/ppc/mdct_init.c
0 → 100644
View file @
1a094af6
/*
* FFT/IFFT transforms
* AltiVec-enabled
* Copyright (c) 2009 Loren Merritt
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/fft.h"
/**
* Do a complex FFT with the parameters defined in ff_fft_init().
* The input data must be permuted before with s->revtab table.
* No 1.0 / sqrt(n) normalization is done.
* AltiVec-enabled:
* This code assumes that the 'z' pointer is 16 bytes-aligned.
* It also assumes all FFTComplex are 8 bytes-aligned pairs of floats.
*/
void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
#if HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN
/**
 * AltiVec implementation of the imdct_half callback.
 *
 * Three stages, as marked in the body: a pre-rotation that scatters rotated
 * input pairs into output through s->revtab, an in-place complex FFT
 * (ff_fft_calc_altivec() on output), and a post-rotation that also reorders
 * the result.
 *
 * Both do/while loops execute at least once, so the transform size
 * n = 1 << s->mdct_bits must give n >> 5 >= 1; ff_mdct_init_ppc() only
 * installs this routine when s->mdct_bits >= 5.
 *
 * NOTE(review): input, output, s->tcos and s->tsin are accessed through
 * vec_f pointers, which presumably requires 16-byte alignment - confirm
 * against the callers.
 *
 * @param s      context supplying mdct_bits, revtab, tcos and tsin
 * @param output destination buffer, also used as FFT work area
 * @param input  source coefficients
 */
static void imdct_half_altivec(FFTContext *s, FFTSample *output,
                               const FFTSample *input)
{
    int j, k;
    int n   = 1 << s->mdct_bits;
    int n4  = n >> 2;
    int n8  = n >> 3;
    int n32 = n >> 5;
    const uint16_t *revtabj = s->revtab;
    const uint16_t *revtabk = s->revtab + n4;
    /* All four vector pointers are centred so that they can be indexed with
     * both k (forward) and -k-1 (mirrored). */
    const vec_f *tcos = (const vec_f *)(s->tcos + n8);
    const vec_f *tsin = (const vec_f *)(s->tsin + n8);
    const vec_f *pin  = (const vec_f *)(input + n4);
    vec_f *pout       = (vec_f *)(output + n4);

    /* pre rotation */
    k = n32 - 1;
    do {
        vec_f cos, sin, cos0, sin0, cos1, sin1, re, im, r0, i0, r1, i1, a, b, c, d;
/* Every line of a multi-line macro must end in a backslash; the trailing
 * comments therefore sit before the continuation character. */
#define CMULA(p,o0,o1,o2,o3)\
        a = pin[ k*2+p];                       /* { z[k].re,    z[k].im,    z[k+1].re,  z[k+1].im  } */\
        b = pin[-k*2-p-1];                     /* { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } */\
        re = vec_perm(a, b, vcprm(0,2,s0,s2)); /* { z[k].re,    z[k+1].re,  z[-k-2].re, z[-k-1].re } */\
        im = vec_perm(a, b, vcprm(s3,s1,3,1)); /* { z[-k-1].im, z[-k-2].im, z[k+1].im,  z[k].im    } */\
        cos = vec_perm(cos0, cos1, vcprm(o0,o1,s##o2,s##o3)); /* { cos[k], cos[k+1], cos[-k-2], cos[-k-1] } */\
        sin = vec_perm(sin0, sin1, vcprm(o0,o1,s##o2,s##o3));\
        r##p = im*cos - re*sin;\
        i##p = re*cos + im*sin;
#define STORE2(v,dst)\
        j = dst;\
        vec_ste(v, 0, output+j*2);\
        vec_ste(v, 4, output+j*2);
#define STORE8(p)\
        a = vec_perm(r##p, i##p, vcprm(0,s0,0,s0));\
        b = vec_perm(r##p, i##p, vcprm(1,s1,1,s1));\
        c = vec_perm(r##p, i##p, vcprm(2,s2,2,s2));\
        d = vec_perm(r##p, i##p, vcprm(3,s3,3,s3));\
        STORE2(a, revtabk[ p*2-4]);\
        STORE2(b, revtabk[ p*2-3]);\
        STORE2(c, revtabj[-p*2+2]);\
        STORE2(d, revtabj[-p*2+3]);

        cos0 = tcos[k];
        sin0 = tsin[k];
        cos1 = tcos[-k-1];
        sin1 = tsin[-k-1];
        CMULA(0, 0,1,2,3);
        CMULA(1, 2,3,0,1);
        STORE8(0);
        STORE8(1);
        revtabj += 4;
        revtabk -= 4;
        k--;
    } while (k >= 0);

    ff_fft_calc_altivec(s, (FFTComplex *)output);

    /* post rotation + reordering */
    j = -n32;
    k = n32 - 1;
    do {
        vec_f cos, sin, re, im, a, b, c, d;
#define CMULB(d0,d1,o)\
        re = pout[o*2];\
        im = pout[o*2+1];\
        cos = tcos[o];\
        sin = tsin[o];\
        d0 = im*sin - re*cos;\
        d1 = re*sin + im*cos;

        /* j walks up from -n32 while k walks down from n32-1, so each pass
         * handles one vector pair from each side of the centre. */
        CMULB(a, b, j);
        CMULB(c, d, k);
        pout[2*j]   = vec_perm(a, d, vcprm(0,s3,1,s2));
        pout[2*j+1] = vec_perm(a, d, vcprm(2,s1,3,s0));
        pout[2*k]   = vec_perm(c, b, vcprm(0,s3,1,s2));
        pout[2*k+1] = vec_perm(c, b, vcprm(2,s1,3,s0));
        j++;
        k--;
    } while (k >= 0);
}
/**
 * AltiVec implementation of the imdct_calc callback.
 *
 * Runs imdct_half_altivec() into the buffer starting at output + n4
 * (n = 1 << s->mdct_bits, n4 = n >> 2) and then fills the vectors below
 * output + n4 and from output + 3 * n4 upwards from that result.
 *
 * NOTE(review): vcprm(3,2,1,0) with both vec_perm sources equal presumably
 * reverses the four elements of a vector - confirm in util_altivec.h.
 *
 * @param s      context supplying mdct_bits (and whatever
 *               imdct_half_altivec() reads)
 * @param output destination buffer
 * @param input  source coefficients, passed through to imdct_half_altivec()
 */
static void imdct_calc_altivec(FFTContext *s, FFTSample *output,
                               const FFTSample *input)
{
    int i;
    int n   = 1 << s->mdct_bits;
    int n4  = n >> 2;
    int n16 = n >> 4;
    /* Bit 31 set in every lane: XOR with it flips the sign of each float. */
    vec_u32 sign_mask = { 1U << 31, 1U << 31, 1U << 31, 1U << 31 };
    vec_u32 *lo = (vec_u32 *)(output + n4);
    vec_u32 *hi = (vec_u32 *)(output + n4 * 3);

    imdct_half_altivec(s, output + n4, input);

    for (i = 0; i < n16; i++) {
        vec_u32 negated = lo[i] ^ sign_mask;
        vec_u32 plain   = hi[-i - 1];

        /* Mirror around lo (sign-flipped) and around hi (sign kept). */
        lo[-i - 1] = vec_perm(negated, negated, vcprm(3,2,1,0));
        hi[i]      = vec_perm(plain,   plain,   vcprm(3,2,1,0));
    }
}
#endif
/* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
/**
 * Install the AltiVec inverse MDCT routines into an FFTContext.
 *
 * Compiles to an empty function unless HAVE_GNU_AS, HAVE_ALTIVEC and
 * HAVE_BIGENDIAN are all set. At run time the routines are installed only
 * when PPC_ALTIVEC() accepts the CPU flags and s->mdct_bits >= 5.
 *
 * @param s context whose imdct_calc and imdct_half members may be overwritten
 */
av_cold void ff_mdct_init_ppc(FFTContext *s)
{
#if HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN
    int cpu_flags = av_get_cpu_flags();

    if (PPC_ALTIVEC(cpu_flags) && s->mdct_bits >= 5) {
        s->imdct_calc = imdct_calc_altivec;
        s->imdct_half = imdct_half_altivec;
    }
#endif /* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
}
libavcodec/x86/Makefile
View file @
1a094af6
...
...
@@ -19,6 +19,7 @@ OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o
OBJS-$(CONFIG_HUFFYUVENCDSP)
+=
x86/huffyuvencdsp_mmx.o
OBJS-$(CONFIG_IDCTDSP)
+=
x86/idctdsp_init.o
OBJS-$(CONFIG_LPC)
+=
x86/lpc.o
OBJS-$(CONFIG_MDCT)
+=
x86/mdct_init.o
OBJS-$(CONFIG_ME_CMP)
+=
x86/me_cmp_init.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
x86/mpegaudiodsp.o
OBJS-$(CONFIG_MPEGVIDEO)
+=
x86/mpegvideo.o
\
...
...
libavcodec/x86/fft.asm
View file @
1a094af6
...
...
@@ -655,68 +655,6 @@ cglobal fft_permute, 2,7,1
jl
.
loopcopy
REP_RET
%macro
IMDCT_CALC_FUNC
0
cglobal
imdct_calc
,
3
,
5
,
3
mov
r3d
,
[
r0
+
FFTContext
.
mdctsize
]
mov
r4
,
[
r0
+
FFTContext
.
imdcthalf
]
add
r1
,
r3
PUSH
r3
PUSH
r1
%if
ARCH_X86_32
push
r2
push
r1
push
r0
%else
sub
rsp
,
8
+
32
*
WIN64
; allocate win64 shadow space
%endif
call
r4
%if
ARCH_X86_32
add
esp
,
12
%else
add
rsp
,
8
+
32
*
WIN64
%endif
POP
r1
POP
r3
lea
r0
,
[
r1
+
2
*
r3
]
mov
r2
,
r3
sub
r3
,
mmsize
neg
r2
mova
m2
,
[
ps_m1m1m1m1
]
.
loop
:
%if
mmsize
==
8
PSWAPD
m0
,
[
r1
+
r3
]
PSWAPD
m1
,
[
r0
+
r2
]
pxor
m0
,
m2
%else
mova
m0
,
[
r1
+
r3
]
mova
m1
,
[
r0
+
r2
]
shufps
m0
,
m0
,
0x1b
shufps
m1
,
m1
,
0x1b
xorps
m0
,
m2
%endif
mova
[
r0
+
r3
]
,
m1
mova
[
r1
+
r2
]
,
m0
sub
r3
,
mmsize
add
r2
,
mmsize
jl
.
loop
%if
cpuflag
(
3
dnow
)
femms
RET
%else
REP_RET
%endif
%endmacro
%if
ARCH_X86_32
INIT_MMX
3
dnow
IMDCT_CALC_FUNC
INIT_MMX
3
dnowext
IMDCT_CALC_FUNC
%endif
INIT_XMM
sse
IMDCT_CALC_FUNC
%if
ARCH_X86_32
INIT_MMX
3
dnow
%define
mulps
pfmul
...
...
@@ -791,6 +729,70 @@ DECL_FFT 4
DECL_FFT
4
,
_interleave
%endif
%if
CONFIG_MDCT
%macro
IMDCT_CALC_FUNC
0
cglobal
imdct_calc
,
3
,
5
,
3
mov
r3d
,
[
r0
+
FFTContext
.
mdctsize
]
mov
r4
,
[
r0
+
FFTContext
.
imdcthalf
]
add
r1
,
r3
PUSH
r3
PUSH
r1
%if
ARCH_X86_32
push
r2
push
r1
push
r0
%else
sub
rsp
,
8
+
32
*
WIN64
; allocate win64 shadow space
%endif
call
r4
%if
ARCH_X86_32
add
esp
,
12
%else
add
rsp
,
8
+
32
*
WIN64
%endif
POP
r1
POP
r3
lea
r0
,
[
r1
+
2
*
r3
]
mov
r2
,
r3
sub
r3
,
mmsize
neg
r2
mova
m2
,
[
ps_m1m1m1m1
]
.
loop
:
%if
mmsize
==
8
PSWAPD
m0
,
[
r1
+
r3
]
PSWAPD
m1
,
[
r0
+
r2
]
pxor
m0
,
m2
%else
mova
m0
,
[
r1
+
r3
]
mova
m1
,
[
r0
+
r2
]
shufps
m0
,
m0
,
0x1b
shufps
m1
,
m1
,
0x1b
xorps
m0
,
m2
%endif
mova
[
r0
+
r3
]
,
m1
mova
[
r1
+
r2
]
,
m0
sub
r3
,
mmsize
add
r2
,
mmsize
jl
.
loop
%if
cpuflag
(
3
dnow
)
femms
RET
%else
REP_RET
%endif
%endmacro
%if
ARCH_X86_32
INIT_MMX
3
dnow
IMDCT_CALC_FUNC
INIT_MMX
3
dnowext
IMDCT_CALC_FUNC
%endif
INIT_XMM
sse
IMDCT_CALC_FUNC
INIT_XMM
sse
%undef
mulps
%undef
addps
...
...
@@ -1081,3 +1083,5 @@ DECL_IMDCT POSROTATESHUF_3DNOW
INIT_YMM
avx
DECL_IMDCT
POSROTATESHUF_AVX
%endif
; CONFIG_MDCT
libavcodec/x86/fft.h
View file @
1a094af6
...
...
@@ -27,12 +27,4 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
void
ff_fft_calc_3dnow
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_fft_calc_3dnowext
(
FFTContext
*
s
,
FFTComplex
*
z
);
void
ff_imdct_calc_3dnow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dnow
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_3dnowext
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_3dnowext
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_calc_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_sse
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
void
ff_imdct_half_avx
(
FFTContext
*
s
,
FFTSample
*
output
,
const
FFTSample
*
input
);
#endif
/* AVCODEC_X86_FFT_H */
libavcodec/x86/fft_init.c
View file @
1a094af6
...
...
@@ -30,28 +30,21 @@ av_cold void ff_fft_init_x86(FFTContext *s)
#if ARCH_X86_32
if
(
EXTERNAL_AMD3DNOW
(
cpu_flags
))
{
s
->
imdct_calc
=
ff_imdct_calc_3dnow
;
s
->
imdct_half
=
ff_imdct_half_3dnow
;
s
->
fft_calc
=
ff_fft_calc_3dnow
;
}
if
(
EXTERNAL_AMD3DNOWEXT
(
cpu_flags
))
{
s
->
imdct_calc
=
ff_imdct_calc_3dnowext
;
s
->
imdct_half
=
ff_imdct_half_3dnowext
;
s
->
fft_calc
=
ff_fft_calc_3dnowext
;
}
#endif
/* ARCH_X86_32 */
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
s
->
imdct_calc
=
ff_imdct_calc_sse
;
s
->
imdct_half
=
ff_imdct_half_sse
;
s
->
fft_permute
=
ff_fft_permute_sse
;
s
->
fft_calc
=
ff_fft_calc_sse
;
s
->
fft_permutation
=
FF_FFT_PERM_SWAP_LSBS
;
}
if
(
EXTERNAL_AVX_FAST
(
cpu_flags
)
&&
s
->
nbits
>=
5
)
{
s
->
imdct_half
=
ff_imdct_half_avx
;
s
->
fft_calc
=
ff_fft_calc_avx
;
s
->
fft_permutation
=
FF_FFT_PERM_AVX
;
}
...
...
libavcodec/x86/mdct.h
0 → 100644
View file @
1a094af6
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_X86_MDCT_H
#define AVCODEC_X86_MDCT_H
#include "libavcodec/fft.h"
/* x86 inverse MDCT entry points, grouped by instruction set. There is no
 * AVX variant of imdct_calc declared here. */
void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input);

void ff_imdct_calc_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input);

void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input);

void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input);
#endif
/* AVCODEC_X86_MDCT_H */
libavcodec/x86/mdct_init.c
0 → 100644
View file @
1a094af6
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "mdct.h"
/**
 * Install the x86 inverse MDCT routines into an FFTContext.
 *
 * The capability checks run in a fixed order and each one overwrites what an
 * earlier one stored, so the last matching check wins. The 3DNow! variants
 * are only considered when ARCH_X86_32 is set. The AVX check replaces
 * imdct_half only, and only when s->nbits >= 5.
 *
 * @param s context whose imdct_calc and imdct_half members may be overwritten
 */
av_cold void ff_mdct_init_x86(FFTContext *s)
{
    int flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_AMD3DNOW(flags)) {
        s->imdct_half = ff_imdct_half_3dnow;
        s->imdct_calc = ff_imdct_calc_3dnow;
    }

    if (EXTERNAL_AMD3DNOWEXT(flags)) {
        s->imdct_half = ff_imdct_half_3dnowext;
        s->imdct_calc = ff_imdct_calc_3dnowext;
    }
#endif /* ARCH_X86_32 */

    if (EXTERNAL_SSE(flags)) {
        s->imdct_half = ff_imdct_half_sse;
        s->imdct_calc = ff_imdct_calc_sse;
    }

    /* imdct_calc keeps whatever an earlier check installed. */
    if (EXTERNAL_AVX_FAST(flags) && s->nbits >= 5) {
        s->imdct_half = ff_imdct_half_avx;
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment