Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
209f50e1
Commit
209f50e1
authored
Jan 25, 2016
by
James Almer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/synth_filter: split off remaining code from dcadec files
Signed-off-by:
James Almer
<
jamrial@gmail.com
>
parent
5dc37a5d
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
424 additions
and
320 deletions
+424
-320
Makefile
libavcodec/aarch64/Makefile
+2
-1
dcadsp_init.c
libavcodec/aarch64/dcadsp_init.c
+0
-21
synth_filter_init.c
libavcodec/aarch64/synth_filter_init.c
+47
-0
Makefile
libavcodec/arm/Makefile
+2
-1
dcadsp_init_arm.c
libavcodec/arm/dcadsp_init_arm.c
+0
-22
synth_filter_init_arm.c
libavcodec/arm/synth_filter_init_arm.c
+49
-0
Makefile
libavcodec/x86/Makefile
+4
-2
dcadsp.asm
libavcodec/x86/dcadsp.asm
+0
-222
dcadsp_init.c
libavcodec/x86/dcadsp_init.c
+0
-51
synth_filter.asm
libavcodec/x86/synth_filter.asm
+246
-0
synth_filter_init.c
libavcodec/x86/synth_filter_init.c
+74
-0
No files found.
libavcodec/aarch64/Makefile
View file @
209f50e1
OBJS-$(CONFIG_DCA_DECODER)
+=
aarch64/dcadsp_init.o
OBJS-$(CONFIG_DCA_DECODER)
+=
aarch64/dcadsp_init.o
\
aarch64/synth_filter_init.o
OBJS-$(CONFIG_FFT)
+=
aarch64/fft_init_aarch64.o
OBJS-$(CONFIG_FMTCONVERT)
+=
aarch64/fmtconvert_init.o
OBJS-$(CONFIG_H264CHROMA)
+=
aarch64/h264chroma_init_aarch64.o
...
...
libavcodec/aarch64/dcadsp_init.c
View file @
209f50e1
...
...
@@ -24,23 +24,10 @@
#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavcodec/dcadsp.h"
#include "libavcodec/fft.h"
#include "asm-offsets.h"
#if HAVE_NEON || HAVE_VFP
AV_CHECK_OFFSET
(
FFTContext
,
imdct_half
,
IMDCT_HALF
);
#endif
void
ff_dca_lfe_fir0_neon
(
float
*
out
,
const
float
*
in
,
const
float
*
coefs
);
void
ff_dca_lfe_fir1_neon
(
float
*
out
,
const
float
*
in
,
const
float
*
coefs
);
void
ff_synth_filter_float_neon
(
FFTContext
*
imdct
,
float
*
synth_buf_ptr
,
int
*
synth_buf_offset
,
float
synth_buf2
[
32
],
const
float
window
[
512
],
float
out
[
32
],
const
float
in
[
32
],
float
scale
);
av_cold
void
ff_dcadsp_init_aarch64
(
DCADSPContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
...
...
@@ -50,11 +37,3 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
s
->
lfe_fir
[
1
]
=
ff_dca_lfe_fir1_neon
;
}
}
av_cold
void
ff_synth_filter_init_aarch64
(
SynthFilterContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
if
(
have_neon
(
cpu_flags
))
s
->
synth_filter_float
=
ff_synth_filter_float_neon
;
}
libavcodec/aarch64/synth_filter_init.c
0 → 100644
View file @
209f50e1
/*
* Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/aarch64/cpu.h"
#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavcodec/fft.h"
#include "libavcodec/synth_filter.h"
#include "asm-offsets.h"
#if HAVE_NEON || HAVE_VFP
AV_CHECK_OFFSET
(
FFTContext
,
imdct_half
,
IMDCT_HALF
);
#endif
/* External implementation (not defined in this file); it is installed below
 * as the synth_filter_float callback. */
void ff_synth_filter_float_neon(FFTContext *imdct,
                                float *synth_buf_ptr, int *synth_buf_offset,
                                float synth_buf2[32], const float window[512],
                                float out[32], const float in[32],
                                float scale);

/**
 * Select the AArch64 implementation of the synthesis filter.
 *
 * Queries the CPU flags once and, when have_neon() accepts them, points
 * s->synth_filter_float at ff_synth_filter_float_neon. Otherwise the context
 * is left exactly as the caller passed it in.
 *
 * @param s  context whose synth_filter_float pointer may be replaced
 */
av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s)
{
    const int flags = av_get_cpu_flags();

    if (!have_neon(flags))
        return;

    s->synth_filter_float = ff_synth_filter_float_neon;
}
libavcodec/arm/Makefile
View file @
209f50e1
...
...
@@ -36,7 +36,8 @@ OBJS-$(CONFIG_VP8DSP) += arm/vp8dsp_init_arm.o
# decoders/encoders
OBJS-$(CONFIG_AAC_DECODER)
+=
arm/aacpsdsp_init_arm.o
\
arm/sbrdsp_init_arm.o
OBJS-$(CONFIG_DCA_DECODER)
+=
arm/dcadsp_init_arm.o
OBJS-$(CONFIG_DCA_DECODER)
+=
arm/dcadsp_init_arm.o
\
arm/synth_filter_init_arm.o
OBJS-$(CONFIG_HEVC_DECODER)
+=
arm/hevcdsp_init_arm.o
OBJS-$(CONFIG_MLP_DECODER)
+=
arm/mlpdsp_init_arm.o
OBJS-$(CONFIG_RV40_DECODER)
+=
arm/rv40dsp_init_arm.o
...
...
libavcodec/arm/dcadsp_init_arm.c
View file @
209f50e1
...
...
@@ -37,18 +37,6 @@ void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
const
float
window
[
512
],
float
*
samples_out
,
float
raXin
[
32
],
float
scale
);
void
ff_synth_filter_float_vfp
(
FFTContext
*
imdct
,
float
*
synth_buf_ptr
,
int
*
synth_buf_offset
,
float
synth_buf2
[
32
],
const
float
window
[
512
],
float
out
[
32
],
const
float
in
[
32
],
float
scale
);
void
ff_synth_filter_float_neon
(
FFTContext
*
imdct
,
float
*
synth_buf_ptr
,
int
*
synth_buf_offset
,
float
synth_buf2
[
32
],
const
float
window
[
512
],
float
out
[
32
],
const
float
in
[
32
],
float
scale
);
av_cold
void
ff_dcadsp_init_arm
(
DCADSPContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
...
...
@@ -63,13 +51,3 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
s
->
lfe_fir
[
1
]
=
ff_dca_lfe_fir1_neon
;
}
}
av_cold
void
ff_synth_filter_init_arm
(
SynthFilterContext
*
s
)
{
int
cpu_flags
=
av_get_cpu_flags
();
if
(
have_vfp_vm
(
cpu_flags
))
s
->
synth_filter_float
=
ff_synth_filter_float_vfp
;
if
(
have_neon
(
cpu_flags
))
s
->
synth_filter_float
=
ff_synth_filter_float_neon
;
}
libavcodec/arm/synth_filter_init_arm.c
0 → 100644
View file @
209f50e1
/*
* Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/arm/cpu.h"
#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavcodec/fft.h"
#include "libavcodec/synth_filter.h"
/* External implementations (not defined in this file); each one can be
 * installed below as the synth_filter_float callback. */
void ff_synth_filter_float_vfp(FFTContext *imdct,
                               float *synth_buf_ptr, int *synth_buf_offset,
                               float synth_buf2[32], const float window[512],
                               float out[32], const float in[32],
                               float scale);

void ff_synth_filter_float_neon(FFTContext *imdct,
                                float *synth_buf_ptr, int *synth_buf_offset,
                                float synth_buf2[32], const float window[512],
                                float out[32], const float in[32],
                                float scale);

/**
 * Select the ARM implementation of the synthesis filter.
 *
 * The CPU flags are read once. The VFP version is installed when
 * have_vfp_vm() accepts the flags, and the NEON version when have_neon()
 * does. If neither check passes, the context is left untouched.
 *
 * @param s  context whose synth_filter_float pointer may be replaced
 */
av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
{
    const int flags = av_get_cpu_flags();

    if (have_vfp_vm(flags)) {
        s->synth_filter_float = ff_synth_filter_float_vfp;
    }

    /* Tested after VFP on purpose: when both checks pass, NEON wins. */
    if (have_neon(flags)) {
        s->synth_filter_float = ff_synth_filter_float_neon;
    }
}
libavcodec/x86/Makefile
View file @
209f50e1
...
...
@@ -44,7 +44,8 @@ OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp_init.o
OBJS-$(CONFIG_ALAC_DECODER)
+=
x86/alacdsp_init.o
OBJS-$(CONFIG_APNG_DECODER)
+=
x86/pngdsp_init.o
OBJS-$(CONFIG_CAVS_DECODER)
+=
x86/cavsdsp.o
OBJS-$(CONFIG_DCA_DECODER)
+=
x86/dcadsp_init.o
OBJS-$(CONFIG_DCA_DECODER)
+=
x86/dcadsp_init.o
\
x86/synth_filter_init.o
OBJS-$(CONFIG_DNXHD_ENCODER)
+=
x86/dnxhdenc_init.o
OBJS-$(CONFIG_HEVC_DECODER)
+=
x86/hevcdsp_init.o
OBJS-$(CONFIG_JPEG2000_DECODER)
+=
x86/jpeg2000dsp_init.o
...
...
@@ -132,7 +133,8 @@ YASM-OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp.o
YASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER)
+=
x86/g722dsp.o
YASM-OBJS-$(CONFIG_ALAC_DECODER)
+=
x86/alacdsp.o
YASM-OBJS-$(CONFIG_APNG_DECODER)
+=
x86/pngdsp.o
YASM-OBJS-$(CONFIG_DCA_DECODER)
+=
x86/dcadsp.o
YASM-OBJS-$(CONFIG_DCA_DECODER)
+=
x86/dcadsp.o
\
x86/synth_filter.o
YASM-OBJS-$(CONFIG_DIRAC_DECODER)
+=
x86/diracdsp_mmx.o
x86/diracdsp_yasm.o
\
x86/dwt_yasm.o
YASM-OBJS-$(CONFIG_DNXHD_ENCODER)
+=
x86/dnxhdenc.o
...
...
libavcodec/x86/dcadsp.asm
View file @
209f50e1
...
...
@@ -121,225 +121,3 @@ DCA_LFE_FIR 1
INIT_XMM
fma3
DCA_LFE_FIR
0
%endif
%macro
SETZERO
1
%if
cpuflag
(
sse2
)
&&
notcpuflag
(
avx
)
pxor
%1
,
%1
%else
xorps
%1
,
%1
,
%1
%endif
%endmacro
%macro
SHUF
3
%if
cpuflag
(
avx
)
mova
%3
,
[
%2
-
16
]
vperm2f128
%1
,
%3
,
%3
,
1
vshufps
%1
,
%1
,
%1
,
q0123
%elif
cpuflag
(
sse2
)
pshufd
%1
,
[
%2
]
,
q0123
%else
mova
%1
,
[
%2
]
shufps
%1
,
%1
,
q0123
%endif
%endmacro
%macro
INNER_LOOP
1
; reading backwards: ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
;~ a += window[i + j] * (-synth_buf[15 - i + j])
;~ b += window[i + j + 16] * (synth_buf[i + j])
SHUF
m5
,
ptr2
+
j
+
(
15
-
3
)
*
4
,
m6
mova
m6
,
[
ptr1
+
j
]
%if
ARCH_X86_64
SHUF
m11
,
ptr2
+
j
+
(
15
-
3
)
*
4
-
mmsize
,
m12
mova
m12
,
[
ptr1
+
j
+
mmsize
]
%endif
%if
cpuflag
(
fma3
)
fmaddps
m2
,
m6
,
[
win
+
%1
+
j
+
16
*
4
]
,
m2
fnmaddps
m1
,
m5
,
[
win
+
%1
+
j
]
,
m1
%if
ARCH_X86_64
fmaddps
m8
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
16
*
4
]
,
m8
fnmaddps
m7
,
m11
,
[
win
+
%1
+
j
+
mmsize
]
,
m7
%endif
%else
; non-FMA
mulps
m6
,
m6
,
[
win
+
%1
+
j
+
16
*
4
]
mulps
m5
,
m5
,
[
win
+
%1
+
j
]
%if
ARCH_X86_64
mulps
m12
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
16
*
4
]
mulps
m11
,
m11
,
[
win
+
%1
+
j
+
mmsize
]
%endif
addps
m2
,
m2
,
m6
subps
m1
,
m1
,
m5
%if
ARCH_X86_64
addps
m8
,
m8
,
m12
subps
m7
,
m7
,
m11
%endif
%endif
; cpuflag(fma3)
;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
SHUF
m6
,
ptr2
+
j
+
(
31
-
3
)
*
4
,
m5
mova
m5
,
[
ptr1
+
j
+
16
*
4
]
%if
ARCH_X86_64
SHUF
m12
,
ptr2
+
j
+
(
31
-
3
)
*
4
-
mmsize
,
m11
mova
m11
,
[
ptr1
+
j
+
mmsize
+
16
*
4
]
%endif
%if
cpuflag
(
fma3
)
fmaddps
m3
,
m5
,
[
win
+
%1
+
j
+
32
*
4
]
,
m3
fmaddps
m4
,
m6
,
[
win
+
%1
+
j
+
48
*
4
]
,
m4
%if
ARCH_X86_64
fmaddps
m9
,
m11
,
[
win
+
%1
+
j
+
mmsize
+
32
*
4
]
,
m9
fmaddps
m10
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
48
*
4
]
,
m10
%endif
%else
; non-FMA
mulps
m5
,
m5
,
[
win
+
%1
+
j
+
32
*
4
]
mulps
m6
,
m6
,
[
win
+
%1
+
j
+
48
*
4
]
%if
ARCH_X86_64
mulps
m11
,
m11
,
[
win
+
%1
+
j
+
mmsize
+
32
*
4
]
mulps
m12
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
48
*
4
]
%endif
addps
m3
,
m3
,
m5
addps
m4
,
m4
,
m6
%if
ARCH_X86_64
addps
m9
,
m9
,
m11
addps
m10
,
m10
,
m12
%endif
%endif
; cpuflag(fma3)
sub
j
,
64
*
4
%endmacro
; void ff_synth_filter_inner_<opt>(float *synth_buf, float synth_buf2[32],
; const float window[512], float out[32],
; intptr_t offset, float scale)
%macro
SYNTH_FILTER
0
cglobal
synth_filter_inner
,
0
,
6
+
4
*
ARCH_X86_64
,
7
+
6
*
ARCH_X86_64
,
\
synth_buf
,
synth_buf2
,
window
,
out
,
off
,
scale
%define
scale
m0
%if
ARCH_X86_32
||
WIN64
%if
cpuflag
(
sse2
)
&&
notcpuflag
(
avx
)
movd
scale
,
scalem
SPLATD
m0
%else
VBROADCASTSS
m0
,
scalem
%endif
; Make sure offset is in a register and not on the stack
%define
OFFQ
r4q
%else
SPLATD
xmm0
%if
cpuflag
(
avx
)
vinsertf128
m0
,
m0
,
xmm0
,
1
%endif
%define
OFFQ
offq
%endif
; prepare inner counter limit 1
mov
r5q
,
480
sub
r5q
,
offmp
and
r5q
,
-
64
shl
r5q
,
2
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
mov
OFFQ
,
r5q
%define
i
r5q
mov
i
,
16
*
4
-
(
ARCH_X86_64
+
1
)
*
mmsize
; main loop counter
%else
%define
i
0
%define
OFFQ
r5q
%endif
%define
buf2
synth_buf2q
%if
ARCH_X86_32
mov
buf2
,
synth_buf2mp
%endif
.
mainloop
:
; m1 = a m2 = b m3 = c m4 = d
SETZERO
m3
SETZERO
m4
mova
m1
,
[
buf2
+
i
]
mova
m2
,
[
buf2
+
i
+
16
*
4
]
%if
ARCH_X86_32
%define
ptr1
r0q
%define
ptr2
r1q
%define
win
r2q
%define
j
r3q
mov
win
,
windowm
mov
ptr1
,
synth_bufm
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
add
win
,
i
add
ptr1
,
i
%endif
%else
; ARCH_X86_64
%define
ptr1
r6q
%define
ptr2
r7q
; must be loaded
%define
win
r8q
%define
j
r9q
SETZERO
m9
SETZERO
m10
mova
m7
,
[
buf2
+
i
+
mmsize
]
mova
m8
,
[
buf2
+
i
+
mmsize
+
16
*
4
]
lea
win
,
[
windowq
+
i
]
lea
ptr1
,
[
synth_bufq
+
i
]
%endif
mov
ptr2
,
synth_bufmp
; prepare the inner loop counter
mov
j
,
OFFQ
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
sub
ptr2
,
i
%endif
.
loop1
:
INNER_LOOP
0
jge
.
loop1
mov
j
,
448
*
4
sub
j
,
OFFQ
jz
.
end
sub
ptr1
,
j
sub
ptr2
,
j
add
win
,
OFFQ
; now at j-64, so define OFFSET
sub
j
,
64
*
4
.
loop2
:
INNER_LOOP
64
*
4
jge
.
loop2
.
end
:
%if
ARCH_X86_32
mov
buf2
,
synth_buf2m
; needed for next iteration anyway
mov
outq
,
outmp
; j, which will be set again during it
%endif
;~ out[i] = a * scale;
;~ out[i + 16] = b * scale;
mulps
m1
,
m1
,
scale
mulps
m2
,
m2
,
scale
%if
ARCH_X86_64
mulps
m7
,
m7
,
scale
mulps
m8
,
m8
,
scale
%endif
;~ synth_buf2[i] = c;
;~ synth_buf2[i + 16] = d;
mova
[
buf2
+
i
+
0
*
4
]
,
m3
mova
[
buf2
+
i
+
16
*
4
]
,
m4
%if
ARCH_X86_64
mova
[
buf2
+
i
+
0
*
4
+
mmsize
]
,
m9
mova
[
buf2
+
i
+
16
*
4
+
mmsize
]
,
m10
%endif
;~ out[i] = a;
;~ out[i + 16] = a;
mova
[
outq
+
i
+
0
*
4
]
,
m1
mova
[
outq
+
i
+
16
*
4
]
,
m2
%if
ARCH_X86_64
mova
[
outq
+
i
+
0
*
4
+
mmsize
]
,
m7
mova
[
outq
+
i
+
16
*
4
+
mmsize
]
,
m8
%endif
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
sub
i
,
(
ARCH_X86_64
+
1
)
*
mmsize
jge
.
mainloop
%endif
RET
%endmacro
%if
ARCH_X86_32
INIT_XMM
sse
SYNTH_FILTER
%endif
INIT_XMM
sse2
SYNTH_FILTER
INIT_YMM
avx
SYNTH_FILTER
INIT_YMM
fma3
SYNTH_FILTER
libavcodec/x86/dcadsp_init.c
View file @
209f50e1
...
...
@@ -40,54 +40,3 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
s
->
lfe_fir
[
0
]
=
ff_dca_lfe_fir0_fma3
;
}
}
#define SYNTH_FILTER_FUNC(opt) \
void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32], \
const float window[512], \
float out[32], intptr_t offset, float scale); \
static void synth_filter_##opt(FFTContext *imdct, \
float *synth_buf_ptr, int *synth_buf_offset, \
float synth_buf2[32], const float window[512], \
float out[32], const float in[32], float scale) \
{ \
float *synth_buf= synth_buf_ptr + *synth_buf_offset; \
\
imdct->imdct_half(imdct, synth_buf, in); \
\
ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window, \
out, *synth_buf_offset, scale); \
\
*synth_buf_offset = (*synth_buf_offset - 32) & 511; \
} \
#if HAVE_YASM
#if ARCH_X86_32
SYNTH_FILTER_FUNC
(
sse
)
#endif
SYNTH_FILTER_FUNC
(
sse2
)
SYNTH_FILTER_FUNC
(
avx
)
SYNTH_FILTER_FUNC
(
fma3
)
#endif
/* HAVE_YASM */
av_cold
void
ff_synth_filter_init_x86
(
SynthFilterContext
*
s
)
{
#if HAVE_YASM
int
cpu_flags
=
av_get_cpu_flags
();
#if ARCH_X86_32
if
(
EXTERNAL_SSE
(
cpu_flags
))
{
s
->
synth_filter_float
=
synth_filter_sse
;
}
#endif
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
s
->
synth_filter_float
=
synth_filter_sse2
;
}
if
(
EXTERNAL_AVX_FAST
(
cpu_flags
))
{
s
->
synth_filter_float
=
synth_filter_avx
;
}
if
(
EXTERNAL_FMA3
(
cpu_flags
)
&&
!
(
cpu_flags
&
AV_CPU_FLAG_AVXSLOW
))
{
s
->
synth_filter_float
=
synth_filter_fma3
;
}
#endif
/* HAVE_YASM */
}
libavcodec/x86/synth_filter.asm
0 → 100644
View file @
209f50e1
;******************************************************************************
;* SSE-optimized functions for the DCA decoder
;* Copyright (C) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"libavutil/x86/x86util.asm"
SECTION
.
text
%macro
SETZERO
1
%if
cpuflag
(
sse2
)
&&
notcpuflag
(
avx
)
pxor
%1
,
%1
%else
xorps
%1
,
%1
,
%1
%endif
%endmacro
%macro
SHUF
3
%if
cpuflag
(
avx
)
mova
%3
,
[
%2
-
16
]
vperm2f128
%1
,
%3
,
%3
,
1
vshufps
%1
,
%1
,
%1
,
q0123
%elif
cpuflag
(
sse2
)
pshufd
%1
,
[
%2
]
,
q0123
%else
mova
%1
,
[
%2
]
shufps
%1
,
%1
,
q0123
%endif
%endmacro
%macro
INNER_LOOP
1
; reading backwards: ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
;~ a += window[i + j] * (-synth_buf[15 - i + j])
;~ b += window[i + j + 16] * (synth_buf[i + j])
SHUF
m5
,
ptr2
+
j
+
(
15
-
3
)
*
4
,
m6
mova
m6
,
[
ptr1
+
j
]
%if
ARCH_X86_64
SHUF
m11
,
ptr2
+
j
+
(
15
-
3
)
*
4
-
mmsize
,
m12
mova
m12
,
[
ptr1
+
j
+
mmsize
]
%endif
%if
cpuflag
(
fma3
)
fmaddps
m2
,
m6
,
[
win
+
%1
+
j
+
16
*
4
]
,
m2
fnmaddps
m1
,
m5
,
[
win
+
%1
+
j
]
,
m1
%if
ARCH_X86_64
fmaddps
m8
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
16
*
4
]
,
m8
fnmaddps
m7
,
m11
,
[
win
+
%1
+
j
+
mmsize
]
,
m7
%endif
%else
; non-FMA
mulps
m6
,
m6
,
[
win
+
%1
+
j
+
16
*
4
]
mulps
m5
,
m5
,
[
win
+
%1
+
j
]
%if
ARCH_X86_64
mulps
m12
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
16
*
4
]
mulps
m11
,
m11
,
[
win
+
%1
+
j
+
mmsize
]
%endif
addps
m2
,
m2
,
m6
subps
m1
,
m1
,
m5
%if
ARCH_X86_64
addps
m8
,
m8
,
m12
subps
m7
,
m7
,
m11
%endif
%endif
; cpuflag(fma3)
;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
SHUF
m6
,
ptr2
+
j
+
(
31
-
3
)
*
4
,
m5
mova
m5
,
[
ptr1
+
j
+
16
*
4
]
%if
ARCH_X86_64
SHUF
m12
,
ptr2
+
j
+
(
31
-
3
)
*
4
-
mmsize
,
m11
mova
m11
,
[
ptr1
+
j
+
mmsize
+
16
*
4
]
%endif
%if
cpuflag
(
fma3
)
fmaddps
m3
,
m5
,
[
win
+
%1
+
j
+
32
*
4
]
,
m3
fmaddps
m4
,
m6
,
[
win
+
%1
+
j
+
48
*
4
]
,
m4
%if
ARCH_X86_64
fmaddps
m9
,
m11
,
[
win
+
%1
+
j
+
mmsize
+
32
*
4
]
,
m9
fmaddps
m10
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
48
*
4
]
,
m10
%endif
%else
; non-FMA
mulps
m5
,
m5
,
[
win
+
%1
+
j
+
32
*
4
]
mulps
m6
,
m6
,
[
win
+
%1
+
j
+
48
*
4
]
%if
ARCH_X86_64
mulps
m11
,
m11
,
[
win
+
%1
+
j
+
mmsize
+
32
*
4
]
mulps
m12
,
m12
,
[
win
+
%1
+
j
+
mmsize
+
48
*
4
]
%endif
addps
m3
,
m3
,
m5
addps
m4
,
m4
,
m6
%if
ARCH_X86_64
addps
m9
,
m9
,
m11
addps
m10
,
m10
,
m12
%endif
%endif
; cpuflag(fma3)
sub
j
,
64
*
4
%endmacro
; void ff_synth_filter_inner_<opt>(float *synth_buf, float synth_buf2[32],
; const float window[512], float out[32],
; intptr_t offset, float scale)
%macro
SYNTH_FILTER
0
cglobal
synth_filter_inner
,
0
,
6
+
4
*
ARCH_X86_64
,
7
+
6
*
ARCH_X86_64
,
\
synth_buf
,
synth_buf2
,
window
,
out
,
off
,
scale
%define
scale
m0
%if
ARCH_X86_32
||
WIN64
%if
cpuflag
(
sse2
)
&&
notcpuflag
(
avx
)
movd
scale
,
scalem
SPLATD
m0
%else
VBROADCASTSS
m0
,
scalem
%endif
; Make sure offset is in a register and not on the stack
%define
OFFQ
r4q
%else
SPLATD
xmm0
%if
cpuflag
(
avx
)
vinsertf128
m0
,
m0
,
xmm0
,
1
%endif
%define
OFFQ
offq
%endif
; prepare inner counter limit 1
mov
r5q
,
480
sub
r5q
,
offmp
and
r5q
,
-
64
shl
r5q
,
2
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
mov
OFFQ
,
r5q
%define
i
r5q
mov
i
,
16
*
4
-
(
ARCH_X86_64
+
1
)
*
mmsize
; main loop counter
%else
%define
i
0
%define
OFFQ
r5q
%endif
%define
buf2
synth_buf2q
%if
ARCH_X86_32
mov
buf2
,
synth_buf2mp
%endif
.
mainloop
:
; m1 = a m2 = b m3 = c m4 = d
SETZERO
m3
SETZERO
m4
mova
m1
,
[
buf2
+
i
]
mova
m2
,
[
buf2
+
i
+
16
*
4
]
%if
ARCH_X86_32
%define
ptr1
r0q
%define
ptr2
r1q
%define
win
r2q
%define
j
r3q
mov
win
,
windowm
mov
ptr1
,
synth_bufm
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
add
win
,
i
add
ptr1
,
i
%endif
%else
; ARCH_X86_64
%define
ptr1
r6q
%define
ptr2
r7q
; must be loaded
%define
win
r8q
%define
j
r9q
SETZERO
m9
SETZERO
m10
mova
m7
,
[
buf2
+
i
+
mmsize
]
mova
m8
,
[
buf2
+
i
+
mmsize
+
16
*
4
]
lea
win
,
[
windowq
+
i
]
lea
ptr1
,
[
synth_bufq
+
i
]
%endif
mov
ptr2
,
synth_bufmp
; prepare the inner loop counter
mov
j
,
OFFQ
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
sub
ptr2
,
i
%endif
.
loop1
:
INNER_LOOP
0
jge
.
loop1
mov
j
,
448
*
4
sub
j
,
OFFQ
jz
.
end
sub
ptr1
,
j
sub
ptr2
,
j
add
win
,
OFFQ
; now at j-64, so define OFFSET
sub
j
,
64
*
4
.
loop2
:
INNER_LOOP
64
*
4
jge
.
loop2
.
end
:
%if
ARCH_X86_32
mov
buf2
,
synth_buf2m
; needed for next iteration anyway
mov
outq
,
outmp
; j, which will be set again during it
%endif
;~ out[i] = a * scale;
;~ out[i + 16] = b * scale;
mulps
m1
,
m1
,
scale
mulps
m2
,
m2
,
scale
%if
ARCH_X86_64
mulps
m7
,
m7
,
scale
mulps
m8
,
m8
,
scale
%endif
;~ synth_buf2[i] = c;
;~ synth_buf2[i + 16] = d;
mova
[
buf2
+
i
+
0
*
4
]
,
m3
mova
[
buf2
+
i
+
16
*
4
]
,
m4
%if
ARCH_X86_64
mova
[
buf2
+
i
+
0
*
4
+
mmsize
]
,
m9
mova
[
buf2
+
i
+
16
*
4
+
mmsize
]
,
m10
%endif
;~ out[i] = a;
;~ out[i + 16] = b;
mova
[
outq
+
i
+
0
*
4
]
,
m1
mova
[
outq
+
i
+
16
*
4
]
,
m2
%if
ARCH_X86_64
mova
[
outq
+
i
+
0
*
4
+
mmsize
]
,
m7
mova
[
outq
+
i
+
16
*
4
+
mmsize
]
,
m8
%endif
%if
ARCH_X86_32
||
notcpuflag
(
avx
)
sub
i
,
(
ARCH_X86_64
+
1
)
*
mmsize
jge
.
mainloop
%endif
RET
%endmacro
%if
ARCH_X86_32
INIT_XMM
sse
SYNTH_FILTER
%endif
INIT_XMM
sse2
SYNTH_FILTER
INIT_YMM
avx
SYNTH_FILTER
INIT_YMM
fma3
SYNTH_FILTER
libavcodec/x86/synth_filter_init.c
0 → 100644
View file @
209f50e1
/*
* Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/synth_filter.h"
/**
 * Declare the external kernel ff_synth_filter_inner_<opt>() and define the
 * static wrapper synth_filter_<opt>() around it.
 *
 * The generated wrapper:
 *  - sets synth_buf to synth_buf_ptr + *synth_buf_offset,
 *  - calls imdct->imdct_half(imdct, synth_buf, in),
 *  - calls the kernel with synth_buf, synth_buf2, window, out, the current
 *    *synth_buf_offset and scale,
 *  - steps *synth_buf_offset back by 32, wrapping it with "& 511".
 *
 * @param opt  suffix pasted into both generated names (e.g. sse2, avx, fma3)
 *
 * The closing brace is the last line of the definition and deliberately has
 * no line continuation, so the macro body cannot absorb the line after it.
 */
#define SYNTH_FILTER_FUNC(opt)                                                 \
void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32],   \
                                 const float window[512],                      \
                                 float out[32], intptr_t offset, float scale); \
static void synth_filter_##opt(FFTContext *imdct,                              \
                               float *synth_buf_ptr, int *synth_buf_offset,    \
                               float synth_buf2[32], const float window[512],  \
                               float out[32], const float in[32], float scale) \
{                                                                              \
    float *synth_buf= synth_buf_ptr + *synth_buf_offset;                       \
                                                                               \
    imdct->imdct_half(imdct, synth_buf, in);                                   \
                                                                               \
    ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window,                 \
                                out, *synth_buf_offset, scale);                \
                                                                               \
    *synth_buf_offset = (*synth_buf_offset - 32) & 511;                        \
}
#if HAVE_YASM
#if ARCH_X86_32
SYNTH_FILTER_FUNC
(
sse
)
#endif
SYNTH_FILTER_FUNC
(
sse2
)
SYNTH_FILTER_FUNC
(
avx
)
SYNTH_FILTER_FUNC
(
fma3
)
#endif
/* HAVE_YASM */
/**
 * Select the x86 implementation of the synthesis filter.
 *
 * When HAVE_YASM is set, the CPU flags are read once and checked in the
 * order SSE (32-bit builds only), SSE2, AVX, FMA3. Every check that passes
 * overwrites s->synth_filter_float, so the last passing check decides the
 * final choice. FMA3 is also skipped when AV_CPU_FLAG_AVXSLOW is set in the
 * flags. When HAVE_YASM is not set, the function does nothing.
 *
 * @param s  context whose synth_filter_float pointer may be replaced
 */
av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_SSE(cpu_flags))
        s->synth_filter_float = synth_filter_sse;
#endif

    if (EXTERNAL_SSE2(cpu_flags))
        s->synth_filter_float = synth_filter_sse2;

    if (EXTERNAL_AVX_FAST(cpu_flags))
        s->synth_filter_float = synth_filter_avx;

    if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW))
        s->synth_filter_float = synth_filter_fma3;
#endif /* HAVE_YASM */
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment