Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
b0bfa369
Commit
b0bfa369
authored
Jul 26, 2019
by
Paul B Mahol
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter: add arnndn filter
parent
c3985c0e
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
1560 additions
and
2 deletions
+1560
-2
Changelog
Changelog
+1
-0
filters.texi
doc/filters.texi
+11
-0
Makefile
libavfilter/Makefile
+1
-0
af_arnndn.c
libavfilter/af_arnndn.c
+1544
-0
allfilters.c
libavfilter/allfilters.c
+1
-0
version.h
libavfilter/version.h
+2
-2
No files found.
Changelog
View file @
b0bfa369
...
...
@@ -15,6 +15,7 @@ version <next>:
- scroll video filter
- photosensitivity filter
- anlms filter
- arnndn filter
version 4.2:
...
...
doc/filters.texi
View file @
b0bfa369
...
...
@@ -2116,6 +2116,17 @@ atrim=end=5,areverse
@end example
@end itemize
@section arnndn
Reduce noise from speech using Recurrent Neural Networks.
This filter accepts the following options:
@table @option
@item model, m
Set the trained model file to load. This option is always required.
@end table
@section asetnsamples
Set the number of samples per each output audio frame.
...
...
libavfilter/Makefile
View file @
b0bfa369
...
...
@@ -72,6 +72,7 @@ OBJS-$(CONFIG_APULSATOR_FILTER) += af_apulsator.o
OBJS-$(CONFIG_AREALTIME_FILTER)
+=
f_realtime.o
OBJS-$(CONFIG_ARESAMPLE_FILTER)
+=
af_aresample.o
OBJS-$(CONFIG_AREVERSE_FILTER)
+=
f_reverse.o
OBJS-$(CONFIG_ARNNDN_FILTER)
+=
af_arnndn.o
OBJS-$(CONFIG_ASELECT_FILTER)
+=
f_select.o
OBJS-$(CONFIG_ASENDCMD_FILTER)
+=
f_sendcmd.o
OBJS-$(CONFIG_ASETNSAMPLES_FILTER)
+=
af_asetnsamples.o
...
...
libavfilter/af_arnndn.c
0 → 100644
View file @
b0bfa369
/*
* Copyright (c) 2018 Gregor Richards
* Copyright (c) 2017 Mozilla
* Copyright (c) 2005-2009 Xiph.Org Foundation
* Copyright (c) 2007-2008 CSIRO
* Copyright (c) 2008-2011 Octasic Inc.
* Copyright (c) Jean-Marc Valin
* Copyright (c) 2019 Paul B Mahol
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <float.h>
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/float_dsp.h"
#include "libavutil/opt.h"
#include "libavutil/tx.h"
#include "avfilter.h"
#include "audio.h"
#include "filters.h"
#include "formats.h"
/* Frame geometry: FRAME_SIZE evaluates to 120 << 2 = 480 samples. */
#define FRAME_SIZE_SHIFT 2
#define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
/* Length of the analysis/synthesis window: two frames. */
#define WINDOW_SIZE (2*FRAME_SIZE)
/* Number of spectrum bins copied out of a WINDOW_SIZE-point transform. */
#define FREQ_SIZE (FRAME_SIZE + 1)
/* Pitch period bounds and pitch analysis frame length handed to the pitch
 * search / remove_doubling() (presumably in samples -- TODO confirm). */
#define PITCH_MIN_PERIOD 60
#define PITCH_MAX_PERIOD 768
#define PITCH_FRAME_SIZE 960
/* Length of the per-channel pitch history buffer. */
#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
/* NOTE: evaluates its argument twice; do not pass expressions with side effects. */
#define SQUARE(x) ((x)*(x))
/* Number of bands; equals the number of entries in eband5ms[]. */
#define NB_BANDS 22
/* Number of past cepstra kept in DenoiseState.cepstral_mem. */
#define CEPS_MEM 8
/* Number of leading cepstral coefficients for which delta features are built. */
#define NB_DELTA_CEPS 6
/* Length of the feature vector: 22 + 3*6 + 2 = 42 values. */
#define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
/* Not referenced in the visible part of this file; presumably the scale applied
 * to the integer weights read from the model file -- TODO confirm. */
#define WEIGHTS_SCALE (1.f/256)
/* Not referenced in the visible part of this file; the model loader rejects
 * layer sizes above the literal 128 -- presumably the same limit, TODO confirm. */
#define MAX_NEURONS 128
/* Activation identifiers stored in DenseLayer/GRULayer.activation. */
#define ACTIVATION_TANH 0
#define ACTIVATION_SIGMOID 1
#define ACTIVATION_RELU 2
/* Unity constant in float. */
#define Q15ONE 1.0f
/**
 * Parameters of one fully connected layer.
 *
 * The model loader in this file fills bias with nb_neurons values and
 * input_weights with nb_inputs * nb_neurons values, and releases both
 * arrays together with the layer in rnnoise_model_free().
 */
typedef struct DenseLayer {
    const float *bias;           /* one value per neuron */
    const float *input_weights;  /* nb_inputs * nb_neurons values */
    int nb_inputs;               /* number of inputs of the layer */
    int nb_neurons;              /* number of outputs of the layer */
    int activation;              /* one of the ACTIVATION_* identifiers */
} DenseLayer;
/**
 * Parameters of one GRU layer.
 *
 * The model loader in this file stores three blocks of weights/biases per
 * layer (the literal 3 passed to its array readers): bias holds
 * nb_neurons * 3 values, while input_weights and recurrent_weights are
 * stored in a layout padded to multiples of 4. All three arrays are released
 * together with the layer in rnnoise_model_free().
 */
typedef struct GRULayer {
    const float *bias;               /* nb_neurons * 3 values */
    const float *input_weights;      /* padded nb_inputs x nb_neurons x 3 array */
    const float *recurrent_weights;  /* padded nb_neurons x nb_neurons x 3 array */
    int nb_inputs;                   /* number of inputs of the layer */
    int nb_neurons;                  /* number of units of the layer */
    int activation;                  /* one of the ACTIVATION_* identifiers */
} GRULayer;
/**
 * Complete network description as loaded by rnnoise_model_from_file().
 *
 * Every layer pointer is owned by the model and released by
 * rnnoise_model_free(). Each *_size member is set by the loader to the
 * nb_neurons value of the layer that follows it.
 */
typedef struct RNNModel {
    int               input_dense_size;
    const DenseLayer *input_dense;

    int               vad_gru_size;
    const GRULayer   *vad_gru;

    int               noise_gru_size;
    const GRULayer   *noise_gru;

    int               denoise_gru_size;
    const GRULayer   *denoise_gru;

    int               denoise_output_size;
    const DenseLayer *denoise_output;

    int               vad_output_size;
    const DenseLayer *vad_output;
} RNNModel;
/**
 * Per-channel recurrent state of the network.
 *
 * config_input() allocates each state vector with the matching *_gru_size of
 * the model rounded up to a multiple of 16 floats, and points model at the
 * filter-wide RNNModel.
 */
typedef struct RNNState {
    float    *vad_gru_state;      /* state vector for model->vad_gru */
    float    *noise_gru_state;    /* state vector for model->noise_gru */
    float    *denoise_gru_state;  /* state vector for model->denoise_gru */
    RNNModel *model;              /* model shared by all channels */
} RNNState;
/**
 * Per-channel processing state (one instance per audio channel, allocated
 * zero-initialised in config_input()).
 */
typedef struct DenoiseState {
    /* Previous input frame; prepended to the next one in frame_analysis(). */
    float analysis_mem[FRAME_SIZE];
    /* Ring of the last CEPS_MEM cepstra; memid is the slot written next. */
    float cepstral_mem[CEPS_MEM][NB_BANDS];
    int memid;
    /* Second half of the last synthesised window, kept by frame_synthesis(). */
    DECLARE_ALIGNED(32, float, synthesis_mem)[FRAME_SIZE];
    /* Sliding history of input samples used for pitch analysis. */
    float pitch_buf[PITCH_BUF_SIZE];
    /* Not referenced in the visible part of this file. */
    float pitch_enh_buf[PITCH_BUF_SIZE];
    /* Pitch gain and period found for the previous frame. */
    float last_gain;
    int last_period;
    /* Not referenced in the visible part of this file (presumably filter
     * memory for biquad() -- TODO confirm). */
    float mem_hp_x[2];
    /* Not referenced in the visible part of this file. */
    float lastg[NB_BANDS];
    /* Recurrent network state of this channel. */
    RNNState rnn;
    /* Forward (tx) and inverse (txi) transform contexts and their entry
     * points, created by av_tx_init() in config_input(). */
    AVTXContext *tx;
    AVTXContext *txi;
    av_tx_fn tx_fn;
    av_tx_fn txi_fn;
} DenoiseState;
/**
 * Private context of the arnndn filter.
 */
typedef struct AudioRNNContext {
    const AVClass *class;

    /* Value of the "model" option (see the filter documentation). */
    char *model_name;

    /* Channel count of the input link and the per-channel states. */
    int channels;
    DenoiseState *st;

    /* Window applied before the forward and after the inverse transform. */
    DECLARE_ALIGNED(32, float, window)[WINDOW_SIZE];
    /* NB_BANDS x NB_BANDS table read by dct() as dct_table[j * NB_BANDS + i]. */
    float dct_table[NB_BANDS*NB_BANDS];

    /* Network shared by all channels. */
    RNNModel *model;

    /* Vector helpers (vector_fmul, vector_fmac_scalar). */
    AVFloatDSPContext *fdsp;
} AudioRNNContext;
/* Activation identifiers as written in the model file. INPUT_ACTIVATION() in
 * rnnoise_model_from_file() maps them onto the ACTIVATION_* values; any value
 * other than SIGMOID or RELU is treated as TANH there. */
#define F_ACTIVATION_TANH 0
#define F_ACTIVATION_SIGMOID 1
#define F_ACTIVATION_RELU 2
static
void
rnnoise_model_free
(
RNNModel
*
model
)
{
#define FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0)
#define FREE_DENSE(name) do { \
if (model->name) { \
av_free((void *) model->name->input_weights); \
av_free((void *) model->name->bias); \
av_free((void *) model->name); \
} \
} while (0)
#define FREE_GRU(name) do { \
if (model->name) { \
av_free((void *) model->name->input_weights); \
av_free((void *) model->name->recurrent_weights); \
av_free((void *) model->name->bias); \
av_free((void *) model->name); \
} \
} while (0)
if
(
!
model
)
return
;
FREE_DENSE
(
input_dense
);
FREE_GRU
(
vad_gru
);
FREE_GRU
(
noise_gru
);
FREE_GRU
(
denoise_gru
);
FREE_DENSE
(
denoise_output
);
FREE_DENSE
(
vad_output
);
av_free
(
model
);
}
/**
 * Parse a text model file into a newly allocated RNNModel.
 *
 * Expected contents, all numbers being decimal integers read with "%d":
 *  - the header line "rnnoise-nu model file version 1";
 *  - six layers in this order: input_dense, vad_gru, noise_gru, denoise_gru,
 *    denoise_output, vad_output. Each layer starts with nb_inputs,
 *    nb_neurons and an activation id (each must be within 0..128), followed
 *    by its weights and then its biases.
 *
 * The stream is only read from; it is not closed here.
 *
 * @param f stream positioned at the start of the model
 * @return the model (release with rnnoise_model_free()), or NULL when the
 *         header does not match, a value cannot be parsed or is out of
 *         range, or an allocation fails
 */
static RNNModel *rnnoise_model_from_file(FILE *f)
{
    DenseLayer *input_dense, *denoise_output, *vad_output;
    GRULayer *vad_gru, *noise_gru, *denoise_gru;
    RNNModel *ret;
    int in;

    /* Only format version 1 is accepted. */
    if (fscanf(f, "rnnoise-nu model file version %d\n", &in) != 1)
        return NULL;
    if (in != 1)
        return NULL;

    ret = av_calloc(1, sizeof(*ret));
    if (!ret)
        return NULL;

/* Allocate a zeroed layer and attach it to the model right away, so that
 * rnnoise_model_free(ret) releases it on any later failure. */
#define ALLOC_LAYER(type, name) \
    name = av_calloc(1, sizeof(type)); \
    if (!name) { \
        rnnoise_model_free(ret); \
        return NULL; \
    } \
    ret->name = name

    ALLOC_LAYER(DenseLayer, input_dense);
    ALLOC_LAYER(GRULayer,   vad_gru);
    ALLOC_LAYER(GRULayer,   noise_gru);
    ALLOC_LAYER(GRULayer,   denoise_gru);
    ALLOC_LAYER(DenseLayer, denoise_output);
    ALLOC_LAYER(DenseLayer, vad_output);

/* Read one integer in the range 0..128 into name. */
#define INPUT_VAL(name) do { \
    if (fscanf(f, "%d", &in) != 1 || in < 0 || in > 128) { \
        rnnoise_model_free(ret); \
        return NULL; \
    } \
    name = in; \
    } while (0)

/* Read an activation id; anything but sigmoid/relu selects tanh. */
#define INPUT_ACTIVATION(name) do { \
    int activation; \
    INPUT_VAL(activation); \
    switch (activation) { \
    case F_ACTIVATION_SIGMOID: \
        name = ACTIVATION_SIGMOID; \
        break; \
    case F_ACTIVATION_RELU: \
        name = ACTIVATION_RELU; \
        break; \
    default: \
        name = ACTIVATION_TANH; \
    } \
    } while (0)

/* Read len integers into a new float array. The array is stored in name
 * before reading so that it is released on a parse failure. */
#define INPUT_ARRAY(name, len) do { \
    float *values = av_calloc((len), sizeof(float)); \
    if (!values) { \
        rnnoise_model_free(ret); \
        return NULL; \
    } \
    name = values; \
    for (int i = 0; i < (len); i++) { \
        if (fscanf(f, "%d", &in) != 1) { \
            rnnoise_model_free(ret); \
            return NULL; \
        } \
        values[i] = in; \
    } \
    } while (0)

/* Read len0 * len2 * len1 integers (file order: k outermost, then i, then j)
 * and store each at j * len2 * FFALIGN(len0, 4) + i * FFALIGN(len0, 4) + k,
 * i.e. with the len0 dimension contiguous and padded to a multiple of 4. */
#define INPUT_ARRAY3(name, len0, len1, len2) do { \
    float *values = av_calloc(FFALIGN((len0), 4) * FFALIGN((len1), 4) * (len2), sizeof(float)); \
    if (!values) { \
        rnnoise_model_free(ret); \
        return NULL; \
    } \
    name = values; \
    for (int k = 0; k < (len0); k++) { \
        for (int i = 0; i < (len2); i++) { \
            for (int j = 0; j < (len1); j++) { \
                if (fscanf(f, "%d", &in) != 1) { \
                    rnnoise_model_free(ret); \
                    return NULL; \
                } \
                values[j * (len2) * FFALIGN((len0), 4) + i * FFALIGN((len0), 4) + k] = in; \
            } \
        } \
    } \
    } while (0)

/* Read one dense layer: sizes, activation, weights, biases. */
#define INPUT_DENSE(name) do { \
    INPUT_VAL(name->nb_inputs); \
    INPUT_VAL(name->nb_neurons); \
    ret->name ## _size = name->nb_neurons; \
    INPUT_ACTIVATION(name->activation); \
    INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \
    INPUT_ARRAY(name->bias, name->nb_neurons); \
    } while (0)

/* Read one GRU layer: sizes, activation, input weights, recurrent weights,
 * biases. */
#define INPUT_GRU(name) do { \
    INPUT_VAL(name->nb_inputs); \
    INPUT_VAL(name->nb_neurons); \
    ret->name ## _size = name->nb_neurons; \
    INPUT_ACTIVATION(name->activation); \
    INPUT_ARRAY3(name->input_weights, name->nb_inputs, name->nb_neurons, 3); \
    INPUT_ARRAY3(name->recurrent_weights, name->nb_neurons, name->nb_neurons, 3); \
    INPUT_ARRAY(name->bias, name->nb_neurons * 3); \
    } while (0)

    INPUT_DENSE(input_dense);
    INPUT_GRU(vad_gru);
    INPUT_GRU(noise_gru);
    INPUT_GRU(denoise_gru);
    INPUT_DENSE(denoise_output);
    INPUT_DENSE(vad_output);

    return ret;
}
/**
 * Advertise the formats the filter accepts: planar float samples, any
 * channel count, and a sample rate of 48000 Hz only.
 *
 * @return 0 or a positive value on success, a negative AVERROR code otherwise
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_FLTP,
        AV_SAMPLE_FMT_NONE
    };
    int sample_rates[] = { 48000, -1 };
    AVFilterChannelLayouts *layouts = NULL;
    AVFilterFormats *list = NULL;
    int ret;

    /* Sample formats. */
    list = ff_make_format_list(sample_fmts);
    if (!list)
        return AVERROR(ENOMEM);
    if ((ret = ff_set_common_formats(ctx, list)) < 0)
        return ret;

    /* Channel layouts / counts. */
    layouts = ff_all_channel_counts();
    if (!layouts)
        return AVERROR(ENOMEM);
    if ((ret = ff_set_common_channel_layouts(ctx, layouts)) < 0)
        return ret;

    /* Sample rates. */
    list = ff_make_format_list(sample_rates);
    if (!list)
        return AVERROR(ENOMEM);

    return ff_set_common_samplerates(ctx, list);
}
/**
 * Set up the per-channel state once the input link is configured.
 *
 * For every channel this allocates the three GRU state vectors (sized from
 * the model and rounded up to a multiple of 16 floats) and creates a forward
 * and an inverse WINDOW_SIZE-point float FFT context.
 *
 * Nothing is released here on failure; whatever was allocated stays
 * reachable through s->st.
 *
 * @return 0 on success, a negative AVERROR code otherwise
 */
static int config_input(AVFilterLink *inlink)
{
    AudioRNNContext *s = inlink->dst->priv;

    s->channels = inlink->channels;

    s->st = av_calloc(s->channels, sizeof(*s->st));
    if (!s->st)
        return AVERROR(ENOMEM);

    for (int ch = 0; ch < s->channels; ch++) {
        DenoiseState *st = s->st + ch;
        RNNState *rnn = &st->rnn;
        int err;

        rnn->model = s->model;

        /* All three allocations are attempted before any result is checked. */
        rnn->vad_gru_state     = av_calloc(sizeof(float), FFALIGN(s->model->vad_gru_size, 16));
        rnn->noise_gru_state   = av_calloc(sizeof(float), FFALIGN(s->model->noise_gru_size, 16));
        rnn->denoise_gru_state = av_calloc(sizeof(float), FFALIGN(s->model->denoise_gru_size, 16));
        if (!(rnn->vad_gru_state && rnn->noise_gru_state && rnn->denoise_gru_state))
            return AVERROR(ENOMEM);

        /* Forward transform. */
        err = av_tx_init(&st->tx, &st->tx_fn, AV_TX_FLOAT_FFT, 0, WINDOW_SIZE, NULL, 0);
        if (err < 0)
            return err;

        /* Inverse transform. */
        err = av_tx_init(&st->txi, &st->txi_fn, AV_TX_FLOAT_FFT, 1, WINDOW_SIZE, NULL, 0);
        if (err < 0)
            return err;
    }

    return 0;
}
/**
 * Run N samples through a second-order recursive filter.
 *
 * For each sample: y = x + mem[0]; then
 * mem[0] = mem[1] + (b[0]*x - a[0]*y) and mem[1] = b[1]*x - a[1]*y.
 *
 * @param y   output, N samples
 * @param mem two filter state values, updated in place
 * @param x   input, N samples (each sample is read before the matching
 *            output is written)
 * @param b   two feed-forward coefficients
 * @param a   two feedback coefficients
 * @param N   number of samples
 */
static void biquad(float *y, float mem[2], const float *x,
                   const float *b, const float *a, int N)
{
    int n = 0;

    while (n < N) {
        const float in  = x[n];
        const float res = x[n] + mem[0];

        mem[0] = mem[1] + (b[0] * in - a[0] * res);
        mem[1] = (b[1] * in - a[1] * res);
        y[n]   = res;
        n++;
    }
}
/* Element-count based wrappers around memmove/memset/memcpy: n is a number
 * of elements of *(dst), not a number of bytes. The "0*((dst)-(src))" term
 * adds nothing to the size; it is a pointer subtraction, which only compiles
 * when dst and src point to compatible types. RNN_COPY uses memcpy, so the
 * two regions must not overlap; use RNN_MOVE for overlapping regions. */
#define RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
#define RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
#define RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
/**
 * Transform WINDOW_SIZE real samples and keep the first FREQ_SIZE bins.
 *
 * @param st  channel state providing the forward transform (tx, tx_fn)
 * @param out receives FREQ_SIZE complex bins
 * @param in  WINDOW_SIZE real input samples
 */
static void forward_transform(DenoiseState *st, AVComplexFloat *out, const float *in)
{
    AVComplexFloat samples[WINDOW_SIZE];
    AVComplexFloat spectrum[WINDOW_SIZE];
    int n;

    /* Promote the real input to complex with a zero imaginary part. */
    for (n = 0; n < WINDOW_SIZE; n++) {
        samples[n].re = in[n];
        samples[n].im = 0;
    }

    st->tx_fn(st->tx, spectrum, samples, sizeof(float));

    RNN_COPY(out, spectrum, FREQ_SIZE);
}
/**
 * Rebuild WINDOW_SIZE real samples from FREQ_SIZE complex bins.
 *
 * The bins above FREQ_SIZE - 1 are filled with the complex conjugate of
 * their mirror bin before the inverse transform is run; the real part of the
 * result is divided by WINDOW_SIZE.
 *
 * @param st  channel state providing the inverse transform (txi, txi_fn)
 * @param out receives WINDOW_SIZE real samples
 * @param in  FREQ_SIZE complex bins
 */
static void inverse_transform(DenoiseState *st, float *out, const AVComplexFloat *in)
{
    AVComplexFloat spectrum[WINDOW_SIZE];
    AVComplexFloat samples[WINDOW_SIZE];
    int n;

    for (n = 0; n < FREQ_SIZE; n++)
        spectrum[n] = in[n];

    /* Mirror the lower half: spectrum[n] = conj(spectrum[WINDOW_SIZE - n]). */
    for (; n < WINDOW_SIZE; n++) {
        const AVComplexFloat *mirror = &spectrum[WINDOW_SIZE - n];

        spectrum[n].re = mirror->re;
        spectrum[n].im = -mirror->im;
    }

    st->txi_fn(st->txi, samples, spectrum, sizeof(float));

    for (n = 0; n < WINDOW_SIZE; n++)
        out[n] = samples[n].re / WINDOW_SIZE;
}
/* Band edges. The band routines in this file shift each entry left by
 * FRAME_SIZE_SHIFT to obtain a spectrum bin index. The frequency labels
 * below are the ones given with the table. */
static const uint8_t eband5ms[] = {
      0,   1,   2,   3,   /* 0     200   400   600  */
      4,   5,   6,   7,   /* 800   1k    1.2   1.4  */
      8,  10,  12,  14,   /* 1.6   2k    2.4   2.8  */
     16,  20,  24,  28,   /* 3.2   4k    4.8   5.6  */
     34,  40,  48,  60,   /* 6.8   8k    9.6   12k  */
     78, 100              /* 15.6  20k              */
};
static
void
compute_band_energy
(
float
*
bandE
,
const
AVComplexFloat
*
X
)
{
float
sum
[
NB_BANDS
]
=
{
0
};
for
(
int
i
=
0
;
i
<
NB_BANDS
-
1
;
i
++
)
{
int
band_size
;
band_size
=
(
eband5ms
[
i
+
1
]
-
eband5ms
[
i
])
<<
FRAME_SIZE_SHIFT
;
for
(
int
j
=
0
;
j
<
band_size
;
j
++
)
{
float
tmp
,
frac
=
(
float
)
j
/
band_size
;
tmp
=
SQUARE
(
X
[(
eband5ms
[
i
]
<<
FRAME_SIZE_SHIFT
)
+
j
].
re
);
tmp
+=
SQUARE
(
X
[(
eband5ms
[
i
]
<<
FRAME_SIZE_SHIFT
)
+
j
].
im
);
sum
[
i
]
+=
(
1
.
f
-
frac
)
*
tmp
;
sum
[
i
+
1
]
+=
frac
*
tmp
;
}
}
sum
[
0
]
*=
2
;
sum
[
NB_BANDS
-
1
]
*=
2
;
for
(
int
i
=
0
;
i
<
NB_BANDS
;
i
++
)
bandE
[
i
]
=
sum
[
i
];
}
static
void
compute_band_corr
(
float
*
bandE
,
const
AVComplexFloat
*
X
,
const
AVComplexFloat
*
P
)
{
float
sum
[
NB_BANDS
]
=
{
0
};
for
(
int
i
=
0
;
i
<
NB_BANDS
-
1
;
i
++
)
{
int
band_size
;
band_size
=
(
eband5ms
[
i
+
1
]
-
eband5ms
[
i
])
<<
FRAME_SIZE_SHIFT
;
for
(
int
j
=
0
;
j
<
band_size
;
j
++
)
{
float
tmp
,
frac
=
(
float
)
j
/
band_size
;
tmp
=
X
[(
eband5ms
[
i
]
<<
FRAME_SIZE_SHIFT
)
+
j
].
re
*
P
[(
eband5ms
[
i
]
<<
FRAME_SIZE_SHIFT
)
+
j
].
re
;
tmp
+=
X
[(
eband5ms
[
i
]
<<
FRAME_SIZE_SHIFT
)
+
j
].
im
*
P
[(
eband5ms
[
i
]
<<
FRAME_SIZE_SHIFT
)
+
j
].
im
;
sum
[
i
]
+=
(
1
-
frac
)
*
tmp
;
sum
[
i
+
1
]
+=
frac
*
tmp
;
}
}
sum
[
0
]
*=
2
;
sum
[
NB_BANDS
-
1
]
*=
2
;
for
(
int
i
=
0
;
i
<
NB_BANDS
;
i
++
)
bandE
[
i
]
=
sum
[
i
];
}
/**
 * Window and transform one input frame and compute its band energies.
 *
 * The transform input is the previous frame (kept in st->analysis_mem)
 * followed by the new one; the new frame then replaces the stored one.
 *
 * @param s  filter context (window, fdsp)
 * @param st channel state
 * @param X  receives FREQ_SIZE spectrum bins
 * @param Ex receives NB_BANDS band energies of X
 * @param in FRAME_SIZE new input samples
 */
static void frame_analysis(AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, float *Ex, const float *in)
{
    LOCAL_ALIGNED_32(float, frame, [WINDOW_SIZE]);
    float *const older = frame;
    float *const newer = frame + FRAME_SIZE;

    RNN_COPY(older, st->analysis_mem, FRAME_SIZE);
    RNN_COPY(newer, in, FRAME_SIZE);
    RNN_COPY(st->analysis_mem, in, FRAME_SIZE);

    s->fdsp->vector_fmul(frame, frame, s->window, WINDOW_SIZE);
    forward_transform(st, X, frame);
    compute_band_energy(Ex, X);
}
/**
 * Turn a spectrum back into FRAME_SIZE output samples.
 *
 * The inverse transform is windowed, st->synthesis_mem is combined into its
 * first half through vector_fmac_scalar() with a factor of 1, that half is
 * written to out and the second half becomes the new st->synthesis_mem.
 *
 * @param s   filter context (window, fdsp)
 * @param st  channel state
 * @param out receives FRAME_SIZE samples
 * @param y   FREQ_SIZE spectrum bins
 */
static void frame_synthesis(AudioRNNContext *s, DenoiseState *st, float *out, const AVComplexFloat *y)
{
    LOCAL_ALIGNED_32(float, frame, [WINDOW_SIZE]);
    const float *const tail = &frame[FRAME_SIZE];

    inverse_transform(st, frame, y);
    s->fdsp->vector_fmul(frame, frame, s->window, WINDOW_SIZE);
    s->fdsp->vector_fmac_scalar(frame, st->synthesis_mem, 1.f, FRAME_SIZE);

    RNN_COPY(out, frame, FRAME_SIZE);
    RNN_COPY(st->synthesis_mem, tail, FRAME_SIZE);
}
/**
 * Accumulate four neighbouring cross-correlation lags at once:
 * sum[k] += x[j] * y[j + k] for j in [0, len) and k in [0, 4).
 *
 * y[0], y[1] and y[2] are always read, and y is read up to index len + 2.
 * The main loop handles four samples per iteration while rotating the four
 * cached y values, so each y sample is loaded only once.
 *
 * @param x   len samples
 * @param y   at least len + 3 samples
 * @param sum four accumulators, updated in place
 * @param len number of products per lag
 */
static inline void xcorr_kernel(const float *x, const float *y, float sum[4], int len)
{
    float y0 = y[0];
    float y1 = y[1];
    float y2 = y[2];
    float y3 = 0;
    float s;
    int j;

    for (j = 0; j < len - 3; j += 4) {
        s  = x[j];
        y3 = y[j + 3];
        sum[0] += s * y0;
        sum[1] += s * y1;
        sum[2] += s * y2;
        sum[3] += s * y3;

        s  = x[j + 1];
        y0 = y[j + 4];
        sum[0] += s * y1;
        sum[1] += s * y2;
        sum[2] += s * y3;
        sum[3] += s * y0;

        s  = x[j + 2];
        y1 = y[j + 5];
        sum[0] += s * y2;
        sum[1] += s * y3;
        sum[2] += s * y0;
        sum[3] += s * y1;

        s  = x[j + 3];
        y2 = y[j + 6];
        sum[0] += s * y3;
        sum[1] += s * y0;
        sum[2] += s * y1;
        sum[3] += s * y2;
    }

    /* Up to three leftover samples, continuing the same rotation. */
    if (j < len) {
        s  = x[j];
        y3 = y[j + 3];
        sum[0] += s * y0;
        sum[1] += s * y1;
        sum[2] += s * y2;
        sum[3] += s * y3;
    }
    j++;
    if (j < len) {
        s  = x[j];
        y0 = y[j + 3];
        sum[0] += s * y1;
        sum[1] += s * y2;
        sum[2] += s * y3;
        sum[3] += s * y0;
    }
    j++;
    if (j < len) {
        s  = x[j];
        y1 = y[j + 3];
        sum[0] += s * y2;
        sum[1] += s * y3;
        sum[2] += s * y0;
        sum[3] += s * y1;
    }
}
/**
 * Dot product of two float vectors, accumulated in index order.
 *
 * @param x first vector, N values
 * @param y second vector, N values
 * @param N number of values; nothing is read when N <= 0
 * @return sum of x[i] * y[i]
 */
static inline float celt_inner_prod(const float *x, const float *y, int N)
{
    float acc = 0.f;
    int n = 0;

    while (n < N) {
        acc += x[n] * y[n];
        n++;
    }

    return acc;
}
/**
 * Cross-correlate x with y for lags 0 .. max_pitch - 1:
 * xcorr[i] = sum over j in [0, len) of x[j] * y[i + j].
 *
 * Lags are computed four at a time with xcorr_kernel(); the remaining ones
 * (when max_pitch is not a multiple of 4) use celt_inner_prod().
 *
 * @param x         len samples
 * @param y         samples to correlate against; must be readable wherever
 *                  the two helpers read it
 * @param xcorr     receives max_pitch values
 * @param len       number of products per lag
 * @param max_pitch number of lags
 */
static void celt_pitch_xcorr(const float *x, const float *y, float *xcorr, int len, int max_pitch)
{
    int lag = 0;

    while (lag < max_pitch - 3) {
        float quad[4] = { 0, 0, 0, 0 };

        xcorr_kernel(x, y + lag, quad, len);
        for (int k = 0; k < 4; k++)
            xcorr[lag + k] = quad[k];
        lag += 4;
    }

    /* Leftover lags, one at a time. */
    while (lag < max_pitch) {
        xcorr[lag] = celt_inner_prod(x, y + lag, len);
        lag++;
    }
}
/**
 * Autocorrelation of x for lags 0 .. lag (lag + 1 values are written to ac).
 *
 * When overlap is non-zero, the first and last overlap samples are weighted
 * by window[] in a local copy before correlating; that copy holds
 * PITCH_BUF_SIZE >> 1 samples, so n must not exceed that in this case.
 *
 * The bulk of each lag (the first n - lag products) comes from
 * celt_pitch_xcorr(); the remaining products are added afterwards.
 *
 * @param x       in:  [0...n-1] samples
 * @param ac      out: [0...lag] autocorrelation values
 * @param window  overlap window, only read when overlap != 0
 * @param overlap number of windowed samples at each end
 * @param lag     highest lag to compute
 * @param n       number of samples
 * @return always 0
 */
static int celt_autocorr(const float *x, /* in: [0...n-1] samples x */
                         float *ac,      /* out: [0...lag] ac values */
                         const float *window, int overlap, int lag, int n)
{
    const int fastN = n - lag;
    const float *src = x;
    float windowed[PITCH_BUF_SIZE>>1];

    if (overlap != 0) {
        for (int i = 0; i < n; i++)
            windowed[i] = x[i];
        for (int i = 0; i < overlap; i++) {
            windowed[i]         = x[i] * window[i];
            windowed[n - i - 1] = x[n - i - 1] * window[i];
        }
        src = windowed;
    }

    celt_pitch_xcorr(src, src, ac, fastN, lag + 1);

    /* Add the products not covered by the fastN-long correlation above. */
    for (int k = 0; k <= lag; k++) {
        float rest = 0.f;

        for (int i = k + fastN; i < n; i++)
            rest += src[i] * src[i - k];
        ac[k] += rest;
    }

    return 0;
}
/**
 * Derive p prediction coefficients from autocorrelation values with a
 * recursion that computes one reflection coefficient per order.
 *
 * lpc is cleared first. If ac[0] is zero it stays all zero. The recursion
 * stops early once the residual error drops below 0.001 * ac[0], leaving the
 * higher-order coefficients at zero.
 *
 * @param lpc out: [0...p-1] LPC coefficients
 * @param ac  in:  [0...p] autocorrelation values
 * @param p   prediction order
 */
static void celt_lpc(float *lpc,      /* out: [0...p-1] LPC coefficients */
                     const float *ac, /* in: [0...p] autocorrelation values */
                     int p)
{
    float error = ac[0];

    RNN_CLEAR(lpc, p);

    if (ac[0] == 0)
        return;

    for (int order = 0; order < p; order++) {
        float acc = 0;
        float refl;

        /* Sum up this iteration's reflection coefficient */
        for (int j = 0; j < order; j++)
            acc += (lpc[j] * ac[order - j]);
        acc += ac[order + 1];
        refl = -acc / error;

        /* Update LPC coefficients and total error */
        lpc[order] = refl;
        for (int j = 0; j < (order + 1) >> 1; j++) {
            const float lo = lpc[j];
            const float hi = lpc[order - 1 - j];

            lpc[j]             = lo + (refl * hi);
            lpc[order - 1 - j] = hi + (refl * lo);
        }

        error = error - (refl * refl * error);

        /* Bail out once we get 30 dB gain */
        if (error < .001f * ac[0])
            break;
    }
}
/**
 * 5-tap filter: y[i] = x[i] + sum over k in [0, 5) of num[k] * h[k], where
 * h[0] is the previous input sample, h[1] the one before, and so on.
 *
 * May be used in place (x == y): each input sample is read before the
 * matching output is stored.
 *
 * @param x   N input samples
 * @param num five coefficients
 * @param y   receives N output samples
 * @param N   number of samples
 * @param mem five history values (most recent first), updated on return
 */
static void celt_fir5(const float *x, const float *num, float *y, int N, float *mem)
{
    float coef[5];
    float hist[5];
    int k;

    for (k = 0; k < 5; k++)
        coef[k] = num[k];
    for (k = 0; k < 5; k++)
        hist[k] = mem[k];

    for (int n = 0; n < N; n++) {
        float acc = x[n];

        for (k = 0; k < 5; k++)
            acc += (coef[k] * hist[k]);

        /* Shift the history by one and insert the current input. */
        for (k = 4; k > 0; k--)
            hist[k] = hist[k - 1];
        hist[0] = x[n];

        y[n] = acc;
    }

    for (k = 0; k < 5; k++)
        mem[k] = hist[k];
}
/**
 * Produce a half-rate, filtered signal for the pitch search.
 *
 * Steps:
 *  1. decimate by two with a (0.25, 0.5, 0.25) kernel (x_lp[0] only uses the
 *     two samples available); a second channel is added when C == 2;
 *  2. compute a 4th-order autocorrelation of the result, scale ac[0] by
 *     1.0001 and apply a lag window;
 *  3. derive 4 LPC coefficients, scale coefficient i by 0.9^(i+1), combine
 *     them with a zero at 0.8 and filter x_lp in place with celt_fir5().
 *
 * @param x    C channel pointers, each to len samples
 * @param x_lp receives len >> 1 samples
 * @param len  number of input samples per channel
 * @param C    number of channels; only the value 2 adds x[1]
 */
static void pitch_downsample(float *x[], float *x_lp, int len, int C)
{
    const int half = len >> 1;
    const float c1 = .8f;
    float ac[5];
    float lpc[4];
    float lpc2[5];
    float mem[5] = { 0, 0, 0, 0, 0 };
    float decay = Q15ONE;

    for (int i = 1; i < half; i++)
        x_lp[i] = .5f * (.5f * (x[0][(2 * i - 1)] + x[0][(2 * i + 1)]) + x[0][2 * i]);
    x_lp[0] = .5f * (.5f * (x[0][1]) + x[0][0]);

    if (C == 2) {
        for (int i = 1; i < half; i++)
            x_lp[i] += (.5f * (.5f * (x[1][(2 * i - 1)] + x[1][(2 * i + 1)]) + x[1][2 * i]));
        x_lp[0] += .5f * (.5f * (x[1][1]) + x[1][0]);
    }

    celt_autocorr(x_lp, ac, NULL, 0, 4, half);

    /* Noise floor -40 dB */
    ac[0] *= 1.0001f;

    /* Lag windowing */
    for (int i = 1; i <= 4; i++) {
        /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
        ac[i] -= ac[i] * (.008f * i) * (.008f * i);
    }

    celt_lpc(lpc, ac, 4);

    /* Scale coefficient i by 0.9^(i+1). */
    for (int i = 0; i < 4; i++) {
        decay  = .9f * decay;
        lpc[i] = (lpc[i] * decay);
    }

    /* Add a zero */
    lpc2[0] = lpc[0] + .8f;
    for (int i = 1; i < 4; i++)
        lpc2[i] = lpc[i] + (c1 * lpc[i - 1]);
    lpc2[4] = (c1 * lpc[3]);

    celt_fir5(x_lp, lpc2, x_lp, half, mem);
}
/**
 * Compute two dot products that share their first operand.
 *
 * @param x   common vector, N values
 * @param y01 first partner vector, N values
 * @param y02 second partner vector, N values
 * @param N   number of values
 * @param xy1 receives sum of x[i] * y01[i]
 * @param xy2 receives sum of x[i] * y02[i]
 */
static inline void dual_inner_prod(const float *x, const float *y01, const float *y02,
                                   int N, float *xy1, float *xy2)
{
    float acc1 = 0;
    float acc2 = 0;
    int n = 0;

    while (n < N) {
        acc1 += (x[n] * y01[n]);
        acc2 += (x[n] * y02[n]);
        n++;
    }

    /* Results are stored only after all inputs have been read. */
    *xy1 = acc1;
    *xy2 = acc2;
}
static
float
compute_pitch_gain
(
float
xy
,
float
xx
,
float
yy
)
{
return
xy
/
sqrtf
(
1
.
f
+
xx
*
yy
);
}
/* Indexed by the divisor k in remove_doubling(): for k > 2 the second
 * candidate period checked is (2 * second_check[k] * T0 + k) / (2 * k). */
static const int second_check[16] = {
    0, 0, 3, 2,
    3, 2, 5, 2,
    3, 2, 3, 2,
    5, 2, 3, 2
};
/**
 * Check whether a shorter period T0/k (k = 2..15) explains the signal as
 * well as the candidate period, and return the gain of the retained period.
 *
 * All period/length arguments are halved on entry and x is advanced by the
 * halved maxperiod, so x must be readable from x[0] up to
 * x[maxperiod/2 + N/2 - 1] as passed in. The period is doubled again (plus a
 * -1/0/+1 refinement) before it is stored back.
 *
 * @param x           analysis signal
 * @param maxperiod   largest period considered
 * @param minperiod   smallest period considered; also the lower bound of the
 *                    stored result
 * @param N           analysis length
 * @param T0_         in: candidate period, out: retained period
 * @param prev_period period retained for the previous frame
 * @param prev_gain   gain returned for the previous frame
 * @return pitch gain of the retained period
 */
static float remove_doubling(float *x, int maxperiod, int minperiod, int N,
                             int *T0_, int prev_period, float prev_gain)
{
    int k, i, T, T0;
    float g, g0;
    float pg;
    float xy, xx, yy, xy2;
    float xcorr[3];
    float best_xy, best_yy;
    int offset;
    int minperiod0;
    float yy_lookup[PITCH_MAX_PERIOD + 1];

    minperiod0 = minperiod;
    maxperiod /= 2;
    minperiod /= 2;
    *T0_ /= 2;
    prev_period /= 2;
    N /= 2;
    x += maxperiod;
    if (*T0_ >= maxperiod)
        *T0_ = maxperiod - 1;

    T = T0 = *T0_;
    dual_inner_prod(x, x, x - T0, N, &xx, &xy);

    /* yy_lookup[i] is the energy of the N samples starting at x[-i],
     * updated incrementally and floored at zero. */
    yy_lookup[0] = xx;
    yy = xx;
    for (i = 1; i <= maxperiod; i++) {
        yy = yy + (x[-i] * x[-i]) - (x[N - i] * x[N - i]);
        yy_lookup[i] = FFMAX(0, yy);
    }
    yy = yy_lookup[T0];
    best_xy = xy;
    best_yy = yy;
    g = g0 = compute_pitch_gain(xy, xx, yy);

    /* Look for any pitch at T/k */
    for (k = 2; k <= 15; k++) {
        int T1, T1b;
        float g1;
        float cont;
        float thresh;

        T1 = (2 * T0 + k) / (2 * k);
        if (T1 < minperiod)
            break;

        /* Look for another strong correlation at T1b */
        if (k == 2) {
            if (T1 + T0 > maxperiod)
                T1b = T0;
            else
                T1b = T0 + T1;
        } else {
            T1b = (2 * second_check[k] * T0 + k) / (2 * k);
        }

        dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
        xy = .5f * (xy + xy2);
        yy = .5f * (yy_lookup[T1] + yy_lookup[T1b]);
        g1 = compute_pitch_gain(xy, xx, yy);

        /* Lower the threshold when T1 is close to the previous period. */
        if (FFABS(T1 - prev_period) <= 1)
            cont = prev_gain;
        else if (FFABS(T1 - prev_period) <= 2 && 5 * k * k < T0)
            cont = prev_gain * .5f;
        else
            cont = 0;
        thresh = FFMAX(.3f, (.7f * g0) - cont);

        /* Bias against very high pitch (very short period) to avoid false-positives
           due to short-term correlation */
        /* NOTE(review): the second branch can never run, since
         * T1 < 2*minperiod implies T1 < 3*minperiod. Kept unchanged because
         * reordering would alter the computed pitch features. */
        if (T1 < 3 * minperiod)
            thresh = FFMAX(.4f, (.85f * g0) - cont);
        else if (T1 < 2 * minperiod)
            thresh = FFMAX(.5f, (.9f * g0) - cont);

        if (g1 > thresh) {
            best_xy = xy;
            best_yy = yy;
            T = T1;
            g = g1;
        }
    }

    best_xy = FFMAX(0, best_xy);
    if (best_yy <= best_xy)
        pg = Q15ONE;
    else
        pg = best_xy / (best_yy + 1);

    /* Compare the correlation at T-1, T and T+1 to refine the period. */
    for (k = 0; k < 3; k++)
        xcorr[k] = celt_inner_prod(x, x - (T + k - 1), N);
    if ((xcorr[2] - xcorr[0]) > .7f * (xcorr[1] - xcorr[0]))
        offset = 1;
    else if ((xcorr[0] - xcorr[2]) > (.7f * (xcorr[1] - xcorr[2])))
        offset = -1;
    else
        offset = 0;

    if (pg > g)
        pg = g;

    *T0_ = 2 * T + offset;
    if (*T0_ < minperiod0)
        *T0_ = minperiod0;

    return pg;
}
/**
 * Find the two lags with the largest xcorr^2 / energy ratio.
 *
 * Only lags with a positive correlation are considered. Ratios are compared
 * by cross-multiplication, so no division is performed. The energy of the
 * len-sample window of y is updated incrementally for every lag and floored
 * at 1.
 *
 * @param xcorr      max_pitch correlation values
 * @param y          signal the correlation was computed against; read up to
 *                   index max_pitch + len - 1
 * @param len        window length
 * @param max_pitch  number of lags
 * @param best_pitch receives the best lag in [0] and the runner-up in [1]
 *                   (they stay 0 and 1 when no lag qualifies)
 */
static void find_best_pitch(float *xcorr, float *y, int len, int max_pitch, int *best_pitch)
{
    float top_num[2] = { -1, -1 };
    float top_den[2] = {  0,  0 };
    float energy = 1.f;

    best_pitch[0] = 0;
    best_pitch[1] = 1;

    for (int n = 0; n < len; n++)
        energy += y[n] * y[n];

    for (int lag = 0; lag < max_pitch; lag++) {
        if (xcorr[lag] > 0) {
            float scaled, num;

            scaled = xcorr[lag];
            /* Considering the range of the correlation, this should avoid both
               underflows and overflows (inf) when squaring it */
            scaled *= 1e-12f;
            num = scaled * scaled;

            if ((num * top_den[1]) > (top_num[1] * energy)) {
                int slot = 1;

                if ((num * top_den[0]) > (top_num[0] * energy)) {
                    /* New best: the previous best becomes the runner-up. */
                    top_num[1]    = top_num[0];
                    top_den[1]    = top_den[0];
                    best_pitch[1] = best_pitch[0];
                    slot = 0;
                }
                top_num[slot]    = num;
                top_den[slot]    = energy;
                best_pitch[slot] = lag;
            }
        }

        /* Slide the energy window by one sample. */
        energy += y[lag + len] * y[lag + len] - y[lag] * y[lag];
        energy = FFMAX(1, energy);
    }
}
/**
 * Two-stage pitch lag search.
 *
 * A coarse search runs on x_lp and y decimated by two once more; the two
 * best coarse lags are then refined on the undecimated inputs (only lags
 * within 2 of twice a coarse candidate are evaluated), and the winner is
 * adjusted by -1/0/+1 from the shape of its neighbours.
 *
 * The local buffers hold WINDOW_SIZE values each, which bounds len >> 2,
 * (len + max_pitch) >> 2 and max_pitch >> 1.
 *
 * @param x_lp      target signal, len >> 1 samples
 * @param y         signal searched, (len + max_pitch) >> 1 samples
 * @param len       target length before the first decimation
 * @param max_pitch lag range before the first decimation
 * @param pitch     receives 2 * best lag - refinement
 */
static void pitch_search(const float *x_lp, float *y, int len, int max_pitch, int *pitch)
{
    const int lag = len + max_pitch;
    int best_pitch[2] = { 0, 0 };
    int offset = 0;
    float x_lp4[WINDOW_SIZE];
    float y_lp4[WINDOW_SIZE];
    float xcorr[WINDOW_SIZE];

    /* Downsample by 2 again */
    for (int j = 0; j < len >> 2; j++)
        x_lp4[j] = x_lp[2 * j];
    for (int j = 0; j < lag >> 2; j++)
        y_lp4[j] = y[2 * j];

    /* Coarse search with 4x decimation */
    celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len >> 2, max_pitch >> 2);
    find_best_pitch(xcorr, y_lp4, len >> 2, max_pitch >> 2, best_pitch);

    /* Finer search with 2x decimation */
    for (int i = 0; i < max_pitch >> 1; i++) {
        xcorr[i] = 0;
        if (FFABS(i - 2 * best_pitch[0]) <= 2 || FFABS(i - 2 * best_pitch[1]) <= 2) {
            float sum = celt_inner_prod(x_lp, y + i, len >> 1);

            xcorr[i] = FFMAX(-1, sum);
        }
    }
    find_best_pitch(xcorr, y, len >> 1, max_pitch >> 1, best_pitch);

    /* Refine by pseudo-interpolation */
    if (best_pitch[0] > 0 && best_pitch[0] < (max_pitch >> 1) - 1) {
        const float a = xcorr[best_pitch[0] - 1];
        const float b = xcorr[best_pitch[0]];
        const float c = xcorr[best_pitch[0] + 1];

        if (c - a > .7f * (b - a))
            offset = 1;
        else if (a - c > .7f * (b - c))
            offset = -1;
    }

    *pitch = 2 * best_pitch[0] - offset;
}
/**
 * Multiply an NB_BANDS vector by the table in s->dct_table and scale the
 * result by sqrt(2/22) (22 being the value of NB_BANDS):
 * out[i] = sqrt(2/22) * sum over j of in[j] * dct_table[j * NB_BANDS + i].
 *
 * @param s   filter context holding dct_table
 * @param out receives NB_BANDS values
 * @param in  NB_BANDS input values
 */
static void dct(AudioRNNContext *s, float *out, const float *in)
{
    const float scale = sqrtf(2.f / 22);

    for (int col = 0; col < NB_BANDS; col++) {
        float acc = 0.f;

        for (int row = 0; row < NB_BANDS; row++)
            acc += in[row] * s->dct_table[row * NB_BANDS + col];

        out[col] = acc * scale;
    }
}
/**
 * Analyse one input frame and build the NB_FEATURES feature vector.
 *
 * Side effects on st: analysis_mem (through frame_analysis()), pitch_buf,
 * last_period and last_gain are always updated; cepstral_mem and memid only
 * when the frame is not considered silent.
 *
 * Feature layout as written below:
 *  - [0, NB_BANDS): cepstrum of the log band energies (the first
 *    NB_DELTA_CEPS entries are then replaced by the sum over the last three
 *    cepstra);
 *  - [NB_BANDS, +NB_DELTA_CEPS): first difference over the stored cepstra;
 *  - next NB_DELTA_CEPS: second difference;
 *  - next NB_DELTA_CEPS: cepstrum of the normalised pitch correlation;
 *  - then the scaled pitch period and the spectral variability.
 *
 * @param s        filter context
 * @param st       channel state
 * @param X        receives the spectrum of the frame
 * @param P        receives the spectrum of the pitch-delayed signal
 * @param Ex       receives the band energies of X
 * @param Ep       receives the band energies of P
 * @param Exp      receives the normalised band correlation of X and P
 * @param features receives NB_FEATURES values
 * @param in       FRAME_SIZE input samples
 * @return 1 when the total band energy is below 0.04 (features are cleared
 *         and the cepstral history is left untouched), 0 otherwise
 */
static int compute_frame_features(AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, AVComplexFloat *P,
                                  float *Ex, float *Ep, float *Exp, float *features, const float *in)
{
    LOCAL_ALIGNED_32(float, pitch_frame, [WINDOW_SIZE]);
    float lp_pitch[PITCH_BUF_SIZE>>1];
    float corr_ceps[NB_BANDS];
    float Ly[NB_BANDS];
    float *(chan[1]);
    float *cur, *prev1, *prev2;
    float total_energy = 0;
    float variability = 0;
    float follow, logMax;
    float gain;
    int pitch_index;

    frame_analysis(s, st, X, Ex, in);

    /* Slide the pitch history by one frame and append the new samples. */
    RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE - FRAME_SIZE);
    RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE - FRAME_SIZE], in, FRAME_SIZE);

    /* Pitch period estimation on the half-rate signal. */
    chan[0] = &st->pitch_buf[0];
    pitch_downsample(chan, lp_pitch, PITCH_BUF_SIZE, 1);
    pitch_search(lp_pitch + (PITCH_MAX_PERIOD >> 1), lp_pitch, PITCH_FRAME_SIZE,
                 PITCH_MAX_PERIOD - 3 * PITCH_MIN_PERIOD, &pitch_index);
    pitch_index = PITCH_MAX_PERIOD - pitch_index;

    gain = remove_doubling(lp_pitch, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
                           PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
    st->last_period = pitch_index;
    st->last_gain   = gain;

    /* Spectrum of the window taken pitch_index samples earlier in the history. */
    for (int i = 0; i < WINDOW_SIZE; i++)
        pitch_frame[i] = st->pitch_buf[PITCH_BUF_SIZE - WINDOW_SIZE - pitch_index + i];
    s->fdsp->vector_fmul(pitch_frame, pitch_frame, s->window, WINDOW_SIZE);
    forward_transform(st, P, pitch_frame);
    compute_band_energy(Ep, P);
    compute_band_corr(Exp, X, P);

    for (int i = 0; i < NB_BANDS; i++)
        Exp[i] = Exp[i] / sqrtf(.001f + Ex[i] * Ep[i]);

    dct(s, corr_ceps, Exp);
    for (int i = 0; i < NB_DELTA_CEPS; i++)
        features[NB_BANDS + 2 * NB_DELTA_CEPS + i] = corr_ceps[i];

    features[NB_BANDS + 2 * NB_DELTA_CEPS]     -= 1.3;
    features[NB_BANDS + 2 * NB_DELTA_CEPS + 1] -= 0.9;
    features[NB_BANDS + 3 * NB_DELTA_CEPS]      = .01 * (pitch_index - 300);

    /* Log band energies, limited from below relative to the running maximum
     * (logMax - 7) and to the previous band (follow - 1.5). */
    logMax = -2;
    follow = -2;
    for (int i = 0; i < NB_BANDS; i++) {
        Ly[i] = log10f(1e-2f + Ex[i]);
        Ly[i] = FFMAX(logMax - 7, FFMAX(follow - 1.5, Ly[i]));
        logMax = FFMAX(logMax, Ly[i]);
        follow = FFMAX(follow - 1.5, Ly[i]);
        total_energy += Ex[i];
    }

    if (total_energy < 0.04f) {
        /* If there's no audio, avoid messing up the state. */
        RNN_CLEAR(features, NB_FEATURES);
        return 1;
    }

    dct(s, features, Ly);
    features[0] -= 12;
    features[1] -= 4;

    /* Current slot of the cepstral ring and the two slots written before it. */
    cur = st->cepstral_mem[st->memid];
    if (st->memid < 1)
        prev1 = st->cepstral_mem[CEPS_MEM + st->memid - 1];
    else
        prev1 = st->cepstral_mem[st->memid - 1];
    if (st->memid < 2)
        prev2 = st->cepstral_mem[CEPS_MEM + st->memid - 2];
    else
        prev2 = st->cepstral_mem[st->memid - 2];

    for (int i = 0; i < NB_BANDS; i++)
        cur[i] = features[i];
    st->memid++;

    for (int i = 0; i < NB_DELTA_CEPS; i++) {
        features[i]                            = cur[i] + prev1[i] + prev2[i];
        features[NB_BANDS + i]                 = cur[i] - prev2[i];
        features[NB_BANDS + NB_DELTA_CEPS + i] = cur[i] - 2 * prev1[i] + prev2[i];
    }

    /* Spectral variability features. */
    if (st->memid == CEPS_MEM)
        st->memid = 0;

    /* For every stored cepstrum, take the squared distance to its nearest
     * other stored cepstrum and sum these minima. */
    for (int i = 0; i < CEPS_MEM; i++) {
        float mindist = 1e15f;

        for (int j = 0; j < CEPS_MEM; j++) {
            float dist = 0.f;

            for (int k = 0; k < NB_BANDS; k++) {
                float diff;

                diff  = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
                dist += diff * diff;
            }
            if (j != i)
                mindist = FFMIN(mindist, dist);
        }
        variability += mindist;
    }
    features[NB_BANDS + 3 * NB_DELTA_CEPS + 1] = variability / CEPS_MEM - 2.1;

    return 0;
}
/**
 * Expand NB_BANDS per-band values to FREQ_SIZE per-bin values by linear
 * interpolation between neighbouring band edges.
 *
 * Bins at and above eband5ms[NB_BANDS - 1] << FRAME_SIZE_SHIFT are not
 * written by the interpolation and keep the zero set by the initial memset.
 *
 * @param g     receives FREQ_SIZE values
 * @param bandE NB_BANDS band values
 */
static void interp_band_gain(float *g, const float *bandE)
{
    memset(g, 0, sizeof(*g) * FREQ_SIZE);

    for (int band = 0; band + 1 < NB_BANDS; band++) {
        const int first_bin = eband5ms[band] << FRAME_SIZE_SHIFT;
        const int width     = (eband5ms[band + 1] - eband5ms[band]) << FRAME_SIZE_SHIFT;

        for (int k = 0; k < width; k++) {
            const float weight = (float)k / width;

            g[first_bin + k] = (1.f - weight) * bandE[band] + weight * bandE[band + 1];
        }
    }
}
/**
 * Add a band-dependent amount of the pitch spectrum P to X, then rescale X
 * so that every band regains its original energy Ex.
 *
 * Per band, the mixing ratio is 1 when Exp exceeds g, otherwise it is
 * derived from Exp and g; it is clipped to [0, 1], square-rooted and scaled
 * by sqrt(Ex / Ep). Both the ratios and the final normalisation gains are
 * interpolated to per-bin values with interp_band_gain().
 *
 * @param X   spectrum, FREQ_SIZE bins, modified in place
 * @param P   pitch spectrum, FREQ_SIZE bins
 * @param Ex  NB_BANDS band energies of X before filtering
 * @param Ep  NB_BANDS band energies of P
 * @param Exp NB_BANDS normalised band correlations of X and P
 * @param g   NB_BANDS band gains
 */
static void pitch_filter(AVComplexFloat *X, const AVComplexFloat *P, const float *Ex, const float *Ep,
                         const float *Exp, const float *g)
{
    float ratio[NB_BANDS];
    float filteredE[NB_BANDS];
    float renorm[NB_BANDS];
    float ratio_bins[FREQ_SIZE] = {0};
    float renorm_bins[FREQ_SIZE] = {0};
    int i;

    for (i = 0; i < NB_BANDS; i++) {
        float mix;

        if (Exp[i] > g[i])
            mix = 1;
        else
            mix = SQUARE(Exp[i]) * (1 - SQUARE(g[i])) / (.001 + SQUARE(g[i]) * (1 - SQUARE(Exp[i])));

        mix  = sqrtf(av_clipf(mix, 0, 1));
        mix *= sqrtf(Ex[i] / (1e-8 + Ep[i]));
        ratio[i] = mix;
    }

    interp_band_gain(ratio_bins, ratio);
    for (i = 0; i < FREQ_SIZE; i++) {
        X[i].re += ratio_bins[i] * P[i].re;
        X[i].im += ratio_bins[i] * P[i].im;
    }

    /* Restore the original energy of every band. */
    compute_band_energy(filteredE, X);
    for (i = 0; i < NB_BANDS; i++)
        renorm[i] = sqrtf(Ex[i] / (1e-8 + filteredE[i]));

    interp_band_gain(renorm_bins, renorm);
    for (i = 0; i < FREQ_SIZE; i++) {
        X[i].re *= renorm_bins[i];
        X[i].im *= renorm_bins[i];
    }
}
static
const
float
tansig_table
[
201
]
=
{
0
.
000000
f
,
0
.
03
9979
f
,
0
.
07
9830
f
,
0
.
119427
f
,
0
.
158649
f
,
0
.
197375
f
,
0
.
235496
f
,
0
.
272905
f
,
0
.
309507
f
,
0
.
345214
f
,
0
.
379949
f
,
0
.
413644
f
,
0
.
446244
f
,
0
.
477700
f
,
0
.
507977
f
,
0
.
537050
f
,
0
.
564900
f
,
0
.
591519
f
,
0
.
616909
f
,
0
.
641077
f
,
0
.
664037
f
,
0
.
685809
f
,
0
.
706419
f
,
0
.
725897
f
,
0
.
744277
f
,
0
.
761594
f
,
0
.
777888
f
,
0
.
793199
f
,
0
.
807569
f
,
0
.
821040
f
,
0
.
833655
f
,
0
.
845456
f
,
0
.
856485
f
,
0
.
866784
f
,
0
.
876393
f
,
0
.
885352
f
,
0
.
893698
f
,
0
.
901468
f
,
0
.
908698
f
,
0
.
915420
f
,
0
.
921669
f
,
0
.
927473
f
,
0
.
932862
f
,
0
.
937863
f
,
0
.
942503
f
,
0
.
946806
f
,
0
.
950795
f
,
0
.
954492
f
,
0
.
957917
f
,
0
.
961090
f
,
0
.
964028
f
,
0
.
966747
f
,
0
.
969265
f
,
0
.
971594
f
,
0
.
973749
f
,
0
.
975743
f
,
0
.
977587
f
,
0
.
979293
f
,
0
.
980869
f
,
0
.
982327
f
,
0
.
983675
f
,
0
.
984921
f
,
0
.
986072
f
,
0
.
987136
f
,
0
.
988119
f
,
0
.
989027
f
,
0
.
989867
f
,
0
.
990642
f
,
0
.
991359
f
,
0
.
992020
f
,
0
.
992631
f
,
0
.
993196
f
,
0
.
993718
f
,
0
.
994199
f
,
0
.
994644
f
,
0
.
995055
f
,
0
.
995434
f
,
0
.
995784
f
,
0
.
996108
f
,
0
.
996407
f
,
0
.
996682
f
,
0
.
996937
f
,
0
.
997172
f
,
0
.
997389
f
,
0
.
997590
f
,
0
.
997775
f
,
0
.
997946
f
,
0
.
998104
f
,
0
.
998249
f
,
0
.
998384
f
,
0
.
998508
f
,
0
.
998623
f
,
0
.
998728
f
,
0
.
998826
f
,
0
.
998916
f
,
0
.
999000
f
,
0
.
999076
f
,
0
.
999147
f
,
0
.
999213
f
,
0
.
999273
f
,
0
.
999329
f
,
0
.
999381
f
,
0
.
999428
f
,
0
.
999472
f
,
0
.
999513
f
,
0
.
999550
f
,
0
.
999585
f
,
0
.
999617
f
,
0
.
999646
f
,
0
.
999673
f
,
0
.
999699
f
,
0
.
999722
f
,
0
.
999743
f
,
0
.
999763
f
,
0
.
999781
f
,
0
.
999798
f
,
0
.
999813
f
,
0
.
999828
f
,
0
.
999841
f
,
0
.
999853
f
,
0
.
999865
f
,
0
.
999875
f
,
0
.
999885
f
,
0
.
999893
f
,
0
.
999902
f
,
0
.
999909
f
,
0
.
999916
f
,
0
.
999923
f
,
0
.
999929
f
,
0
.
999934
f
,
0
.
999939
f
,
0
.
999944
f
,
0
.
999948
f
,
0
.
999952
f
,
0
.
999956
f
,
0
.
999959
f
,
0
.
999962
f
,
0
.
999965
f
,
0
.
999968
f
,
0
.
999970
f
,
0
.
999973
f
,
0
.
999975
f
,
0
.
999977
f
,
0
.
999978
f
,
0
.
999980
f
,
0
.
999982
f
,
0
.
999983
f
,
0
.
999984
f
,
0
.
999986
f
,
0
.
999987
f
,
0
.
999988
f
,
0
.
999989
f
,
0
.
999990
f
,
0
.
999990
f
,
0
.
999991
f
,
0
.
999992
f
,
0
.
999992
f
,
0
.
999993
f
,
0
.
999994
f
,
0
.
999994
f
,
0
.
999994
f
,
0
.
999995
f
,
0
.
999995
f
,
0
.
999996
f
,
0
.
999996
f
,
0
.
999996
f
,
0
.
999997
f
,
0
.
999997
f
,
0
.
999997
f
,
0
.
999997
f
,
0
.
999997
f
,
0
.
999998
f
,
0
.
999998
f
,
0
.
999998
f
,
0
.
999998
f
,
0
.
999998
f
,
0
.
999998
f
,
0
.
999999
f
,
0
.
999999
f
,
0
.
999999
f
,
0
.
999999
f
,
0
.
999999
f
,
0
.
999999
f
,
0
.
999999
f
,
0
.
999999
f
,
0
.
999999
f
,
0
.
999999
f
,
0
.
999999
f
,
0
.
999999
f
,
0
.
999999
f
,
1
.
000000
f
,
1
.
000000
f
,
1
.
000000
f
,
1
.
000000
f
,
1
.
000000
f
,
1
.
000000
f
,
1
.
000000
f
,
1
.
000000
f
,
1
.
000000
f
,
1
.
000000
f
,
1
.
000000
f
,
};
static
inline
float
tansig_approx
(
float
x
)
{
float
y
,
dy
;
float
sign
=
1
;
int
i
;
/* Tests are reversed to catch NaNs */
if
(
!
(
x
<
8
))
return
1
;
if
(
!
(
x
>-
8
))
return
-
1
;
/* Another check in case of -ffast-math */
if
(
isnan
(
x
))
return
0
;
if
(
x
<
0
)
{
x
=-
x
;
sign
=-
1
;
}
i
=
(
int
)
floor
(.
5
f
+
25
*
x
);
x
-=
.
04
f
*
i
;
y
=
tansig_table
[
i
];
dy
=
1
-
y
*
y
;
y
=
y
+
x
*
dy
*
(
1
-
y
*
x
);
return
sign
*
y
;
}
static
inline
float
sigmoid_approx
(
float
x
)
{
return
.
5
f
+
.
5
f
*
tansig_approx
(.
5
f
*
x
);
}
/**
 * Evaluate a fully connected layer.
 *
 * For every neuron i:
 *   output[i] = act(WEIGHTS_SCALE * (bias[i] + sum_j W[j * N + i] * input[j]))
 * where N is the number of neurons and act is the layer's activation
 * (sigmoid, tanh or ReLU). Any other activation value trips av_assert0().
 *
 * @param layer  layer description (sizes, weights, bias, activation)
 * @param output receives layer->nb_neurons values
 * @param input  layer->nb_inputs values
 */
static void compute_dense(const DenseLayer *layer, float *output, const float *input)
{
    const int nb_out = layer->nb_neurons;
    const int nb_in  = layer->nb_inputs;
    const int stride = nb_out;
    int n;

    /* Scaled affine part: weights are stored input-major, one column per neuron. */
    for (n = 0; n < nb_out; n++) {
        float acc = layer->bias[n];

        for (int k = 0; k < nb_in; k++)
            acc += layer->input_weights[k * stride + n] * input[k];

        output[n] = WEIGHTS_SCALE * acc;
    }

    /* Activation, applied in place. */
    switch (layer->activation) {
    case ACTIVATION_SIGMOID:
        for (n = 0; n < nb_out; n++)
            output[n] = sigmoid_approx(output[n]);
        break;
    case ACTIVATION_TANH:
        for (n = 0; n < nb_out; n++)
            output[n] = tansig_approx(output[n]);
        break;
    case ACTIVATION_RELU:
        for (n = 0; n < nb_out; n++)
            output[n] = FFMAX(0, output[n]);
        break;
    default:
        av_assert0(0);
    }
}
/**
 * Advance a GRU layer by one step and store the new state in place.
 *
 * Weight rows are addressed with sizes rounded up to a multiple of 4
 * (FFALIGN), three gate sections per row. The update gate z and the reset
 * gate r are computed with the float DSP scalar product over those padded
 * lengths, so both input and state are read up to the padded size. The
 * candidate output uses state[j] * r[j] in its recurrent part and is then
 * blended with the old state: h = z * state + (1 - z) * candidate.
 *
 * @param s     filter context, provides the float DSP functions
 * @param gru   layer description (sizes, weights, bias, activation)
 * @param state layer state, updated in place (gru->nb_neurons values written)
 * @param input layer input
 */
static void compute_gru(AudioRNNContext *s, const GRULayer *gru, float *state, const float *input)
{
    LOCAL_ALIGNED_32(float, z, [MAX_NEURONS]);
    LOCAL_ALIGNED_32(float, r, [MAX_NEURONS]);
    LOCAL_ALIGNED_32(float, h, [MAX_NEURONS]);
    const int nb_in   = gru->nb_inputs;
    const int nb_out  = gru->nb_neurons;
    const int pad_out = FFALIGN(nb_out, 4);
    const int pad_in  = FFALIGN(nb_in, 4);
    const int rstride = 3 * pad_out;
    const int istride = 3 * pad_in;
    int n;

    /* Update gate (section 0) and reset gate (section 1); both only read
     * input and the old state, so they can share one pass. */
    for (n = 0; n < nb_out; n++) {
        float zsum = gru->bias[n];
        float rsum = gru->bias[nb_out + n];

        zsum += s->fdsp->scalarproduct_float(gru->input_weights + n * istride, input, pad_in);
        zsum += s->fdsp->scalarproduct_float(gru->recurrent_weights + n * rstride, state, pad_out);
        z[n]  = sigmoid_approx(WEIGHTS_SCALE * zsum);

        rsum += s->fdsp->scalarproduct_float(gru->input_weights + pad_in + n * istride, input, pad_in);
        rsum += s->fdsp->scalarproduct_float(gru->recurrent_weights + pad_out + n * rstride, state, pad_out);
        r[n]  = sigmoid_approx(WEIGHTS_SCALE * rsum);
    }

    /* Candidate output (section 2) and blend with the previous state. */
    for (n = 0; n < nb_out; n++) {
        float sum = gru->bias[2 * nb_out + n];

        sum += s->fdsp->scalarproduct_float(gru->input_weights + 2 * pad_in + n * istride, input, pad_in);
        for (int k = 0; k < nb_out; k++)
            sum += gru->recurrent_weights[2 * pad_out + n * rstride + k] * state[k] * r[k];

        switch (gru->activation) {
        case ACTIVATION_SIGMOID:
            sum = sigmoid_approx(WEIGHTS_SCALE * sum);
            break;
        case ACTIVATION_TANH:
            sum = tansig_approx(WEIGHTS_SCALE * sum);
            break;
        case ACTIVATION_RELU:
            sum = FFMAX(0, WEIGHTS_SCALE * sum);
            break;
        default:
            av_assert0(0);
        }

        h[n] = z[n] * state[n] + (1.f - z[n]) * sum;
    }

    RNN_COPY(state, h, nb_out);
}
#define INPUT_SIZE 42

/**
 * Run the whole network for one frame.
 *
 * Order of evaluation:
 *   input_dense -> vad_gru -> vad_output (writes *vad)
 *   noise_gru   on [input_dense output | vad_gru state | input]
 *   denoise_gru on [vad_gru state | noise_gru state | input]
 *   denoise_output (writes gains)
 *
 * compute_gru() reads its input up to FFALIGN(nb_inputs, 4) floats. The
 * scratch buffers below are therefore cleared before they are filled, so
 * the padding lanes hold 0 rather than whatever was left on the stack
 * (a stray NaN/Inf there would otherwise poison the scalar product).
 *
 * @param s     filter context, forwarded to compute_gru()
 * @param rnn   per-channel network state and model
 * @param gains receives the output of the denoise_output layer
 * @param vad   receives the output of the vad_output layer
 * @param input INPUT_SIZE input features
 */
static void compute_rnn(AudioRNNContext *s, RNNState *rnn, float *gains, float *vad, const float *input)
{
    LOCAL_ALIGNED_32(float, dense_out,     [MAX_NEURONS]);
    LOCAL_ALIGNED_32(float, noise_input,   [MAX_NEURONS * 3]);
    LOCAL_ALIGNED_32(float, denoise_input, [MAX_NEURONS * 3]);
    const int dense_size = rnn->model->input_dense_size;
    const int vad_size   = rnn->model->vad_gru_size;
    const int noise_size = rnn->model->noise_gru_size;

    /* Explicit sizes: LOCAL_ALIGNED_32 may declare a pointer rather than an
     * array, so sizeof on the name itself is not reliable. */
    memset(dense_out,     0, MAX_NEURONS     * sizeof(*dense_out));
    memset(noise_input,   0, MAX_NEURONS * 3 * sizeof(*noise_input));
    memset(denoise_input, 0, MAX_NEURONS * 3 * sizeof(*denoise_input));

    compute_dense(rnn->model->input_dense, dense_out, input);
    compute_gru(s, rnn->model->vad_gru, rnn->vad_gru_state, dense_out);
    compute_dense(rnn->model->vad_output, vad, rnn->vad_gru_state);

    /* noise_gru input: [dense output | vad state | raw features] */
    memcpy(noise_input, dense_out, dense_size * sizeof(float));
    memcpy(noise_input + dense_size, rnn->vad_gru_state, vad_size * sizeof(float));
    memcpy(noise_input + dense_size + vad_size, input, INPUT_SIZE * sizeof(float));
    compute_gru(s, rnn->model->noise_gru, rnn->noise_gru_state, noise_input);

    /* denoise_gru input: [vad state | noise state | raw features] */
    memcpy(denoise_input, rnn->vad_gru_state, vad_size * sizeof(float));
    memcpy(denoise_input + vad_size, rnn->noise_gru_state, noise_size * sizeof(float));
    memcpy(denoise_input + vad_size + noise_size, input, INPUT_SIZE * sizeof(float));
    compute_gru(s, rnn->model->denoise_gru, rnn->denoise_gru_state, denoise_input);

    compute_dense(rnn->model->denoise_output, gains, rnn->denoise_gru_state);
}
/**
 * Process one FRAME_SIZE block of a single channel.
 *
 * The input is run through biquad() with the fixed coefficients below,
 * analysed by compute_frame_features(), and - unless that call returns
 * non-zero (kept in `silence`) - passed through the network, the pitch
 * filter and the interpolated band gains before frame_synthesis() writes
 * the output.
 *
 * @param s   filter context
 * @param st  per-channel state; lastg[] is updated with the applied gains
 * @param out output samples
 * @param in  input samples, FRAME_SIZE floats are read
 * @return the value written by compute_rnn() through its vad argument, or 0
 *         when the network was skipped
 */
static float rnnoise_channel(AudioRNNContext *s, DenoiseState *st, float *out, const float *in)
{
    static const float a_hp[2] = { -1.99599, 0.99600 };
    static const float b_hp[2] = { -2, 1 };
    const float alpha = .6f;
    AVComplexFloat X[FREQ_SIZE];
    AVComplexFloat P[WINDOW_SIZE];
    float x[FRAME_SIZE];
    float Ex[NB_BANDS], Ep[NB_BANDS], Exp[NB_BANDS];
    float features[NB_FEATURES];
    float g[NB_BANDS];
    float gf[FREQ_SIZE];
    float vad_prob = 0;
    int silence;

    biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE);
    silence = compute_frame_features(s, st, X, P, Ex, Ep, Exp, features, x);

    if (silence) {
        /* Nothing to denoise: resynthesise the spectrum untouched. */
        frame_synthesis(s, st, out, X);
        return vad_prob;
    }

    compute_rnn(s, &st->rnn, g, &vad_prob, features);
    pitch_filter(X, P, Ex, Ep, Exp, g);

    /* A gain may not fall below alpha times the gain of the previous frame. */
    for (int band = 0; band < NB_BANDS; band++) {
        g[band]         = FFMAX(g[band], alpha * st->lastg[band]);
        st->lastg[band] = g[band];
    }

    interp_band_gain(gf, g);
    for (int bin = 0; bin < FREQ_SIZE; bin++) {
        X[bin].re *= gf[bin];
        X[bin].im *= gf[bin];
    }

    frame_synthesis(s, st, out, X);
    return vad_prob;
}
/**
 * Argument bundle passed to rnnoise_channels(): the input frame to read
 * from and the output frame to write to.
 */
typedef struct ThreadData {
    AVFrame *in, *out;
} ThreadData;
/**
 * Slice worker: denoise the channels assigned to job jobnr.
 *
 * The channels of the output frame are divided evenly between nb_jobs jobs;
 * each channel is processed with its own DenoiseState from s->st.
 *
 * @param ctx     filter context
 * @param arg     ThreadData with the input and output frames
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs
 * @return always 0
 */
static int rnnoise_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    AudioRNNContext *s = ctx->priv;
    const ThreadData *td = arg;
    const AVFrame *src = td->in;
    AVFrame *dst = td->out;
    const int last = (dst->channels * (jobnr + 1)) / nb_jobs;
    int ch = (dst->channels * jobnr) / nb_jobs;

    for (; ch < last; ch++)
        rnnoise_channel(s, &s->st[ch],
                        (float *)dst->extended_data[ch],
                        (const float *)src->extended_data[ch]);

    return 0;
}
/**
 * Denoise one input frame and send the result downstream.
 *
 * A FRAME_SIZE output buffer is allocated, the frame properties of the input
 * (timestamps, metadata, side data) are copied onto it, and the channels are
 * processed in parallel by rnnoise_channels(). The input frame is always
 * consumed, on success as well as on failure.
 *
 * @param inlink input link the frame arrived on
 * @param in     input frame; ownership is taken
 * @return the result of ff_filter_frame(), or a negative AVERROR code if the
 *         output frame could not be allocated or set up
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *out = NULL;
    ThreadData td;
    int ret;

    out = ff_get_audio_buffer(outlink, FRAME_SIZE);
    if (!out) {
        av_frame_free(&in);
        return AVERROR(ENOMEM);
    }

    /* Carry over all frame properties, not just pts, so that metadata and
     * side data attached upstream are not lost. */
    ret = av_frame_copy_props(out, in);
    if (ret < 0) {
        av_frame_free(&out);
        av_frame_free(&in);
        return ret;
    }

    /* NOTE(review): rnnoise_channel() always reads FRAME_SIZE samples per
     * channel; if the framework can hand over a shorter final frame at EOF,
     * it would need padding here - confirm against ff_inlink_consume_samples(). */
    td.in  = in;
    td.out = out;
    ctx->internal->execute(ctx, rnnoise_channels, &td, NULL,
                           FFMIN(outlink->channels, ff_filter_get_nb_threads(ctx)));

    av_frame_free(&in);
    return ff_filter_frame(outlink, out);
}
/**
 * Scheduling callback: consume FRAME_SIZE samples from the input when
 * available and filter them; otherwise forward status and frame requests
 * between the two links.
 *
 * @return a negative error from ff_inlink_consume_samples(), the result of
 *         filter_frame() when samples were consumed, or FFERROR_NOT_READY
 *         when there was nothing to do (the forwarding macros may also
 *         return on their own)
 */
static int activate(AVFilterContext *ctx)
{
    AVFilterLink *inlink  = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *frame = NULL;
    int got;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    got = ff_inlink_consume_samples(inlink, FRAME_SIZE, FRAME_SIZE, &frame);
    if (got)
        return got < 0 ? got : filter_frame(inlink, frame);

    FF_FILTER_FORWARD_STATUS(inlink, outlink);
    FF_FILTER_FORWARD_WANTED(outlink, inlink);

    return FFERROR_NOT_READY;
}
/**
 * Filter initialisation: allocate the float DSP context, load the model
 * named by the "model" option, and precompute the analysis window and the
 * DCT table.
 *
 * Every failure path logs the reason, since a bare EINVAL gives the user no
 * hint whether the option was missing, the file unreadable or its contents
 * rejected.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the DSP context cannot be
 *         allocated, AVERROR(EINVAL) if the model is missing, cannot be
 *         opened or cannot be loaded
 */
static av_cold int init(AVFilterContext *ctx)
{
    AudioRNNContext *s = ctx->priv;
    FILE *f;

    s->fdsp = avpriv_float_dsp_alloc(0);
    if (!s->fdsp)
        return AVERROR(ENOMEM);

    if (!s->model_name) {
        av_log(ctx, AV_LOG_ERROR, "No model file given; the 'model' option is required.\n");
        return AVERROR(EINVAL);
    }

    f = av_fopen_utf8(s->model_name, "r");
    if (!f) {
        av_log(ctx, AV_LOG_ERROR, "Failed to open model file: %s\n", s->model_name);
        return AVERROR(EINVAL);
    }

    s->model = rnnoise_model_from_file(f);
    fclose(f);
    if (!s->model) {
        av_log(ctx, AV_LOG_ERROR, "Failed to load model from file: %s\n", s->model_name);
        return AVERROR(EINVAL);
    }

    /* Symmetric window: the first FRAME_SIZE samples are computed, the
     * second half mirrors them. */
    for (int i = 0; i < FRAME_SIZE; i++) {
        s->window[i] = sin(.5 * M_PI * sin(.5 * M_PI * (i + .5) / FRAME_SIZE) *
                                       sin(.5 * M_PI * (i + .5) / FRAME_SIZE));
        s->window[WINDOW_SIZE - 1 - i] = s->window[i];
    }

    /* NB_BANDS x NB_BANDS cosine table; column 0 is scaled by sqrt(1/2). */
    for (int i = 0; i < NB_BANDS; i++) {
        for (int j = 0; j < NB_BANDS; j++) {
            s->dct_table[i * NB_BANDS + j] = cosf((i + .5f) * j * M_PI / NB_BANDS);
            if (j == 0)
                s->dct_table[i * NB_BANDS + j] *= sqrtf(.5);
        }
    }

    return 0;
}
/**
 * Release everything owned by the filter: the float DSP context, the model,
 * and for every channel the three GRU state buffers and both transform
 * contexts, followed by the per-channel state array itself.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    AudioRNNContext *s = ctx->priv;

    av_freep(&s->fdsp);

    rnnoise_model_free(s->model);
    s->model = NULL;

    /* The loop body is skipped entirely when no channel state was allocated. */
    for (int ch = 0; s->st && ch < s->channels; ch++) {
        DenoiseState *st = &s->st[ch];

        av_freep(&st->rnn.vad_gru_state);
        av_freep(&st->rnn.noise_gru_state);
        av_freep(&st->rnn.denoise_gru_state);
        av_tx_uninit(&st->tx);
        av_tx_uninit(&st->txi);
    }

    av_freep(&s->st);
}
/* Single audio input pad; config_input is installed as its config_props
 * callback. The list is terminated by a { NULL } entry. */
static const AVFilterPad inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
    },
    { NULL }
};
/* Single audio output pad with no callbacks; the list is terminated by a
 * { NULL } entry. */
static const AVFilterPad outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
    },
    { NULL }
};
/* OFFSET() locates an option's storage inside AudioRNNContext; AF is the
 * flag set shared by all options of this filter. */
#define OFFSET(x) offsetof(AudioRNNContext, x)
#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

/* "model" and its short form "m" both store into model_name; the default is
 * NULL, which init() rejects. */
static const AVOption arnndn_options[] = {
    { "model", "set model name", OFFSET(model_name), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, AF },
    { "m",     "set model name", OFFSET(model_name), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, AF },
    { NULL }
};

AVFILTER_DEFINE_CLASS(arnndn);
/* Filter definition for "arnndn". Frames are driven through the activate
 * callback, and the filter sets AVFILTER_FLAG_SLICE_THREADS. */
AVFilter ff_af_arnndn = {
    .name          = "arnndn",
    .description   = NULL_IF_CONFIG_SMALL("Reduce noise from speech using Recurrent Neural Networks."),
    .query_formats = query_formats,
    .priv_size     = sizeof(AudioRNNContext),
    .priv_class    = &arnndn_class,
    .activate      = activate,
    .init          = init,
    .uninit        = uninit,
    .inputs        = inputs,
    .outputs       = outputs,
    .flags         = AVFILTER_FLAG_SLICE_THREADS,
};
libavfilter/allfilters.c
View file @
b0bfa369
...
...
@@ -65,6 +65,7 @@ extern AVFilter ff_af_apulsator;
 extern AVFilter ff_af_arealtime;
 extern AVFilter ff_af_aresample;
 extern AVFilter ff_af_areverse;
+extern AVFilter ff_af_arnndn;
 extern AVFilter ff_af_aselect;
 extern AVFilter ff_af_asendcmd;
 extern AVFilter ff_af_asetnsamples;
...
...
libavfilter/version.h
View file @
b0bfa369
...
...
@@ -30,8 +30,8 @@
#include "libavutil/version.h"
#define LIBAVFILTER_VERSION_MAJOR 7
-#define LIBAVFILTER_VERSION_MINOR 62
-#define LIBAVFILTER_VERSION_MICRO 101
+#define LIBAVFILTER_VERSION_MINOR 63
+#define LIBAVFILTER_VERSION_MICRO 100
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment