Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
f49cec2b
Commit
f49cec2b
authored
May 05, 2019
by
Paul B Mahol
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter: add asr filter
parent
670251de
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
224 additions
and
1 deletion
+224
-1
Changelog
Changelog
+1
-0
configure
configure
+4
-0
filters.texi
doc/filters.texi
+35
-0
Makefile
libavfilter/Makefile
+1
-0
af_asr.c
libavfilter/af_asr.c
+181
-0
allfilters.c
libavfilter/allfilters.c
+1
-0
version.h
libavfilter/version.h
+1
-1
No files found.
Changelog
View file @
f49cec2b
...
...
@@ -29,6 +29,7 @@ version <next>:
- Support decoding of HEVC 4:4:4 content in vdpau
- colorhold filter
- xmedian filter
- asr filter
version 4.1:
...
...
configure
View file @
f49cec2b
...
...
@@ -307,6 +307,7 @@ External library support:
--enable-opengl enable OpenGL rendering [no]
--enable-openssl enable openssl, needed for https support
if gnutls, libtls or mbedtls is not used [no]
--enable-pocketsphinx enable PocketSphinx, needed for asr filter [no]
--disable-sndio disable sndio support [autodetect]
--disable-schannel disable SChannel SSP, needed for TLS support on
Windows if openssl and gnutls are not used [autodetect]
...
...
@@ -1799,6 +1800,7 @@ EXTERNAL_LIBRARY_LIST="
mediacodec
openal
opengl
pocketsphinx
vapoursynth
"
...
...
@@ -3400,6 +3402,7 @@ afir_filter_deps="avcodec"
afir_filter_select
=
"fft"
amovie_filter_deps
=
"avcodec avformat"
aresample_filter_deps
=
"swresample"
asr_filter_deps
=
"pocketsphinx"
ass_filter_deps
=
"libass"
atempo_filter_deps
=
"avcodec"
atempo_filter_select
=
"rdft"
...
...
@@ -6298,6 +6301,7 @@ enabled openssl && { check_pkg_config openssl openssl openssl/ssl.h OP
check_lib openssl openssl/ssl.h SSL_library_init
-lssl32
-leay32
||
check_lib openssl openssl/ssl.h SSL_library_init
-lssl
-lcrypto
-lws2_32
-lgdi32
||
die
"ERROR: openssl not found"
;
}
enabled pocketsphinx
&&
require_pkg_config pocketsphinx pocketsphinx pocketsphinx/pocketsphinx.h ps_init
enabled rkmpp
&&
{
require_pkg_config rkmpp rockchip_mpp rockchip/rk_mpi.h mpp_create
&&
require_pkg_config rockchip_mpp
"rockchip_mpp >= 1.3.7"
rockchip/rk_mpi.h mpp_create
&&
{
enabled libdrm
||
...
...
doc/filters.texi
View file @
f49cec2b
...
...
@@ -2131,6 +2131,41 @@ It accepts the following values:
Set additional parameter which controls sigmoid function.
@end table
@section asr
Automatic Speech Recognition
This filter uses PocketSphinx for speech recognition. To enable
compilation of this filter, you need to configure FFmpeg with
@code{--enable-pocketsphinx}.
It accepts the following options:
@table @option
@item rate
Set sampling rate of input audio. Default is @code{16000}.
This needs to match the speech models, otherwise one will get poor results.
@item hmm
Set directory containing acoustic model files.
@item dict
Set pronunciation dictionary.
@item lm
Set language model file.
@item lmctl
Set language model set.
@item lmname
Set which language model to use.
@item logfn
Set output for log messages.
@end table
The filter exports recognized speech as the frame metadata @code{lavfi.asr.text}.
@anchor{astats}
@section astats
...
...
libavfilter/Makefile
View file @
f49cec2b
...
...
@@ -82,6 +82,7 @@ OBJS-$(CONFIG_ASHOWINFO_FILTER) += af_ashowinfo.o
OBJS-$(CONFIG_ASIDEDATA_FILTER)
+=
f_sidedata.o
OBJS-$(CONFIG_ASOFTCLIP_FILTER)
+=
af_asoftclip.o
OBJS-$(CONFIG_ASPLIT_FILTER)
+=
split.o
OBJS-$(CONFIG_ASR_FILTER)
+=
af_asr.o
OBJS-$(CONFIG_ASTATS_FILTER)
+=
af_astats.o
OBJS-$(CONFIG_ASTREAMSELECT_FILTER)
+=
f_streamselect.o
framesync.o
OBJS-$(CONFIG_ATEMPO_FILTER)
+=
af_atempo.o
...
...
libavfilter/af_asr.c
0 → 100644
View file @
f49cec2b
/*
* Copyright (c) 2019 Paul B Mahol
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <pocketsphinx/pocketsphinx.h>
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/opt.h"
#include "audio.h"
#include "avfilter.h"
#include "internal.h"
/**
 * Private context of the asr (automatic speech recognition) filter.
 */
typedef struct ASRContext {
    const AVClass *class;       /* referenced by the filter's priv_class */

    /* User-settable options, see asr_options[]. */
    int   rate;                 /* sampling rate */
    char *hmm;                  /* directory containing acoustic model files */
    char *dict;                 /* pronunciation dictionary */
    char *lm;                   /* language model file */
    char *lmctl;                /* language model set */
    char *lmname;               /* which language model to use */
    char *logfn;                /* output for log messages */

    /* PocketSphinx state created in asr_init() and released in asr_uninit(). */
    ps_decoder_t *ps;
    cmd_ln_t     *config;

    /* Set to 1 once speech is detected, cleared after a hypothesis is read. */
    int utt_started;
} ASRContext;
#define OFFSET(x) offsetof(ASRContext, x)
#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)

/* Options of the asr filter; the string options are passed to PocketSphinx in asr_init(). */
static const AVOption asr_options[] = {
    { "rate",   "set sampling rate",                             OFFSET(rate),   AV_OPT_TYPE_INT,    {.i64 = 16000},       0, INT_MAX, .flags = FLAGS },
    { "hmm",    "set directory containing acoustic model files", OFFSET(hmm),    AV_OPT_TYPE_STRING, {.str = NULL},                    .flags = FLAGS },
    { "dict",   "set pronunciation dictionary",                  OFFSET(dict),   AV_OPT_TYPE_STRING, {.str = NULL},                    .flags = FLAGS },
    { "lm",     "set language model file",                       OFFSET(lm),     AV_OPT_TYPE_STRING, {.str = NULL},                    .flags = FLAGS },
    { "lmctl",  "set language model set",                        OFFSET(lmctl),  AV_OPT_TYPE_STRING, {.str = NULL},                    .flags = FLAGS },
    { "lmname", "set which language model to use",               OFFSET(lmname), AV_OPT_TYPE_STRING, {.str = NULL},                    .flags = FLAGS },
    { "logfn",  "set output for log messages",                   OFFSET(logfn),  AV_OPT_TYPE_STRING, {.str = "/dev/null"},             .flags = FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(asr);
/**
 * Feed one input frame to the PocketSphinx decoder and forward the frame.
 *
 * The samples in in->data[0] are handed to the decoder as int16_t. When the
 * decoder reports that speech has stopped after having been detected, the
 * utterance is ended, its hypothesis (if any) is stored in the frame
 * metadata under "lavfi.asr.text", and a new utterance is started.
 *
 * @param inlink input link the frame arrived on
 * @param in     input frame; passed on to the first output
 * @return the result of ff_filter_frame()
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx     = inlink->dst;
    AVFilterLink    *outlink = ctx->outputs[0];
    ASRContext      *s       = ctx->priv;
    int in_speech;

    ps_process_raw(s->ps, (const int16_t *)in->data[0], in->nb_samples, 0, 0);
    in_speech = ps_get_in_speech(s->ps);

    if (in_speech) {
        /* Remember that the current utterance contains speech. */
        s->utt_started = 1;
    } else if (s->utt_started) {
        /* Speech just ended: close the utterance and export its text. */
        const char *text;

        ps_end_utt(s->ps);
        text = ps_get_hyp(s->ps, NULL);
        if (text)
            av_dict_set(&in->metadata, "lavfi.asr.text", text, 0);

        /* Start the next utterance straight away. */
        ps_start_utt(s->ps);
        s->utt_started = 0;
    }

    return ff_filter_frame(outlink, in);
}
/**
 * Input pad configuration callback: starts the first utterance on the
 * decoder created in asr_init().
 *
 * @param inlink input link being configured
 * @return always 0
 */
static int config_input(AVFilterLink *inlink)
{
    ASRContext *s = inlink->dst->priv;

    ps_start_utt(s->ps);

    return 0;
}
/**
 * Create the PocketSphinx configuration and decoder from the filter options.
 *
 * The option strings are passed to cmd_ln_parse_r() as "-name value" pairs;
 * the sampling rate is formatted as a floating point string for "-samprate".
 *
 * @param ctx filter context whose priv data is an ASRContext
 * @return 0 on success, AVERROR(ENOMEM) if the rate string cannot be
 *         allocated or if the configuration or decoder cannot be created
 */
static av_cold int asr_init(AVFilterContext *ctx)
{
    ASRContext *s = ctx->priv;
    const float frate = s->rate;
    char *rate = av_asprintf("%f", frate);
    const char *argv[] = { "-logfn",    s->logfn,
                           "-hmm",      s->hmm,
                           "-lm",       s->lm,
                           "-lmctl",    s->lmctl,
                           "-lmname",   s->lmname,
                           "-dict",     s->dict,
                           "-samprate", rate,
                           NULL };
    /* Number of entries before the terminating NULL. */
    const int argc = (int)(sizeof(argv) / sizeof(argv[0])) - 1;

    /* Without this check a failed allocation would silently drop the rate. */
    if (!rate)
        return AVERROR(ENOMEM);

    s->config = cmd_ln_parse_r(NULL, ps_args(), argc, (char **)argv, 0);
    av_free(rate);
    if (!s->config)
        return AVERROR(ENOMEM);

    ps_default_search_args(s->config);
    s->ps = ps_init(s->config);
    if (!s->ps)
        return AVERROR(ENOMEM);

    return 0;
}
/**
 * Restrict the filter to signed 16-bit samples, mono channel layout and the
 * sampling rate given by the "rate" option.
 *
 * @param ctx filter context
 * @return 0 on success, or the first negative error code returned by the
 *         format negotiation helpers
 */
static int query_formats(AVFilterContext *ctx)
{
    ASRContext *s = ctx->priv;
    int sample_rates[] = { s->rate, -1 };
    AVFilterFormats *formats = NULL;
    AVFilterChannelLayouts *layout = NULL;
    int ret;

    ret = ff_add_format(&formats, AV_SAMPLE_FMT_S16);
    if (ret < 0)
        return ret;

    ret = ff_set_common_formats(ctx, formats);
    if (ret < 0)
        return ret;

    ret = ff_add_channel_layout(&layout, AV_CH_LAYOUT_MONO);
    if (ret < 0)
        return ret;

    ret = ff_set_common_channel_layouts(ctx, layout);
    if (ret < 0)
        return ret;

    ret = ff_set_common_samplerates(ctx, ff_make_format_list(sample_rates));
    if (ret < 0)
        return ret;

    return 0;
}
/**
 * Release the decoder and its configuration and clear both pointers.
 *
 * @param ctx filter context whose priv data is an ASRContext
 */
static av_cold void asr_uninit(AVFilterContext *ctx)
{
    ASRContext *s = ctx->priv;

    ps_free(s->ps);
    cmd_ln_free_r(s->config);

    s->ps     = NULL;
    s->config = NULL;
}
/* Single audio input; frames go through filter_frame(). */
static const AVFilterPad asr_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .filter_frame = filter_frame,
        .config_props = config_input,
    },
    { NULL }
};
/* Single audio output. */
static const AVFilterPad asr_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
    },
    { NULL }
};
/* Filter definition for "asr". */
AVFilter ff_af_asr = {
    .name          = "asr",
    .description   = NULL_IF_CONFIG_SMALL("Automatic Speech Recognition."),
    .priv_size     = sizeof(ASRContext),
    .priv_class    = &asr_class,
    .init          = asr_init,
    .uninit        = asr_uninit,
    .query_formats = query_formats,
    .inputs        = asr_inputs,
    .outputs       = asr_outputs,
};
libavfilter/allfilters.c
View file @
f49cec2b
...
...
@@ -74,6 +74,7 @@ extern AVFilter ff_af_ashowinfo;
extern
AVFilter
ff_af_asidedata
;
extern
AVFilter
ff_af_asoftclip
;
extern
AVFilter
ff_af_asplit
;
extern
AVFilter
ff_af_asr
;
extern
AVFilter
ff_af_astats
;
extern
AVFilter
ff_af_astreamselect
;
extern
AVFilter
ff_af_atempo
;
...
...
libavfilter/version.h
View file @
f49cec2b
...
...
@@ -30,7 +30,7 @@
#include "libavutil/version.h"
#define LIBAVFILTER_VERSION_MAJOR 7
#define LIBAVFILTER_VERSION_MINOR 5
2
#define LIBAVFILTER_VERSION_MINOR 5
3
#define LIBAVFILTER_VERSION_MICRO 100
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment