Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
C
CharIP-Electron
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ali
CharIP-Electron
Commits
fec7d389
Commit
fec7d389
authored
Dec 05, 2023
by
ali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat: asr 采用 ws 方式,视频数字人遇到无法匹配直接转大模型
parent
c1557511
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
195 additions
and
16 deletions
+195
-16
ShowPhoto.vue
src/renderer/screens/ShowPhoto.vue
+2
-2
ShowVideo.vue
src/renderer/screens/ShowVideo.vue
+190
-13
settings.ts
src/renderer/store/settings.ts
+3
-1
No files found.
src/renderer/screens/ShowPhoto.vue
View file @
fec7d389
...
@@ -250,7 +250,7 @@ function endAudioInput() {
...
@@ -250,7 +250,7 @@ function endAudioInput() {
async
function
onAsr
(
question
:
string
)
{
async
function
onAsr
(
question
:
string
)
{
console
.
log
(
'---------------->question: '
,
question
)
console
.
log
(
'---------------->question: '
,
question
)
endAudioInput
()
endAudioInput
()
const
ws
=
await
initSocket
()
const
ws
=
await
init
LLM
Socket
()
inputContext
.
ws
=
ws
inputContext
.
ws
=
ws
let
sliceAnswer
=
''
let
sliceAnswer
=
''
...
@@ -299,7 +299,7 @@ async function onAsr(question: string) {
...
@@ -299,7 +299,7 @@ async function onAsr(question: string) {
ws
.
send
(
JSON
.
stringify
({
prompt
:
question
,
historys_list
:
[]
}))
ws
.
send
(
JSON
.
stringify
({
prompt
:
question
,
historys_list
:
[]
}))
}
}
function
initSocket
():
Promise
<
WebSocket
>
{
function
init
LLM
Socket
():
Promise
<
WebSocket
>
{
const
ws
=
new
WebSocket
(
settings
.
llmUrl
)
const
ws
=
new
WebSocket
(
settings
.
llmUrl
)
return
new
Promise
((
resolve
,
reject
)
=>
{
return
new
Promise
((
resolve
,
reject
)
=>
{
ws
.
onopen
=
()
=>
resolve
(
ws
)
ws
.
onopen
=
()
=>
resolve
(
ws
)
...
...
src/renderer/screens/ShowVideo.vue
View file @
fec7d389
...
@@ -14,7 +14,8 @@ import useStore from '@/renderer/store'
...
@@ -14,7 +14,8 @@ import useStore from '@/renderer/store'
const
router
=
useRouter
()
const
router
=
useRouter
()
const
route
=
useRoute
()
const
route
=
useRoute
()
const
{
settings
,
video
:
useVideo
}
=
useStore
()
const
{
settings
,
video
:
useVideo
}
=
useStore
()
const
sampleRate
=
48000
let
sampleRate
=
48000
const
bufferSize
=
8192
;
const
iconMicrophone
=
new
URL
(
'/images/microphone-input.svg'
,
import
.
meta
.
url
).
href
const
iconMicrophone
=
new
URL
(
'/images/microphone-input.svg'
,
import
.
meta
.
url
).
href
const
recordVolume
=
ref
(
0
)
const
recordVolume
=
ref
(
0
)
...
@@ -95,9 +96,13 @@ const inputContext: {
...
@@ -95,9 +96,13 @@ const inputContext: {
scriptProcessorNode
?:
ScriptProcessorNode
scriptProcessorNode
?:
ScriptProcessorNode
model
?:
Model
model
?:
Model
ws
?:
WebSocket
ws
?:
WebSocket
}
=
{}
voskWs
?:
WebSocket
asrPartial
:
string
}
=
{
asrPartial
:
''
}
async
function
startAudioInput
()
{
async
function
start
VoskWasm
AudioInput
()
{
if
(
microphoneState
.
value
===
'loading'
)
return
if
(
microphoneState
.
value
===
'loading'
)
return
if
(
microphoneState
.
value
===
'input'
)
{
if
(
microphoneState
.
value
===
'input'
)
{
...
@@ -114,6 +119,7 @@ async function startAudioInput() {
...
@@ -114,6 +119,7 @@ async function startAudioInput() {
}
}
})
})
sampleRate
=
48000
const
mediaStream
=
await
navigator
.
mediaDevices
.
getUserMedia
({
const
mediaStream
=
await
navigator
.
mediaDevices
.
getUserMedia
({
video
:
false
,
video
:
false
,
audio
:
{
audio
:
{
...
@@ -152,6 +158,83 @@ async function startAudioInput() {
...
@@ -152,6 +158,83 @@ async function startAudioInput() {
inputContext
.
audioContext
=
audioContext
inputContext
.
audioContext
=
audioContext
}
}
/**
 * Toggles microphone capture that streams audio to the Vosk WebSocket server.
 *
 * - Does nothing while the microphone state is 'loading'.
 * - When capture is already running ('input'), stops it via endAudioInput().
 * - Otherwise opens the Vosk socket, requests a mono 8 kHz microphone stream,
 *   routes it through a ScriptProcessorNode whose chunks go to postAudio(),
 *   and switches the microphone state to 'input'.
 *
 * Any failure (socket unreachable, permission denied, unsupported sample
 * rate, ...) is logged, the partially acquired resources are released and the
 * state falls back to 'waitInput' instead of surfacing as an unhandled
 * rejection.
 */
async function startVoskWsAudioInput() {
  if (microphoneState.value === 'loading') return
  if (microphoneState.value === 'input') {
    endAudioInput()
    return
  }

  // Mark the setup as in progress so a second pointerdown during the awaits
  // below cannot start a parallel capture session.
  microphoneState.value = 'loading'

  let mediaStream: MediaStream | undefined
  let audioContext: AudioContext | undefined
  try {
    // Wait for the socket: a connection failure must abort the setup rather
    // than leave a floating rejected promise behind.
    await initVoskWS()

    sampleRate = 8000
    mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        echoCancellation: true,
        noiseSuppression: true,
        channelCount: 1,
        sampleRate
      },
      video: false
    })

    audioContext = new AudioContext({ sampleRate })
    const source = audioContext.createMediaStreamSource(mediaStream)
    const processor = audioContext.createScriptProcessor()
    source.connect(processor)
    processor.connect(audioContext.destination)
    processor.onaudioprocess = (audioDataChunk) => postAudio(audioDataChunk)

    await analyzeMicrophoneVolume(mediaStream, (val) => {
      recordVolume.value = val
    })

    // Publish the handles so endAudioInput() can tear everything down,
    // including detaching the onaudioprocess callback.
    inputContext.scriptProcessorNode = processor
    inputContext.audioContext = audioContext
    inputContext.mediaStream = mediaStream
    microphoneState.value = 'input'
  } catch (error) {
    console.error('Failed to start Vosk WebSocket audio input:', error)
    mediaStream?.getTracks().forEach((track) => track.stop())
    audioContext?.close().catch(() => undefined)
    inputContext.voskWs?.close()
    microphoneState.value = 'waitInput'
  }
}
/**
 * Converts one audio-processing chunk to 16-bit signed PCM and sends it to
 * the Vosk WebSocket.
 *
 * The chunk is dropped when no socket exists or the socket is not OPEN.
 *
 * @param audioDataChunk Event delivered to ScriptProcessorNode.onaudioprocess;
 *   only channel 0 of its inputBuffer is read.
 */
function postAudio(audioDataChunk: AudioProcessingEvent) {
  const socket = inputContext.voskWs
  if (!socket || socket.readyState !== WebSocket.OPEN) return

  const samples = audioDataChunk.inputBuffer.getChannelData(0)
  const pcm = new Int16Array(samples.length)
  // Walk every index from 0 to length - 1 so the first sample is converted
  // too and no write lands past the end of the buffer.
  for (let i = 0; i < samples.length; i++) {
    // Clamp on both sides: an unclamped value below -1 would wrap around
    // when stored into the Int16Array.
    const clamped = Math.max(-1, Math.min(1, samples[i]))
    pcm[i] = 32767 * clamped
  }
  socket.send(pcm.buffer)
}
/**
 * Opens the Vosk speech-recognition WebSocket at settings.voskWsLUrl, stores
 * it on inputContext.voskWs and resets inputContext.asrPartial.
 *
 * Incoming messages are parsed as JSON:
 * - `partial` updates inputContext.asrPartial (suffixed with '|');
 * - `text` replaces inputContext.asrPartial and is forwarded to onAsr().
 *
 * @returns A promise that resolves with the socket once it is open and
 *   rejects with an Error when the socket reports an error.
 */
function initVoskWS(): Promise<WebSocket> {
  return new Promise((resolve, reject) => {
    const socket = new WebSocket(settings.voskWsLUrl)
    socket.binaryType = 'arraybuffer'
    inputContext.voskWs = socket
    inputContext.asrPartial = ''

    socket.onopen = () => resolve(socket)

    // A WebSocket error event carries no serialisable detail, so report the
    // endpoint instead of stringifying the event.
    socket.onerror = () => {
      reject(new Error(`Vosk WebSocket error (${settings.voskWsLUrl})`))
    }

    socket.onmessage = (event) => {
      // Only text frames can hold the JSON payload handled below.
      if (!event.data || typeof event.data !== 'string') return

      let parsed: { partial?: string; text?: string }
      try {
        parsed = JSON.parse(event.data)
      } catch (error) {
        console.error('Vosk WebSocket sent a non-JSON message:', error)
        return
      }

      // NOTE(review): 'the' is skipped on purpose; presumably a spurious
      // partial emitted by the recogniser — confirm.
      if (parsed.partial && parsed.partial !== 'the') {
        inputContext.asrPartial = parsed.partial + '|'
      }

      if (parsed.text) {
        inputContext.asrPartial = parsed.text
        // onAsr is async; surface its failures instead of leaving an
        // unhandled rejection.
        onAsr(inputContext.asrPartial).catch((error: unknown) => {
          console.error('onAsr failed:', error)
        })
      }
    }
  })
}
function
endAudioInput
()
{
function
endAudioInput
()
{
microphoneState
.
value
=
'waitInput'
microphoneState
.
value
=
'waitInput'
inputContext
.
mediaStream
?.
getTracks
().
forEach
((
track
)
=>
track
.
stop
())
inputContext
.
mediaStream
?.
getTracks
().
forEach
((
track
)
=>
track
.
stop
())
...
@@ -159,28 +242,90 @@ function endAudioInput() {
...
@@ -159,28 +242,90 @@ function endAudioInput() {
inputContext
.
audioContext2
?.
close
()
inputContext
.
audioContext2
?.
close
()
inputContext
.
scriptProcessorNode
&&
(
inputContext
.
scriptProcessorNode
.
onaudioprocess
=
null
)
inputContext
.
scriptProcessorNode
&&
(
inputContext
.
scriptProcessorNode
.
onaudioprocess
=
null
)
inputContext
.
model
?.
terminate
()
inputContext
.
model
?.
terminate
()
// inputContext.ws?.close()
if
(
inputContext
.
voskWs
)
{
inputContext
.
voskWs
.
send
(
'{"eof" : 1}'
);
inputContext
.
voskWs
.
close
();
}
}
/**
 * Points the video element at `url`, reloads it and starts playback.
 * Does nothing when the video element is not available.
 *
 * @param url Source URL to assign to the video element.
 */
function setVideoUrl(url: string) {
  const videoEle = videoElement.value as HTMLVideoElement
  if (!videoEle) return

  videoEle.src = url
  videoEle.load()
  // play() returns a promise that rejects when playback cannot start (for
  // example when a later load() interrupts it); handle it so the failure
  // does not become an unhandled rejection.
  videoEle.play().catch((error: unknown) => {
    console.warn('Video playback did not start:', url, error)
  })
}
}
async
function
onAsr
(
question
:
string
)
{
async
function
onAsr
(
question
:
string
)
{
endAudioInput
()
endAudioInput
()
console
.
log
(
'---------------->'
,
question
)
console
.
log
(
'---------------->'
,
question
)
const
videoEle
=
videoElement
.
value
as
HTMLVideoElement
if
(
!
role
)
return
if
(
!
role
||
!
videoEle
)
return
question
=
question
.
replace
(
/
\s
/g
,
''
)
question
=
question
.
replace
(
/
\s
/g
,
''
)
for
(
let
i
=
0
;
i
<
role
.
qa
.
length
;
i
++
)
{
for
(
let
i
=
0
;
i
<
role
.
qa
.
length
;
i
++
)
{
const
{
q
,
url
}
=
role
.
qa
[
i
]
const
{
q
,
url
}
=
role
.
qa
[
i
]
console
.
log
(
question
+
' : '
+
q
)
console
.
log
(
question
+
' : '
+
q
)
if
(
q
.
includes
(
question
))
{
if
(
q
.
includes
(
question
))
{
videoEle
.
src
=
url
const
videoEle
=
videoElement
.
value
as
HTMLVideoElement
videoEle
.
load
()
videoEle
&&
(
videoEle
.
loop
=
false
);
videoEle
.
play
()
videoEle
&&
(
videoEle
.
muted
=
false
);
setVideoUrl
(
url
);
return
;
}
}
}
}
// 视频链接匹配不上,直接走大模型
const
ws
=
await
initLLMSocket
()
let
sliceAnswer
=
''
let
answer
=
''
const
answerArray
:
string
[]
=
[]
let
isTime
=
true
inputContext
.
ws
=
ws
ws
.
onmessage
=
(
message
)
=>
{
try
{
const
{
text
,
event
}
=
JSON
.
parse
(
message
.
data
)
as
{
event
:
string
message_num
:
number
text
:
string
}
if
(
event
===
'stream_end'
)
{
answerArray
.
push
(
sliceAnswer
)
runTTSTask
(
answerArray
)
sliceAnswer
=
''
answerArray
.
push
(
sliceAnswer
)
sliceAnswer
=
''
inputContext
.
ws
?.
close
()
console
.
log
(
'----------------> answer: '
,
answer
)
return
}
answer
+=
text
isTime
&&
console
.
time
(
'sliceAnswer'
)
isTime
=
false
sliceAnswer
+=
text
if
(
/
[
。,?!;,.?!;
]
/
.
test
(
text
)
&&
sliceAnswer
.
length
>=
20
)
{
console
.
timeEnd
(
'sliceAnswer'
)
answerArray
.
push
(
sliceAnswer
)
runTTSTask
(
answerArray
)
sliceAnswer
=
''
isTime
=
true
}
}
catch
(
error
)
{
console
.
log
(
'返回答案错误 -----> '
+
JSON
.
stringify
(
error
))
}
}
console
.
log
(
'----------------> Asr:'
,
question
)
ws
.
send
(
JSON
.
stringify
({
prompt
:
question
,
historys_list
:
[]
}))
}
}
function
initSocket
():
Promise
<
WebSocket
>
{
function
init
LLM
Socket
():
Promise
<
WebSocket
>
{
const
ws
=
new
WebSocket
(
settings
.
llmUrl
)
const
ws
=
new
WebSocket
(
settings
.
llmUrl
)
return
new
Promise
((
resolve
,
reject
)
=>
{
return
new
Promise
((
resolve
,
reject
)
=>
{
ws
.
onopen
=
()
=>
resolve
(
ws
)
ws
.
onopen
=
()
=>
resolve
(
ws
)
...
@@ -197,10 +342,20 @@ async function runTTSTask(tasks: string[]) {
...
@@ -197,10 +342,20 @@ async function runTTSTask(tasks: string[]) {
while
(
tasks
.
length
)
{
while
(
tasks
.
length
)
{
const
task
=
tasks
.
shift
()
const
task
=
tasks
.
shift
()
if
(
!
task
)
break
if
(
!
task
)
break
if
(
task
.
length
<
1
)
continue
console
.
time
(
task
+
' TTS: '
)
console
.
time
(
task
+
' TTS: '
)
const
res
=
await
localTTS
({
url
:
settings
.
ttsHost
,
text
:
task
})
const
res
=
await
localTTS
({
console
.
log
(
'----------------> TTS:'
,
res
)
url
:
settings
.
ttsHost
,
text
:
task
,
audio_path
:
settings
.
userData
})
console
.
log
(
'----------------> TTS:'
,
res
[
0
].
text
)
console
.
timeEnd
(
task
+
' TTS: '
)
console
.
timeEnd
(
task
+
' TTS: '
)
const
audio
=
new
Audio
(
`file://
${
res
[
0
].
text
}
`
)
audio
.
load
()
ttsAudios
.
push
(
audio
)
runAudioPlay
()
}
}
}
catch
(
error
)
{
}
catch
(
error
)
{
console
.
error
(
error
)
console
.
error
(
error
)
...
@@ -209,6 +364,28 @@ async function runTTSTask(tasks: string[]) {
...
@@ -209,6 +364,28 @@ async function runTTSTask(tasks: string[]) {
isTTSRunning
=
false
isTTSRunning
=
false
}
}
/** FIFO queue of synthesized speech clips waiting to be played. */
const ttsAudios: HTMLAudioElement[] = []
/** True while a clip taken from ttsAudios is playing. */
let isPlayRunning = false

/**
 * Plays the queued TTS clips one after another.
 *
 * Returns immediately when a clip is already playing or the queue is empty.
 * When a clip ends, the video element is switched back to the looping, muted
 * '/libai/10.mp4' clip and the next queued audio (if any) is started. The
 * same happens when a clip fails to load or play, so one bad clip cannot
 * leave the queue stuck.
 */
async function runAudioPlay() {
  if (isPlayRunning) return
  const audio = ttsAudios.shift()
  if (!audio) return
  isPlayRunning = true

  // `ended`, `error` and a rejected play() can each signal the end of this
  // clip; make sure the hand-over to the next clip runs exactly once.
  let settled = false
  const finish = () => {
    if (settled) return
    settled = true
    audio.onended = null
    audio.onerror = null
    isPlayRunning = false

    const videoEle = videoElement.value as HTMLVideoElement
    if (videoEle) {
      videoEle.loop = true
      videoEle.muted = true
    }
    setVideoUrl(new URL('/libai/10.mp4', import.meta.url).href)
    void runAudioPlay()
  }

  audio.onended = finish
  audio.onerror = finish

  try {
    await audio.play()
  } catch (error) {
    console.error('TTS audio playback failed:', error)
    finish()
  }
}
// eslint-disable-next-line no-unused-vars
// eslint-disable-next-line no-unused-vars
async
function
xfTTS
(
text
:
string
)
{
async
function
xfTTS
(
text
:
string
)
{
const
tone
=
settings
.
source
.
find
(({
sourceId
})
=>
settings
.
selectSource
===
sourceId
)
const
tone
=
settings
.
source
.
find
(({
sourceId
})
=>
settings
.
selectSource
===
sourceId
)
...
@@ -240,7 +417,7 @@ async function xfTTS(text: string) {
...
@@ -240,7 +417,7 @@ async function xfTTS(text: string) {
variant=
"elevated"
variant=
"elevated"
size=
"x-large"
size=
"x-large"
:disabled=
"microphoneState === 'loading' || microphoneState === 'disabled'"
:disabled=
"microphoneState === 'loading' || microphoneState === 'disabled'"
@
pointerdown=
"startAudioInput"
@
pointerdown=
"start
VoskWs
AudioInput"
>
>
<v-icon
v-if=
"microphoneState === 'waitInput'"
icon=
"mdi-microphone"
></v-icon>
<v-icon
v-if=
"microphoneState === 'waitInput'"
icon=
"mdi-microphone"
></v-icon>
<v-icon
v-if=
"microphoneState === 'loading'"
icon=
"mdi-microphone-settings"
></v-icon>
<v-icon
v-if=
"microphoneState === 'loading'"
icon=
"mdi-microphone-settings"
></v-icon>
...
...
src/renderer/store/settings.ts
View file @
fec7d389
...
@@ -25,6 +25,7 @@ export type ISettings = {
...
@@ -25,6 +25,7 @@ export type ISettings = {
isFullscreen
:
'yes'
|
'no'
isFullscreen
:
'yes'
|
'no'
isOpenDevTools
:
boolean
isOpenDevTools
:
boolean
llmUrl
:
string
llmUrl
:
string
voskWsLUrl
:
string
}
}
const
useSettingsStore
=
defineStore
(
'settings'
,
{
const
useSettingsStore
=
defineStore
(
'settings'
,
{
...
@@ -57,7 +58,8 @@ const useSettingsStore = defineStore('settings', {
...
@@ -57,7 +58,8 @@ const useSettingsStore = defineStore('settings', {
selectSource
:
''
,
selectSource
:
''
,
isFullscreen
:
'no'
,
isFullscreen
:
'no'
,
isOpenDevTools
:
false
,
isOpenDevTools
:
false
,
llmUrl
:
'ws://127.0.0.1:9001/api/v1/stream'
llmUrl
:
'ws://127.0.0.1:9001/api/v1/stream'
,
voskWsLUrl
:
'ws://127.0.0.1:2700'
})
as
ISettings
,
})
as
ISettings
,
getters
:
{},
getters
:
{},
actions
:
{
actions
:
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment