Commit 229f45cf authored by ali

feat: add interrupt support for the video digital human, fix the photo digital human's imprecise play state, and wire the video digital human up to the NLP service

parent 83ceae1a
@@ -34,6 +34,13 @@ contextBridge.exposeInMainWorld('mainApi', {
       throw new Error(`Receive failed: Unknown ipc channel name: ${channel}`)
     }
   },
+  receiveOnce: (channel: string, cbFunc: Function): void => {
+    if (rendererAvailChannels.includes(channel)) {
+      ipcRenderer.once(channel, (event, ...args) => cbFunc(event, ...args))
+    } else {
+      throw new Error(`Receive failed: Unknown ipc channel name: ${channel}`)
+    }
+  },
   invoke: async (channel: string, ...data: any[]): Promise<any> => {
     if (mainAvailChannels.includes(channel)) {
       const result = await ipcRenderer.invoke.apply(null, [channel, ...data])
...
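`receiveOnce` mirrors the existing `receive` bridge method but registers through `ipcRenderer.once`, so the listener detaches itself after the first message; this is what later lets `uploadFile` stop stacking a fresh permanent listener on every upload. A minimal renderer-side sketch of how a caller might promisify it (the timeout wrapper and the helper name are mine, not part of the commit):

```ts
// Sketch: promisify a one-shot IPC response. Assumes the `mainApi`
// bridge above is exposed on `window` by this preload script and that
// the channel is whitelisted in rendererAvailChannels.
declare const window: Window & {
  mainApi: { receiveOnce(channel: string, cb: (...args: unknown[]) => void): void }
}

function onceWithTimeout<T>(channel: string, ms = 10_000): Promise<T> {
  return new Promise<T>((resolve, reject) => {
    const timer = setTimeout(() => reject(new Error(`${channel} timed out`)), ms)
    window.mainApi.receiveOnce(channel, (_event, result) => {
      clearTimeout(timer) // ipcRenderer.once has already removed the listener
      resolve(result as T)
    })
  })
}

// Usage mirroring uploadFile() later in this commit:
// const result = await onceWithTimeout<{ code: number }>('msgReceivedFileUploadResponse')
```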
@@ -127,7 +127,7 @@ function clear() {
   <v-app-bar color="#d71b1b" density="compact" class="header">
     <template #append>
       <v-btn
-        prepend-icon="mdi-home"
+        prepend-icon="mdi-image-album"
         variant="text"
         :class="{ active: isCurrentRoute('/') }"
         @click="handleRoute('/')"
@@ -135,7 +135,7 @@ function clear() {
         {{ $t('menu.photo') }}
       </v-btn>
       <v-btn
-        prepend-icon="mdi-fit-to-screen-outline"
+        prepend-icon="mdi-video-account"
         variant="text"
         :class="{ active: isCurrentRoute('/video') }"
         @click="handleRoute('/video')"
@@ -230,6 +230,14 @@ function clear() {
         :model-value="setting.llmUrl"
       ></v-text-field>
+      <v-text-field
+        style="margin-top: 22px"
+        label="NLP-HOST"
+        :rules="[(value) => !!value || 'NLP 地址必填']"
+        hide-details="auto"
+        :model-value="setting.nlpHost"
+      ></v-text-field>
       <v-select
         v-model="setting.liveHost.value"
         style="margin-top: 22px"
...
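The new NLP-HOST field binds to `setting.nlpHost`, which the settings store (last file in this commit) persists with a LAN default and which the video page's new `qNLP` helper reads; its required-field message reads "NLP 地址必填" ("NLP address is required"). If stricter input checking were wanted, a hedged extension of the rules array (illustrative only, not in the commit) could look like:

```ts
// Sketch: Vuetify-style rules for the NLP-HOST field; a rule returns
// true when the value is valid, or the error string to display.
const nlpHostRules = [
  (value: string) => !!value || 'NLP address is required',
  (value: string) => {
    try {
      const u = new URL(value)
      return ['http:', 'https:'].includes(u.protocol) || 'Must be an http(s) URL'
    } catch {
      return 'Not a valid URL'
    }
  }
]
```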
@@ -65,7 +65,7 @@ export class HwWebRTC extends EventEmitter {
   constructor(id: string, log: 'none' | 'error' | 'warn' | 'info' | 'debug' = 'none') {
     super()
     this.elementId = id
-    // setLogLevel(log);
+    window.HWLLSPlayer.setLogLevel(log)
   }
   /**
...
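Log levels are now forwarded to the real SDK: the constructor assumes the Huawei low-latency live streaming player is present as the `window.HWLLSPlayer` global (loaded by a script tag rather than imported). If that script could ever load after the constructor runs, a defensive variant (my assumption, not in the commit) would be:

```ts
// Sketch: guard the SDK global before forwarding the log level.
function setSdkLogLevel(log: 'none' | 'error' | 'warn' | 'info' | 'debug'): void {
  const player = (window as unknown as {
    HWLLSPlayer?: { setLogLevel(level: string): void }
  }).HWLLSPlayer
  if (player) {
    player.setLogLevel(log)
  } else {
    console.warn('HWLLSPlayer not loaded; keeping the SDK default log level')
  }
}
```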
@@ -55,7 +55,7 @@ async function init() {
   const item = photo.list.find((i) => i.url === url)
   photoRole = new PhotoRole(settings.liveHost, `${item?.liveUrl}`, canvasEle)
-  photoRole.on('asyncAnswer', (ans) => {
+  photoRole.on('asyncAnswer', async (ans) => {
     if (ans.playState === 'playing') {
       microphoneState.value = 'reply'
       return
@@ -64,8 +64,7 @@ async function init() {
     if (
       microphoneState.value === 'reply' &&
       ans.playState === 'pause' &&
-      photoRole!.taskQueueLength === 0 &&
-      answerArray.length === 0
+      (await checkSteps())
     ) {
       microphoneState.value = 'input'
     }
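With this change, unlocking the microphone after a reply no longer inspects `photoRole!.taskQueueLength` and `answerArray` directly: the handler awaits `checkSteps()` (added further down in this file), which reports whether the LLM chat step and the TTS step registered for the current question have both settled.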
@@ -220,7 +219,7 @@ async function startVoskWasmAudioInput() {
   microphoneState.value = 'loading'
   const { recognizer, channel } = await initVosk({
-    result: onAsr,
+    result: onQ,
     partialResult: (text) => {
       // console.log('----------------> partialResult:', text)
     }
@@ -347,12 +346,21 @@ function initVoskWS() {
       // if (parsed.result) console.log(parsed.result);
       if (parsed.text) {
         inputContext.asrPartial = parsed.text
-        onAsr(inputContext.asrPartial)
+        onQ(inputContext.asrPartial)
       }
     }
   })
 }
+
+function initLLMSocket(): Promise<WebSocket> {
+  const ws = new WebSocket(settings.llmUrl)
+  return new Promise((resolve, reject) => {
+    ws.onopen = () => resolve(ws)
+    ws.onerror = reject
+  })
+}

 function endAudioInput() {
   microphoneState.value = 'waitInput'
   inputContext.mediaStream?.getTracks().forEach((track) => track.stop())
@@ -367,7 +375,37 @@ function endAudioInput() {
 }

 const answerArray: { text: string; isLast: boolean }[] = []
-async function onAsr(question: string) {
+const steps: Promise<string>[] = []
+const checkSteps = async () => {
+  let count = 0
+  for (let i = 0; i < steps.length; i++) {
+    try {
+      const res = await Promise.race([
+        steps[i],
+        new Promise((resolve) => setTimeout(() => resolve(false), 10))
+      ])
+      if (res === false) continue
+    } catch (e) {
+      console.error(e)
+    }
+    count++
+    if (count >= 2) {
+      return true
+    }
+  }
+  return false
+}
+
+const createStep = () => {
+  let stepResolve: (value: string) => void = () => {}
+  let stepReject: (reason?: string) => void = () => {}
+  const pose = new Promise<string>((resolve, reject) => {
+    stepResolve = resolve
+    stepReject = reject
+  })
+  return { pose, stepResolve, stepReject }
+}
+
+async function onQ(question: string) {
   console.log('---------------->question: ', question)
   microphoneState.value = 'loading'
@@ -380,6 +418,9 @@ async function onAsr(question: string) {
   let isTime = true
   let sliceAnswerLength = 10
   answerArray.length = 0
+  steps.length = 0
+  const { pose, stepResolve, stepReject } = createStep()
+  steps.push(pose)
   photoRole!.answerArgs = new PhotoAnswer()
   ws.onmessage = (message) => {
@@ -396,6 +437,7 @@ async function onAsr(question: string) {
         runTTSTask(answerArray)
         inputContext.ws?.close()
         console.log('----------------> answer: ', answer)
+        stepResolve('chat')
         return
       }
@@ -419,7 +461,7 @@ async function onAsr(question: string) {
         }
       }
     } catch (error) {
-      console.log('answer error -----> ' + JSON.stringify(error))
+      stepReject(JSON.stringify(error))
     }
   }
@@ -427,33 +469,28 @@ async function onAsr(question: string) {
   ws.send(JSON.stringify({ prompt: question, historys_list: [] }))
 }
-function initLLMSocket(): Promise<WebSocket> {
-  const ws = new WebSocket(settings.llmUrl)
-  return new Promise((resolve, reject) => {
-    ws.onopen = () => resolve(ws)
-    ws.onerror = reject
-  })
-}

 let isTTSRunning = false
 async function runTTSTask(tasks: { text: string; isLast: boolean }[]) {
   if (isTTSRunning) return
   isTTSRunning = true
+  const { pose, stepResolve, stepReject } = createStep()
+  steps.push(pose)
   try {
     while (tasks.length) {
       const task = tasks.shift()
       if (!task) break
       if (task.text.trim().length < 1) continue
-      console.time(task + ' TTS: ')
+      console.time(task.text + ' TTS: ')
       const res = await localTTS({
         url: settings.ttsHost,
         text: task.text,
         audio_path: settings.userData
       })
       console.log('----------------> TTS:', res[0].text)
-      console.timeEnd(task + ' TTS: ')
+      console.timeEnd(task.text + ' TTS: ')
       console.log('---------------->', res[0].text)
       const audioPath = await uploadFile({ filePath: res[0].text })
@@ -464,15 +501,16 @@ async function runTTSTask(tasks: { text: string; isLast: boolean }[]) {
       })
     }
   } catch (error) {
-    console.error(error)
+    stepReject(JSON.stringify(error))
   }
   isTTSRunning = false
+  stepResolve('TTS')
 }

 function uploadFile({ filePath }: { filePath: string }) {
   return new Promise<string>((resolve, reject) => {
-    window.mainApi.receive(
+    window.mainApi.receiveOnce(
       'msgReceivedFileUploadResponse',
       (event: Event, result: { code: number; data: null | { filename: string } }) => {
         if (result.code !== 200) {
...
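The `steps` array builds a completion barrier out of deferred promises: `onQ` registers a "chat" step, `runTTSTask` a "TTS" step, and `checkSteps` probes each one by racing it against a 10 ms timer, so a still-pending promise loses the race and is skipped without blocking. Only once at least two steps have settled (chat finished and the TTS queue drained) does the `asyncAnswer` handler flip the microphone back to `input`. The same probe technique in a self-contained sketch (the names `createDeferred` and `isSettled` are mine, not the commit's):

```ts
// Sketch: deferred promises plus a settlement probe, as used by checkSteps().
type Deferred<T> = {
  promise: Promise<T>
  resolve: (value: T) => void
  reject: (reason?: unknown) => void
}

function createDeferred<T>(): Deferred<T> {
  let resolve!: (value: T) => void
  let reject!: (reason?: unknown) => void
  const promise = new Promise<T>((res, rej) => {
    resolve = res
    reject = rej
  })
  return { promise, resolve, reject }
}

// Race against a short timer: a pending promise loses and reports false;
// a settled one wins (rejection also counts as settled).
async function isSettled(p: Promise<unknown>, graceMs = 10): Promise<boolean> {
  const timeout = new Promise<false>((res) => setTimeout(() => res(false), graceMs))
  try {
    return (await Promise.race([p, timeout])) !== false
  } catch {
    return true
  }
}
```

One caveat the commit inherits: the probe treats the literal value `false` as "still pending", which is safe here only because the steps resolve to the strings 'chat' and 'TTS'.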
@@ -25,6 +25,19 @@ const microphoneState = ref<'waitInput' | 'input' | 'loading' | 'disabled' | 're
 const videoElement = ref<HTMLVideoElement | null>(null)
 const videoElement2 = ref<HTMLVideoElement | null>(null)
 const videos = [videoElement, videoElement2]
+const inputContext: {
+  mediaStream?: MediaStream
+  audioContext?: AudioContext
+  audioContext2?: AudioContext
+  scriptProcessorNode?: ScriptProcessorNode
+  model?: Model
+  ws?: WebSocket
+  voskWs?: WebSocket
+  asrPartial: string
+  playingAudio?: HTMLAudioElement
+} = {
+  asrPartial: ''
+}

 onMounted(() => {
   // init();
@@ -87,19 +100,6 @@ function analyzeMicrophoneVolume(stream: MediaStream, callback: (number) => void
   inputContext.scriptProcessorNode = recordEventNode
 }
-const inputContext: {
-  mediaStream?: MediaStream
-  audioContext?: AudioContext
-  audioContext2?: AudioContext
-  scriptProcessorNode?: ScriptProcessorNode
-  model?: Model
-  ws?: WebSocket
-  voskWs?: WebSocket
-  asrPartial: string
-} = {
-  asrPartial: ''
-}

 async function startVoskWasmAudioInput() {
   if (microphoneState.value === 'loading') return
@@ -111,7 +111,7 @@ async function startVoskWasmAudioInput() {
   microphoneState.value = 'loading'
   const { recognizer, channel } = await initVosk({
-    result: onAsr,
+    result: onQ,
     partialResult: (text) => {
       // console.log('----------------> partialResult:', text)
     }
@@ -238,12 +238,20 @@ function initVoskWS() {
       // if (parsed.result) console.log(parsed.result);
       if (parsed.text) {
         inputContext.asrPartial = parsed.text
-        onAsr(inputContext.asrPartial)
+        onQ(inputContext.asrPartial)
       }
     }
   })
 }
+
+function initLLMSocket(): Promise<WebSocket> {
+  const ws = new WebSocket(settings.llmUrl)
+  return new Promise((resolve, reject) => {
+    ws.onopen = () => resolve(ws)
+    ws.onerror = reject
+  })
+}

 function endAudioInput() {
   microphoneState.value = 'waitInput'
   inputContext.mediaStream?.getTracks().forEach((track) => track.stop())
@@ -255,6 +263,11 @@ function endAudioInput() {
     inputContext.voskWs.send('{"eof" : 1}')
     inputContext.voskWs.close()
   }
+  ttsAudios.length = 0
+  inputContext.playingAudio?.pause()
+  videos[1].value?.pause()
+  videos[0].value?.pause()
+  isPlayRunning = false
 }

 const canplay = () => {
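`endAudioInput` now doubles as the interrupt path: beyond closing the audio input, it flushes the queued TTS clips, pauses the clip currently sounding and both video elements, and clears `isPlayRunning` so the play loop can be restarted cleanly. Factored out as a helper, the teardown amounts to this (the helper name is hypothetical):

```ts
// Sketch: the playback teardown from endAudioInput(), parameterized over
// the state it touches instead of reaching for module-level variables.
function stopPlayback(state: {
  ttsAudios: HTMLAudioElement[]
  playingAudio?: HTMLAudioElement
  videos: (HTMLVideoElement | null)[]
}): void {
  state.ttsAudios.length = 0 // drop clips queued but not yet played
  state.playingAudio?.pause() // silence the clip currently sounding
  state.videos.forEach((v) => v?.pause())
}
```

Resetting `isPlayRunning` is what lets the next call to `runAudioPlay()` start a fresh playback loop after an interruption.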
@@ -272,16 +285,35 @@ function loadVideo(url: string) {
   videos[1].value!.addEventListener('canplay', canplay)
 }
-async function onAsr(question: string) {
-  console.log('---------------->', question)
+async function qNLP(question: string) {
+  const resp = (await (
+    await fetch(`${settings.nlpHost}/api/v1/generate`, {
+      headers: {
+        accept: 'application/json, text/plain, */*',
+        'content-type': 'application/json'
+      },
+      body: JSON.stringify({ question }),
+      method: 'POST',
+      mode: 'cors'
+    })
+  ).json()) as { results: { text: null | string }[] }
+  if (resp.results[0].text === null) return ''
+  for (let i = 0; i < role!.qa.length; i++) {
+    const { q, url } = role!.qa[i]
+    if (q.includes(resp.results[0].text)) {
+      return url
+    }
+  }
+}
+
+async function onQ(question: string) {
+  console.log('----------------> Asr:', question)
   if (!role) return
   microphoneState.value = 'loading'
-  question = question.replace(/\s/g, '')
-  for (let i = 0; i < role.qa.length; i++) {
-    const { q, url } = role.qa[i]
-    console.log(question + ' : ' + q)
-    if (q.includes(question)) {
+  const url = await qNLP(question)
+  if (url) {
     loadVideo(url)
     microphoneState.value = 'reply'
     const videoEle = videos[1].value
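Question routing moves off-device: instead of stripping whitespace and substring-matching the raw ASR text against `role.qa`, `onQ` asks the NLP service to normalize the question and matches the returned text, binding the resulting clip URL before testing it (the `loadVideo(url)` call needs it in scope). The HTTP contract `qNLP` assumes, isolated below; only the endpoint path and the `question`/`results[].text` fields appear in the commit, everything else is inferred:

```ts
// Sketch: the qNLP request/response contract in isolation.
interface GenerateResponse {
  results: { text: string | null }[]
}

async function generate(nlpHost: string, question: string): Promise<string | null> {
  const res = await fetch(`${nlpHost}/api/v1/generate`, {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ question })
  })
  if (!res.ok) throw new Error(`NLP service responded ${res.status}`)
  const data = (await res.json()) as GenerateResponse
  return data.results[0]?.text ?? null
}
```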
@@ -290,11 +322,10 @@ async function onAsr(question: string) {
     videoEle!.onended = () => {
       videoEle!.onended = null
       microphoneState.value = 'input'
-      // do we need to re-initialize?
+      // TODO: do we need to re-initialize?
     }
     return
   }
-  }

   // no matching video clip; fall back to the LLM
   const ws = await initLLMSocket()
@@ -306,6 +337,10 @@ async function onAsr(question: string) {
   inputContext.ws = ws
   ws.onmessage = (message) => {
+    if (microphoneState.value === 'input') {
+      return
+    }
     try {
       const { text, event } = JSON.parse(message.data) as {
         event: string
@@ -348,23 +383,16 @@ async function onAsr(question: string) {
     }
   }
-  console.log('----------------> Asr:', question)
   ws.send(JSON.stringify({ prompt: question, historys_list: [] }))
 }
-function initLLMSocket(): Promise<WebSocket> {
-  const ws = new WebSocket(settings.llmUrl)
-  return new Promise((resolve, reject) => {
-    ws.onopen = () => resolve(ws)
-    ws.onerror = reject
-  })
-}

 let isTTSRunning = false
 async function runTTSTask(tasks: string[]) {
   if (isTTSRunning) return
   isTTSRunning = true
+  microphoneState.value = 'loading'
   try {
     while (tasks.length) {
       const task = tasks.shift()
@@ -373,7 +401,6 @@ async function runTTSTask(tasks: string[]) {
       console.time(task + ' TTS: ')
-      microphoneState.value = 'loading'
       const res = await localTTS({
         url: settings.ttsHost,
         text: task,
@@ -383,6 +410,12 @@ async function runTTSTask(tasks: string[]) {
       console.log('----------------> TTS:', res[0].text)
       console.timeEnd(task + ' TTS: ')
+      // @ts-ignore
+      if (microphoneState.value === 'input') {
+        break
+      }
       const audio = new Audio(`file://${res[0].text}`)
       audio.load()
       ttsAudios.push(audio)
@@ -414,6 +447,7 @@ async function runAudioPlay() {
     runAudioPlay()
   }
   await audio.play()
+  inputContext.playingAudio = audio
   loadVideo(role!.playUrl)
   videos[1].value!.loop = true
   videos[1].value!.muted = true
@@ -434,6 +468,14 @@ async function xfTTS(text: string) {
   })
   console.log('----------------> tts:', res)
 }
+
+function down() {
+  if (microphoneState.value === 'reply') {
+    endAudioInput()
+  }
+  startVoskWsAudioInput()
+}
 </script>
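Binding the mic button's `pointerdown` to `down` rather than straight to `startVoskWsAudioInput` is the user-facing half of the interrupt feature: a press during a reply first runs `endAudioInput()` to silence playback, then reopens the microphone. That is also why the template below stops disabling the button in the 'reply' state and swaps its icon to a speaker while the avatar is talking.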
 <template>
@@ -454,15 +496,14 @@ async function xfTTS(text: string) {
         size="x-large"
         :disabled="
           microphoneState === 'loading' ||
-          microphoneState === 'disabled' ||
-          microphoneState === 'reply'
+          microphoneState === 'disabled'
         "
-        @pointerdown="startVoskWsAudioInput"
+        @pointerdown="down"
       >
         <v-icon v-if="microphoneState === 'waitInput'" icon="mdi-microphone"></v-icon>
         <v-icon v-if="microphoneState === 'loading'" icon="mdi-microphone-settings"></v-icon>
         <v-icon v-if="microphoneState === 'disabled'" icon="mdi-microphone-off"></v-icon>
-        <v-icon v-if="microphoneState === 'reply'" icon="mdi-message-reply-text-outline"></v-icon>
+        <v-icon v-if="microphoneState === 'reply'" icon="mdi-volume-high"></v-icon>
         <template v-if="microphoneState === 'input'">
           <img width="30" height="30" :src="iconMicrophone" alt="" srcset="" />
@@ -488,7 +529,7 @@ async function xfTTS(text: string) {
             color="white"
             variant="outlined"
             :disabled="microphoneState !== 'waitInput' && microphoneState !== 'input'"
-            @click="onAsr(item.q)"
+            @click="onQ(item.q)"
           >
             <v-icon start icon="mdi-help-circle-outline"></v-icon>
             {{ item.q }}
...
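Cancellation on this page is cooperative: setting `microphoneState` to 'input' acts as the stop signal, and the long-running paths poll it at safe points — the early return added to `ws.onmessage` and the `break` in the TTS loop before the next clip is queued. The same pattern in isolation (all names here are illustrative, not the commit's):

```ts
// Sketch: cooperative cancellation via a shared flag, mirroring the
// microphoneState checks added above.
class CancelToken {
  private cancelled = false
  cancel(): void {
    this.cancelled = true
  }
  get isCancelled(): boolean {
    return this.cancelled
  }
}

async function speakAll(
  chunks: string[],
  synthesize: (text: string) => Promise<void>, // stands in for localTTS + playback
  token: CancelToken
): Promise<void> {
  for (const chunk of chunks) {
    if (token.isCancelled) break // checked before committing to the next clip
    await synthesize(chunk)
  }
}
```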
@@ -30,6 +30,7 @@ export type ISettings = {
   voskWsLUrl: string
   liveHost: string
   vConsole: boolean
+  nlpHost: string
 }

 const useSettingsStore = defineStore('settings', {
@@ -67,7 +68,8 @@ const useSettingsStore = defineStore('settings', {
       llmToTTSSliceLength: 20,
       voskWsLUrl: 'ws://127.0.0.1:2700',
       liveHost: 'laihua',
-      vConsole: true
+      vConsole: true,
+      nlpHost: 'http://192.168.1.57:19001'
     }) as ISettings,
   getters: {},
   actions: {
...
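Note that `nlpHost` needs the explicit `: string` member in `ISettings` to type-check, and that its default points at a private LAN address, so deployments outside that network must override it via the new settings field. If a build-time override were ever wanted, one hedged option (assuming this Vite-based setup exposes `import.meta.env`; the variable name is hypothetical):

```ts
// Sketch: prefer a build-time env var over the hard-coded LAN default.
nlpHost: import.meta.env.VITE_NLP_HOST ?? 'http://192.168.1.57:19001'
```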