Commit 229f45cf authored by ali

feat: add interrupt support for the video digital human, fix inaccurate playback state for the photo digital human, wire the video digital human up to the NLP service

parent 83ceae1a
......@@ -34,6 +34,13 @@ contextBridge.exposeInMainWorld('mainApi', {
throw new Error(`Receive failed: Unknown ipc channel name: ${channel}`)
}
},
receiveOnce: (channel: string, cbFunc: Function): void => {
if (rendererAvailChannels.includes(channel)) {
ipcRenderer.once(channel, (event, ...args) => cbFunc(event, ...args))
} else {
throw new Error(`Receive failed: Unknown ipc channel name: ${channel}`)
}
},
invoke: async (channel: string, ...data: any[]): Promise<any> => {
if (mainAvailChannels.includes(channel)) {
const result = await ipcRenderer.invoke.apply(null, [channel, ...data])
......
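A minimal usage sketch for the new one-shot bridge (the channel name and payload shape are taken from the uploadFile change later in this diff; the handler body is illustrative):

// Renderer side: register a listener that fires once and is then removed,
// so repeated uploads don't stack handlers the way mainApi.receive would.
window.mainApi.receiveOnce(
  'msgReceivedFileUploadResponse',
  (_event: Event, result: { code: number; data: null | { filename: string } }) => {
    if (result.code === 200) console.log('uploaded as', result.data?.filename)
  }
)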
......@@ -127,7 +127,7 @@ function clear() {
<v-app-bar color="#d71b1b" density="compact" class="header">
<template #append>
<v-btn
prepend-icon="mdi-home"
prepend-icon="mdi-image-album"
variant="text"
:class="{ active: isCurrentRoute('/') }"
@click="handleRoute('/')"
......@@ -135,7 +135,7 @@ function clear() {
{{ $t('menu.photo') }}
</v-btn>
<v-btn
prepend-icon="mdi-fit-to-screen-outline"
prepend-icon="mdi-video-account"
variant="text"
:class="{ active: isCurrentRoute('/video') }"
@click="handleRoute('/video')"
......@@ -230,6 +230,14 @@ function clear() {
:model-value="setting.llmUrl"
></v-text-field>
<v-text-field
style="margin-top: 22px"
label="NLP-HOST"
:rules="[(value) => !!value || 'LNP 地址必填']"
hide-details="auto"
:model-value="setting.nlpHost"
></v-text-field>
<v-select
v-model="setting.liveHost.value"
style="margin-top: 22px"
......
......@@ -65,7 +65,7 @@ export class HwWebRTC extends EventEmitter {
constructor(id: string, log: 'none' | 'error' | 'warn' | 'info' | 'debug' = 'none') {
super()
this.elementId = id
// setLogLevel(log);
window.HWLLSPlayer.setLogLevel(log);
}
/**
......
......@@ -55,7 +55,7 @@ async function init() {
const item = photo.list.find((i) => i.url === url)
photoRole = new PhotoRole(settings.liveHost, `${item?.liveUrl}`, canvasEle)
photoRole.on('asyncAnswer', (ans) => {
photoRole.on('asyncAnswer', async (ans) => {
if (ans.playState === 'playing') {
microphoneState.value = 'reply'
return
......@@ -64,8 +64,7 @@ async function init() {
if (
microphoneState.value === 'reply' &&
ans.playState === 'pause' &&
photoRole!.taskQueueLength === 0 &&
answerArray.length === 0
await checkSteps()
) {
microphoneState.value = 'input'
}
......@@ -220,7 +219,7 @@ async function startVoskWasmAudioInput() {
microphoneState.value = 'loading'
const { recognizer, channel } = await initVosk({
result: onAsr,
result: onQ,
partialResult: (text) => {
// console.log('----------------> partialResult:', text)
}
......@@ -347,12 +346,21 @@ function initVoskWS() {
// if (parsed.result) console.log(parsed.result);
if (parsed.text) {
inputContext.asrPartial = parsed.text
onAsr(inputContext.asrPartial)
onQ(inputContext.asrPartial)
}
}
})
}
function initLLMSocket(): Promise<WebSocket> {
const ws = new WebSocket(settings.llmUrl)
return new Promise((resolve, reject) => {
ws.onopen = () => resolve(ws)
ws.onerror = reject
})
}
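A hedged usage sketch for initLLMSocket, pieced together from how the socket is driven later in this file (the `{ prompt, historys_list }` payload and the `{ text, event }` message shape both appear in the hunks below; the handler body here is only illustrative):

async function askLLM(question: string) {
  const ws = await initLLMSocket()
  ws.onmessage = (message) => {
    const { text, event } = JSON.parse(message.data) as { text: string; event: string }
    console.log(event, text) // the real handler slices `text` into TTS-sized chunks
  }
  ws.send(JSON.stringify({ prompt: question, historys_list: [] }))
}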
function endAudioInput() {
microphoneState.value = 'waitInput'
inputContext.mediaStream?.getTracks().forEach((track) => track.stop())
......@@ -367,7 +375,37 @@ function endAudioInput() {
}
const answerArray: { text: string; isLast: boolean }[] = []
async function onAsr(question: string) {
const steps: Promise<string>[] = [];
// Returns true once at least two pipeline steps (LLM answer + TTS) have settled.
const checkSteps = async () => {
let count = 0;
for (let i = 0; i < steps.length; i++) {
try {
// Race each step against a 10 ms timeout so an unfinished step is skipped instead of awaited.
const res = await Promise.race([steps[i], new Promise((resolve) => setTimeout(() => resolve(false), 10))])
if (res === false) continue;
} catch (e) {
console.error(e)
}
count++;
if (count >= 2) {
return true
}
}
return false;
}
// Creates a deferred promise whose resolve/reject are handed back to the caller.
const createStep = () => {
let stepResolve: (value: string) => void = () => {};
let stepReject: (reason?: string) => void = () => {};
const pose = new Promise<string>((resolve, reject) => {
stepResolve = resolve;
stepReject = reject;
})
return { pose, stepResolve, stepReject }
}
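A small sketch of how these two helpers are meant to interact (the step names mirror the resolves in the hunks below; the call order here is illustrative):

async function demoStepGate() {
  const chat = createStep()
  const tts = createStep()
  steps.length = 0
  steps.push(chat.pose, tts.pose)

  chat.stepResolve('chat') // the LLM websocket finished streaming the answer
  tts.stepResolve('TTS')   // the TTS queue drained

  // Both steps settle immediately, so the 10 ms races in checkSteps() pick them
  // up and the 'asyncAnswer' handler may flip microphoneState back to 'input'.
  console.log(await checkSteps()) // true
}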
async function onQ(question: string) {
console.log('---------------->question: ', question)
microphoneState.value = 'loading'
......@@ -380,6 +418,9 @@ async function onAsr(question: string) {
let isTime = true
let sliceAnswerLength = 10
answerArray.length = 0
steps.length = 0;
const { pose, stepResolve, stepReject } = createStep();
steps.push(pose);
photoRole!.answerArgs = new PhotoAnswer()
ws.onmessage = (message) => {
......@@ -396,6 +437,7 @@ async function onAsr(question: string) {
runTTSTask(answerArray)
inputContext.ws?.close()
console.log('----------------> answer: ', answer)
stepResolve('chat');
return
}
......@@ -419,7 +461,7 @@ async function onAsr(question: string) {
}
}
} catch (error) {
console.log('Error returning answer -----> ' + JSON.stringify(error))
stepReject(JSON.stringify(error))
}
}
......@@ -427,33 +469,28 @@ async function onAsr(question: string) {
ws.send(JSON.stringify({ prompt: question, historys_list: [] }))
}
function initLLMSocket(): Promise<WebSocket> {
const ws = new WebSocket(settings.llmUrl)
return new Promise((resolve, reject) => {
ws.onopen = () => resolve(ws)
ws.onerror = reject
})
}
let isTTSRunning = false
async function runTTSTask(tasks: { text: string; isLast: boolean }[]) {
if (isTTSRunning) return
isTTSRunning = true
const { pose, stepResolve, stepReject } = createStep();
steps.push(pose);
try {
while (tasks.length) {
const task = tasks.shift()
if (!task) break
if (task.text.trim().length < 1) continue
console.time(task + ' TTS: ')
console.time(task.text + ' TTS: ')
const res = await localTTS({
url: settings.ttsHost,
text: task.text,
audio_path: settings.userData
})
console.log('----------------> TTS:', res[0].text)
console.timeEnd(task + ' TTS: ')
console.timeEnd(task.text + ' TTS: ')
console.log('---------------->', res[0].text)
const audioPath = await uploadFile({ filePath: res[0].text })
......@@ -464,15 +501,16 @@ async function runTTSTask(tasks: { text: string; isLast: boolean }[]) {
})
}
} catch (error) {
console.error(error)
stepReject(JSON.stringify(error))
}
isTTSRunning = false
stepResolve('TTS')
}
function uploadFile({ filePath }: { filePath: string }) {
return new Promise<string>((resolve, reject) => {
window.mainApi.receive(
window.mainApi.receiveOnce(
'msgReceivedFileUploadResponse',
(event: Event, result: { code: number; data: null | { filename: string } }) => {
if (result.code !== 200) {
......
......@@ -25,6 +25,19 @@ const microphoneState = ref<'waitInput' | 'input' | 'loading' | 'disabled' | 're
const videoElement = ref<HTMLVideoElement | null>(null)
const videoElement2 = ref<HTMLVideoElement | null>(null)
const videos = [videoElement, videoElement2]
const inputContext: {
mediaStream?: MediaStream
audioContext?: AudioContext
audioContext2?: AudioContext
scriptProcessorNode?: ScriptProcessorNode
model?: Model
ws?: WebSocket
voskWs?: WebSocket
asrPartial: string
playingAudio?: HTMLAudioElement
} = {
asrPartial: ''
}
onMounted(() => {
// init();
......@@ -87,19 +100,6 @@ function analyzeMicrophoneVolume(stream: MediaStream, callback: (number) => void
inputContext.scriptProcessorNode = recordEventNode
}
const inputContext: {
mediaStream?: MediaStream
audioContext?: AudioContext
audioContext2?: AudioContext
scriptProcessorNode?: ScriptProcessorNode
model?: Model
ws?: WebSocket
voskWs?: WebSocket
asrPartial: string
} = {
asrPartial: ''
}
async function startVoskWasmAudioInput() {
if (microphoneState.value === 'loading') return
......@@ -111,7 +111,7 @@ async function startVoskWasmAudioInput() {
microphoneState.value = 'loading'
const { recognizer, channel } = await initVosk({
result: onAsr,
result: onQ,
partialResult: (text) => {
// console.log('----------------> partialResult:', text)
}
......@@ -238,12 +238,20 @@ function initVoskWS() {
// if (parsed.result) console.log(parsed.result);
if (parsed.text) {
inputContext.asrPartial = parsed.text
onAsr(inputContext.asrPartial)
onQ(inputContext.asrPartial)
}
}
})
}
function initLLMSocket(): Promise<WebSocket> {
const ws = new WebSocket(settings.llmUrl)
return new Promise((resolve, reject) => {
ws.onopen = () => resolve(ws)
ws.onerror = reject
})
}
function endAudioInput() {
microphoneState.value = 'waitInput'
inputContext.mediaStream?.getTracks().forEach((track) => track.stop())
......@@ -255,6 +263,11 @@ function endAudioInput() {
inputContext.voskWs.send('{"eof" : 1}')
inputContext.voskWs.close()
}
ttsAudios.length = 0;
inputContext.playingAudio?.pause();
videos[1].value?.pause();
videos[0].value?.pause();
isPlayRunning = false;
}
const canplay = () => {
......@@ -272,16 +285,35 @@ function loadVideo(url: string) {
videos[1].value!.addEventListener('canplay', canplay)
}
async function onAsr(question: string) {
console.log('---------------->', question)
async function qNLP(question: string) {
const resp = await (await fetch(`${settings.nlpHost}/api/v1/generate`, {
headers: {
accept: 'application/json, text/plain, */*',
'content-type': 'application/json'
},
body: JSON.stringify({
question
}),
method: 'POST',
mode: 'cors'
})).json() as { results: {text: null | string}[] };
if (resp.results[0].text === null) return '';
for (let i = 0; i < role!.qa.length; i++) {
const { q, url } = role!.qa[i]
if (q.includes(resp.results[0].text)) {
return url;
};
}
}
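A hedged sketch of the exchange qNLP assumes; the endpoint path and the results[].text field come from the code above, while the sample values and the stand-in qa list are invented for illustration:

type GenerateResponse = { results: { text: null | string }[] }

async function demoNlpMatch() {
  const resp: GenerateResponse = { results: [{ text: 'introduce' }] } // stand-in for the fetch above
  const qa = [{ q: 'please introduce the product', url: 'videos/intro.mp4' }] // stand-in for role.qa
  const text = resp.results[0].text
  if (text === null) return ''
  return qa.find((item) => item.q.includes(text))?.url // resolves to 'videos/intro.mp4'
}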
async function onQ(question: string) {
console.log('----------------> Asr:', question)
if (!role) return
microphoneState.value = 'loading'
question = question.replace(/\s/g, '')
for (let i = 0; i < role.qa.length; i++) {
const { q, url } = role.qa[i]
console.log(question + ' : ' + q)
if (q.includes(question)) {
if (await qNLP(question)) {
loadVideo(url)
microphoneState.value = 'reply'
const videoEle = videos[1].value
......@@ -290,11 +322,10 @@ async function onAsr(question: string) {
videoEle!.onended = () => {
videoEle!.onended = null
microphoneState.value = 'input'
// Whether re-initialization is needed
// TODO: whether re-initialization is needed
}
return
}
}
// No prerecorded video matched; fall back to the LLM
const ws = await initLLMSocket()
......@@ -306,6 +337,10 @@ async function onAsr(question: string) {
inputContext.ws = ws
ws.onmessage = (message) => {
if (microphoneState.value === 'input') {
return;
}
try {
const { text, event } = JSON.parse(message.data) as {
event: string
......@@ -348,23 +383,16 @@ async function onAsr(question: string) {
}
}
console.log('----------------> Asr:', question)
ws.send(JSON.stringify({ prompt: question, historys_list: [] }))
}
function initLLMSocket(): Promise<WebSocket> {
const ws = new WebSocket(settings.llmUrl)
return new Promise((resolve, reject) => {
ws.onopen = () => resolve(ws)
ws.onerror = reject
})
}
let isTTSRunning = false
async function runTTSTask(tasks: string[]) {
if (isTTSRunning) return
isTTSRunning = true
microphoneState.value = 'loading'
try {
while (tasks.length) {
const task = tasks.shift()
......@@ -373,7 +401,6 @@ async function runTTSTask(tasks: string[]) {
console.time(task + ' TTS: ')
microphoneState.value = 'loading'
const res = await localTTS({
url: settings.ttsHost,
text: task,
......@@ -383,6 +410,12 @@ async function runTTSTask(tasks: string[]) {
console.log('----------------> TTS:', res[0].text)
console.timeEnd(task + ' TTS: ')
// @ts-ignore
if (microphoneState.value === 'input') {
break;
}
const audio = new Audio(`file://${res[0].text}`)
audio.load()
ttsAudios.push(audio)
......@@ -414,6 +447,7 @@ async function runAudioPlay() {
runAudioPlay()
}
await audio.play()
inputContext.playingAudio = audio;
loadVideo(role!.playUrl)
videos[1].value!.loop = true
videos[1].value!.muted = true
......@@ -434,6 +468,14 @@ async function xfTTS(text: string) {
})
console.log('----------------> tts:', res)
}
function down() {
if (microphoneState.value === 'reply') {
endAudioInput();
}
startVoskWsAudioInput();
}
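down() only behaves as an interrupt because of the early returns added above: a hedged restatement of that guard pattern (the state check is the commit's, the wrapper function is illustrative):

// Once the user has interrupted and the state is back to 'input', any callback
// still running for the previous answer drops its work instead of queueing more
// TTS audio or processing late LLM chunks.
function guarded(handler: () => void) {
  return () => {
    if (microphoneState.value === 'input') {
      return
    }
    handler()
  }
}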
</script>
<template>
......@@ -454,15 +496,14 @@ async function xfTTS(text: string) {
size="x-large"
:disabled="
microphoneState === 'loading' ||
microphoneState === 'disabled' ||
microphoneState === 'reply'
microphoneState === 'disabled'
"
@pointerdown="startVoskWsAudioInput"
@pointerdown="down"
>
<v-icon v-if="microphoneState === 'waitInput'" icon="mdi-microphone"></v-icon>
<v-icon v-if="microphoneState === 'loading'" icon="mdi-microphone-settings"></v-icon>
<v-icon v-if="microphoneState === 'disabled'" icon="mdi-microphone-off"></v-icon>
<v-icon v-if="microphoneState === 'reply'" icon="mdi-message-reply-text-outline"></v-icon>
<v-icon v-if="microphoneState === 'reply'" icon="mdi-volume-high"></v-icon>
<template v-if="microphoneState === 'input'">
<img width="30" height="30" :src="iconMicrophone" alt="" srcset="" />
......@@ -488,7 +529,7 @@ async function xfTTS(text: string) {
color="white"
variant="outlined"
:disabled="microphoneState !== 'waitInput' && microphoneState !== 'input'"
@click="onAsr(item.q)"
@click="onQ(item.q)"
>
<v-icon start icon="mdi-help-circle-outline"></v-icon>
{{ item.q }}
......
......@@ -30,6 +30,7 @@ export type ISettings = {
voskWsLUrl: string
liveHost: string
vConsole: boolean
nlpHost: string
}
const useSettingsStore = defineStore('settings', {
......@@ -67,7 +68,8 @@ const useSettingsStore = defineStore('settings', {
llmToTTSSliceLength: 20,
voskWsLUrl: 'ws://127.0.0.1:2700',
liveHost: 'laihua',
vConsole: true
vConsole: true,
nlpHost: 'http://192.168.1.57:19001'
}) as ISettings,
getters: {},
actions: {
......