Commit 8a378618 authored by ali

feat: smooth the transition between the video digital human's speaking and idle states

parent 0a290374
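
Review note: this commit swaps the old two-`<video>` opacity crossfade for hidden, preloaded `<video>` elements that are composited onto a single `<canvas>`. A minimal sketch of the idea, with illustrative names rather than the component's actual API:

```ts
// Minimal sketch, not the component's actual code: two preloaded, hidden
// <video> elements, one visible surface (<canvas>), one `current` pointer.
const canvas = document.querySelector('canvas')!
const ctx = canvas.getContext('2d')!

function preload(src: string): HTMLVideoElement {
  const v = document.createElement('video')
  v.src = src
  v.loop = true
  v.muted = true
  v.style.display = 'none' // off-screen; the canvas is the only visible surface
  document.body.appendChild(v)
  return v
}

const silence = preload('/libai/wait.mp4') // idle loop
const say = preload('/libai/10.mp4') // speaking loop
let current = silence
current.play()

// swap(say) when TTS playback starts, swap(silence) when it ends
function swap(next: HTMLVideoElement) {
  current.pause()
  current = next
  current.currentTime = 0.1 // nudge past frame 0 to avoid a black first frame
  current.play()
}

function render() {
  if (current.videoWidth > 0) {
    canvas.width = current.videoWidth
    canvas.height = current.videoHeight
    ctx.drawImage(current, 0, 0, current.videoWidth, current.videoHeight)
  }
  requestAnimationFrame(render)
}
requestAnimationFrame(render)
```

Because only the canvas is visible, the swap happens inside the render loop, so a clip that has not decoded a frame yet never reaches the screen.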
<!-- eslint-disable no-unused-vars -->
<!-- eslint-disable camelcase -->
<script setup lang="ts">
-import { onMounted, ref } from 'vue'
+import { nextTick, onMounted, ref } from 'vue'
import { useRoute, useRouter } from 'vue-router'
-import type {
-  ServerMessagePartialResult,
-  ServerMessageResult,
-  Model
-} from '@/renderer/plugins/asr/index'
-import { audioAiTTS, localTTS } from '../plugins/tts'
import useStore from '@/renderer/store'
+import { guid } from '@/renderer/utils/index'

const router = useRouter()
const route = useRoute()
@@ -22,15 +17,11 @@ const recordVolume = ref(0)
const url = route.query.url as string
const role = useVideo.list.find((i) => i.url === url)
const microphoneState = ref<'waitInput' | 'input' | 'loading' | 'disabled' | 'reply'>('waitInput')
-const videoElement = ref<HTMLVideoElement | null>(null)
-const videoElement2 = ref<HTMLVideoElement | null>(null)
-const videos = [videoElement, videoElement2]
const inputContext: {
  mediaStream?: MediaStream
  audioContext?: AudioContext
  audioContext2?: AudioContext
  scriptProcessorNode?: ScriptProcessorNode
-  model?: Model
  ws?: WebSocket
  voskWs?: WebSocket
  asrPartial: string
@@ -43,8 +34,18 @@ const inputContext: {
  ttsAudios: []
}

+const videoElements = {
+  silence: createVideo(role?.url as string),
+  say: createVideo(role?.say as string),
+}
+const can = ref<HTMLCanvasElement | null>(null)
+let videoElement: HTMLVideoElement | null = null;
onMounted(() => {
-  // init();
+  init().catch((error) => {
+    microphoneState.value = 'waitInput'
+    showError(`init:${error}`)
+  })
})

router.beforeEach((g) => {
@@ -58,6 +59,41 @@ const showError = (msg: string) => {
  errorMsg.value = msg
}
+function drawFrame(
+  ctx: CanvasRenderingContext2D,
+  video: HTMLVideoElement,
+) {
+  ctx.canvas.width = video.videoWidth
+  ctx.canvas.height = video.videoHeight
+  ctx.clearRect(0, 0, video.videoWidth, video.videoHeight)
+  ctx.drawImage(video, 0, 0, video.videoWidth, video.videoHeight)
+}
+
+async function init() {
+  const ctx = can.value?.getContext('2d');
+  if (!ctx) return
+
+  videoElement = videoElements.silence.ele
+  await videoElements.silence.load
+  // ctx.canvas.width = videoElement.videoWidth
+  // ctx.canvas.height = videoElement.videoHeight
+  videoElement.play();
+
+  const fps = 1000 / 30
+  let lastTime = Date.now()
+  const updateFrame = () => {
+    if (Date.now() - lastTime > fps) {
+      videoElement && drawFrame(ctx, videoElement)
+      lastTime = Date.now()
+    }
+    requestAnimationFrame(updateFrame)
+  }
+  requestAnimationFrame(updateFrame)
+}
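
Review note: the loop above throttles drawing with `Date.now()`, and the constant named `fps` actually holds a frame interval in milliseconds (1000 / 30 ≈ 33 ms), not a rate. An equivalent sketch using the timestamp `requestAnimationFrame` already passes to its callback:

```ts
// A sketch only; `drawFrame`, `ctx`, and `videoElement` are the names from init() above.
const frameInterval = 1000 / 30 // what init() calls `fps`: a duration, not a rate
let last = 0
const updateFrame = (now: DOMHighResTimeStamp) => {
  if (now - last >= frameInterval) {
    last = now
    // videoElement && drawFrame(ctx, videoElement) // same draw call as in init()
  }
  requestAnimationFrame(updateFrame)
}
requestAnimationFrame(updateFrame)
```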
function analyzeMicrophoneVolume(stream: MediaStream, callback: (number) => void) {
  const audioContext = new AudioContext()
  const analyser = audioContext.createAnalyser()
@@ -187,7 +223,7 @@ async function llmEnd() {
    })
  ).json()

-  console.log('---------------->', resp)
+  console.log('----------------> llmEnd: ', resp)
}

async function endAudioInput() {
@@ -198,31 +234,55 @@ async function endAudioInput() {
  inputContext.audioContext?.close()
  inputContext.audioContext2?.close()
  inputContext.scriptProcessorNode && (inputContext.scriptProcessorNode.onaudioprocess = null)
-  inputContext.model?.terminate()
  if (inputContext.voskWs) {
    inputContext.voskWs.send('{"eof" : 1}')
    inputContext.voskWs.close()
  }
  inputContext.ttsAudios.length = 0
  inputContext.playingAudio?.pause()
-  videos[1].value?.pause()
-  videos[0].value?.pause()
+  toggleVideo(videoElements.silence.ele)
  isPlayRunning = false
}
-const canplay = () => {
-  videos[1].value!.style.opacity = '1'
-  videos[0].value!.style.opacity = '0'
-  videos[0].value!.pause()
-  videos[1].value!.play()
-  videos[1].value!.removeEventListener('canplay', canplay)
-  videos.unshift(videos.pop()!)
-}
-
-function loadVideo(url: string) {
-  videos[1].value!.src = url
-  videos[1].value!.style.opacity = '0'
-  videos[1].value!.addEventListener('canplay', canplay)
-}
+function createVideo(url: string) {
+  const video = document.createElement('video');
+
+  if (url === role?.url || url === role?.say) {
+    video.loop = true;
+    video.muted = true;
+  } else {
+    video.loop = false;
+    video.muted = false;
+  }
+  video.style.display = 'none';
+
+  const load = new Promise<void>((resolve, reject) => {
+    video.oncanplay = () => {
+      video.oncanplay = null;
+      resolve()
+      video.currentTime = 2;
+      document.body.appendChild(video);
+    };
+    video.onerror = reject;
+  })
+  video.src = url;
+
+  return {
+    ele: video,
+    load
+  };
+}
+
+function toggleVideo(ele: HTMLVideoElement) {
+  videoElement?.pause();
+  videoElement && (videoElement.currentTime = 0.1);
+  if (videoElement && videoElement !== videoElements.silence.ele && videoElement !== videoElements.say.ele) {
+    document.body.removeChild(videoElement);
+  }
+
+  ele.currentTime = 0.1;
+  ele.pause();
+
+  videoElement = ele
+  videoElement.play()
+}
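
Review note: `createVideo` keeps the persistent silence/say clips looping and muted, while one-shot answer clips (the `qa[].url` videos) play once with sound; `toggleVideo` detaches the previous element from the DOM unless it is one of the two persistent clips. The intended call pattern, mirroring `onQ()` below (`answerUrl` is an illustrative name):

```ts
const { ele, load } = createVideo(answerUrl) // answerUrl: one of role.qa[].url
await load // resolves on canplay; the element is appended to document.body there
toggleVideo(ele) // pause the current clip, detach it if it was a one-shot, play this one
ele.onended = () => toggleVideo(videoElements.silence.ele) // fall back to the idle loop
```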
async function qNLP(question: string) {
@@ -258,19 +318,21 @@ async function onQ(question: string) {
  try {
    const nlpUrl = await qNLP(question)
    if (nlpUrl) {
-      loadVideo(nlpUrl)
+      const { ele, load } = createVideo(nlpUrl)
      microphoneState.value = 'reply'
-      const videoEle = videos[1].value
-      videoEle!.loop = false
-      videoEle!.muted = false
-      videoEle!.onended = () => {
-        videoEle!.onended = null
+      await load;
+      // avoid rendering a black frame while the videos switch
+      await new Promise(resolve => setTimeout(resolve, 200))
+      toggleVideo(ele)
+      ele.onended = () => {
+        toggleVideo(videoElements.silence.ele)
        microphoneState.value = 'input'
        // TODO: decide whether re-initialization is needed here
      }
      return
    }
  } catch (error) {
+    console.error(error);
    microphoneState.value = 'input'
    showError(`nlp:${error}`)
    return
@@ -287,15 +349,15 @@ async function onQ(question: string) {
async function llmLoop(question: string) {
  if (!role) return

-  microphoneState.value = 'loading'
+  const sessionId = guid()
  const resp = await (
    await fetch(`${settings.llmUrl}/api/v1/generate`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
-      body: JSON.stringify({ question }),
+      body: JSON.stringify({ generator_id: sessionId, question }),
      mode: 'cors'
    })
  ).json()
@@ -313,6 +375,7 @@ async function llmLoop(question: string) {
      break
    }

+    // polling interval
    await new Promise((resolve) => setTimeout(resolve, 100))

    const { results } = await (
@@ -322,18 +385,18 @@ async function llmLoop(question: string) {
          'Content-Type': 'application/json'
        },
        mode: 'cors',
-        body: JSON.stringify({ question })
+        body: JSON.stringify({ generator_id: sessionId, question })
      })
    ).json()

    const audioList = results[0].audio_list as string[]
    if (audioList.length === 0) continue

-    const isEnd = audioList.at(-1) === 'stream_end'
-    if (isEnd) audioList.pop()
+    inputContext.llmEnd = audioList.at(-1) === 'stream_end';
+    if (inputContext.llmEnd) audioList.pop()

    const newList = audioList.slice(index)
-    if (newList.length === 0 && isEnd) break
+    if (newList.length === 0 && inputContext.llmEnd) return;
    if (newList.length === 0) continue

    for (let i = index; i < audioList.length; i++) {
@@ -358,18 +421,14 @@ async function llmLoop(question: string) {
    // TODO: test
    // inputContext.ttsAudios.push(
    //   ...newList.map((path) => {
-    //     const audio = new Audio(`http://192.168.1.57:6767/${path.split('\\').pop()}`)
+    //     const audio = new Audio(`http://10.90.120.45:6767/${path.split('\\').pop()}`)
    //     audio.load()
    //     return audio
    //   })
    // )

    runAudioPlay()
-    if (isEnd) break
  }
-
-  inputContext.llmEnd = true
}
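
Review note: the LLM round trip is generate-then-poll. One POST to `/api/v1/generate` opens a session keyed by `generator_id` (hence the new `guid()` import), then the loop re-polls every 100 ms until `audio_list` ends with the `'stream_end'` sentinel. A condensed sketch of that protocol; the poll endpoint's path is elided from this diff, so `POLL_URL` below is a placeholder:

```ts
declare const POLL_URL: string // actual poll path not shown in this diff

async function pollAudio(llmUrl: string, question: string, onAudio: (path: string) => void) {
  const generator_id = crypto.randomUUID() // the component uses its own guid() helper
  const headers = { 'Content-Type': 'application/json' }
  const body = JSON.stringify({ generator_id, question })
  await fetch(`${llmUrl}/api/v1/generate`, { method: 'POST', headers, body, mode: 'cors' })

  let index = 0
  for (;;) {
    await new Promise((r) => setTimeout(r, 100)) // polling interval
    const { results } = await (
      await fetch(POLL_URL, { method: 'POST', headers, body, mode: 'cors' })
    ).json()
    const audioList = results[0].audio_list as string[]
    const end = audioList.at(-1) === 'stream_end' // sentinel marks completion
    if (end) audioList.pop()
    audioList.slice(index).forEach(onAudio) // hand only unseen clips to playback
    index = audioList.length
    if (end) return
  }
}
```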
let isPlayRunning = false
@@ -380,7 +439,7 @@ async function runAudioPlay() {
  const audio = inputContext.ttsAudios.shift()
  if (!audio) {
    isPlayRunning = false
-    videos[0].value!.pause()
+    toggleVideo(videoElements.silence.ele);
    inputContext.llmEnd && (microphoneState.value = 'input')
    return
  }
@@ -390,9 +449,12 @@ async function runAudioPlay() {
  }
  await audio.play()
  inputContext.playingAudio = audio
-  loadVideo(role!.playUrl)
-  videos[1].value!.loop = true
-  videos[1].value!.muted = true
+  videoElements.say.load.then(async () => {
+    if (videoElements.say.ele.paused) {
+      toggleVideo(videoElements.say.ele);
+      videoElements.say.ele.play()
+    }
+  });
  microphoneState.value = 'reply'
}
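
Review note: `runAudioPlay` drains `inputContext.ttsAudios` as a FIFO queue: the say loop is shown while clips play, the avatar falls back to the silence loop once the queue is empty, and input is only re-enabled after `inputContext.llmEnd` is set. A simplified sketch of the pattern (`drainQueue` is an illustrative name; the real function also guards re-entry with `isPlayRunning`):

```ts
async function drainQueue(queue: HTMLAudioElement[]) {
  const clip = queue.shift()
  if (!clip) {
    toggleVideo(videoElements.silence.ele) // queue exhausted: back to the idle loop
    return
  }
  toggleVideo(videoElements.say.ele) // show the speaking loop while audio plays
  clip.onended = () => drainQueue(queue) // chain into the next clip
  await clip.play()
}
```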
@@ -416,8 +478,7 @@ async function down() {
    class="d-flex justify-center align-center"
    :style="{ background: '#000' }"
  >
-    <video id="videoElement" ref="videoElement" :src="url" class="video-ele active"></video>
-    <video id="videoElement2" ref="videoElement2" class="video-ele2"></video>
+    <canvas id="can" ref="can" style="width: 100%; height: 100%; aspect-ratio: 9/16;"></canvas>
  </div>

  <div class="voice">
@@ -465,7 +526,7 @@
    </v-chip>
  </div>

-  <v-snackbar v-model="errorSnackbar" multi-line :timeout="3000">
+  <v-snackbar v-model="errorSnackbar" multi-line :timeout="6000">
    {{ errorMsg }}
    <template #actions>
@@ -503,18 +564,6 @@ async function down() {
  border-radius: 36%;
}

-.video-ele,
-.video-ele2 {
-  position: absolute;
-  width: 100%;
-  height: 100%;
-  opacity: 0;
-}
-.video-ele.active,
-.video-ele2.active {
-  opacity: 1;
-}

.q-list {
  position: fixed;
  bottom: 0;
...
@@ -2,7 +2,7 @@ import { defineStore } from 'pinia'
type IVideo = {
  list: {
-    playUrl: string
+    say: string
    url: string
    poster: string
    name: string
@@ -17,9 +17,9 @@ const useVideoStore = defineStore('video', {
    list: [
      {
        url: new URL('/libai/wait.mp4', import.meta.url).href,
+        say: new URL('/libai/10.mp4', import.meta.url).href,
        poster: new URL('/libai/poster.jpg', import.meta.url).href,
        name: '李白',
-        playUrl: new URL('/libai/10.mp4', import.meta.url).href,
        qa: [
          {
            url: new URL('/libai/1.mp4', import.meta.url).href,
@@ -74,9 +74,9 @@ const useVideoStore = defineStore('video', {
        ]
      },
      {
-        url: new URL('/suhe/wait.mov', import.meta.url).href,
+        url: new URL('/suhe/silence.mov', import.meta.url).href,
+        say: new URL('/suhe/say.mov', import.meta.url).href,
        poster: new URL('/suhe/poster.jpg', import.meta.url).href,
-        playUrl: new URL('/suhe/5.mov', import.meta.url).href,
        name: '苏荷',
        qa: [
          {
...