Commit 8a378618 authored by ali

feat: handle the transition between the video digital human's speaking and idle states

parent 0a290374
<!-- eslint-disable no-unused-vars -->
<!-- eslint-disable camelcase -->
<script setup lang="ts">
import { onMounted, ref } from 'vue'
import { nextTick, onMounted, ref } from 'vue'
import { useRoute, useRouter } from 'vue-router'
import type {
ServerMessagePartialResult,
ServerMessageResult,
Model
} from '@/renderer/plugins/asr/index'
import { audioAiTTS, localTTS } from '../plugins/tts'
import useStore from '@/renderer/store'
import { guid } from '@/renderer/utils/index'
const router = useRouter()
const route = useRoute()
@@ -22,15 +17,11 @@ const recordVolume = ref(0)
const url = route.query.url as string
const role = useVideo.list.find((i) => i.url === url)
const microphoneState = ref<'waitInput' | 'input' | 'loading' | 'disabled' | 'reply'>('waitInput')
const videoElement = ref<HTMLVideoElement | null>(null)
const videoElement2 = ref<HTMLVideoElement | null>(null)
const videos = [videoElement, videoElement2]
const inputContext: {
mediaStream?: MediaStream
audioContext?: AudioContext
audioContext2?: AudioContext
scriptProcessorNode?: ScriptProcessorNode
model?: Model
ws?: WebSocket
voskWs?: WebSocket
asrPartial: string
@@ -43,8 +34,18 @@ const inputContext: {
ttsAudios: []
}
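// Hidden, preloaded <video> elements for the two persistent states: the looping idle (silence) clip and the looping speaking (say) clip.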
const videoElements = {
silence: createVideo(role?.url as string),
say: createVideo(role?.say as string),
}
const can = ref<HTMLCanvasElement | null>(null)
let videoElement: HTMLVideoElement | null = null;
onMounted(() => {
// init();
init().catch((error) => {
microphoneState.value = 'waitInput'
showError(`init:${error}`)
})
})
router.beforeEach((g) => {
@@ -58,6 +59,41 @@ const showError = (msg: string) => {
errorMsg.value = msg
}
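// Draw the current frame of the given video onto the canvas, matching the canvas size to the video's intrinsic dimensions.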
function drawFrame(
ctx: CanvasRenderingContext2D,
video: HTMLVideoElement,
) {
ctx.canvas.width = video.videoWidth
ctx.canvas.height = video.videoHeight
ctx.clearRect(0, 0, video.videoWidth, video.videoHeight)
ctx.drawImage(video, 0, 0, video.videoWidth, video.videoHeight)
}
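// init: wait for the idle clip to be ready, start it, then drive a requestAnimationFrame loop that keeps blitting the active video to the canvas.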
async function init() {
const ctx = can.value?.getContext('2d');
if (!ctx) return
videoElement = videoElements.silence.ele
await videoElements.silence.load
// ctx.canvas.width = videoElement.videoWidth
// ctx.canvas.height = videoElement.videoHeight
videoElement.play();
const frameInterval = 1000 / 30 // ms between canvas draws, targeting ~30 fps
let lastTime = Date.now()
const updateFrame = () => {
if (Date.now() - lastTime > frameInterval) {
videoElement && drawFrame(ctx, videoElement)
lastTime = Date.now()
}
requestAnimationFrame(updateFrame)
}
requestAnimationFrame(updateFrame)
}
function analyzeMicrophoneVolume(stream: MediaStream, callback: (volume: number) => void) {
const audioContext = new AudioContext()
const analyser = audioContext.createAnalyser()
@@ -187,7 +223,7 @@ async function llmEnd() {
})
).json()
console.log('---------------->', resp)
console.log('----------------> llmEnd: ', resp)
}
async function endAudioInput() {
@@ -198,31 +234,55 @@ async function endAudioInput() {
inputContext.audioContext?.close()
inputContext.audioContext2?.close()
inputContext.scriptProcessorNode && (inputContext.scriptProcessorNode.onaudioprocess = null)
inputContext.model?.terminate()
if (inputContext.voskWs) {
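// Vosk's WebSocket protocol uses {"eof" : 1} to ask the server to flush the final recognition result before the socket closes.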
inputContext.voskWs.send('{"eof" : 1}')
inputContext.voskWs.close()
}
inputContext.ttsAudios.length = 0
inputContext.playingAudio?.pause()
videos[1].value?.pause()
videos[0].value?.pause()
toggleVideo(videoElements.silence.ele)
isPlayRunning = false
}
const canplay = () => {
videos[1].value!.style.opacity = '1'
videos[0].value!.style.opacity = '0'
videos[0].value!.pause()
videos[1].value!.play()
videos[1].value!.removeEventListener('canplay', canplay)
videos.unshift(videos.pop()!)
}
function loadVideo(url: string) {
videos[1].value!.src = url
videos[1].value!.style.opacity = '0'
videos[1].value!.addEventListener('canplay', canplay)
}
function createVideo(url: string) {
const video = document.createElement('video');
if (url === role?.url || url === role?.say) {
video.loop = true;
video.muted = true;
} else {
video.loop = false;
video.muted = false;
}
video.style.display = 'none';
const load = new Promise<void>((resolve, reject) => {
video.oncanplay = () => {
video.oncanplay = null;
resolve()
video.currentTime = 2;
document.body.appendChild(video);
};
video.onerror = reject;
})
video.src = url;
return {
ele: video,
load
};
}
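// Switch the active video: pause and rewind the previous one, remove any one-off Q&A clip from the DOM, then play the new one; the canvas loop picks up its frames automatically.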
function toggleVideo(ele: HTMLVideoElement) {
videoElement?.pause();
videoElement && (videoElement.currentTime = 0.1);
if (videoElement && videoElement !== videoElements.silence.ele && videoElement !== videoElements.say.ele) {
document.body.removeChild(videoElement);
}
ele.currentTime = 0.1;
ele.pause();
videoElement = ele
videoElement.play()
}
async function qNLP(question: string) {
@@ -258,19 +318,21 @@ async function onQ(question: string) {
try {
const nlpUrl = await qNLP(question)
if (nlpUrl) {
loadVideo(nlpUrl)
const { ele, load } = createVideo(nlpUrl)
microphoneState.value = 'reply'
const videoEle = videos[1].value
videoEle!.loop = false
videoEle!.muted = false
videoEle!.onended = () => {
videoEle!.onended = null
await load;
// Prevent a black frame from rendering while the video switches
await new Promise(resolve => setTimeout(resolve, 200))
toggleVideo(ele)
ele.onended = () => {
toggleVideo(videoElements.silence.ele)
microphoneState.value = 'input'
// TODO: determine whether re-initialization is needed
}
return
}
} catch (error) {
console.error(error);
microphoneState.value = 'input'
showError(`nlp:${error}`)
return
@@ -287,15 +349,15 @@ async function onQ(question: string) {
async function llmLoop(question: string) {
if (!role) return
microphoneState.value = 'loading'
const sessionId = guid()
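// A fresh generator_id per question; presumably the server uses it to correlate this generate call with the polling calls below.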
const resp = await (
await fetch(`${settings.llmUrl}/api/v1/generate`, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({ question }),
body: JSON.stringify({ generator_id: sessionId, question }),
mode: 'cors'
})
).json()
@@ -313,6 +375,7 @@ async function llmLoop(question: string) {
break
}
// Polling interval
await new Promise((resolve) => setTimeout(resolve, 100))
const { results } = await (
@@ -322,18 +385,18 @@
'Content-Type': 'application/json'
},
mode: 'cors',
body: JSON.stringify({ question })
body: JSON.stringify({ generator_id: sessionId, question })
})
).json()
const audioList = results[0].audio_list as string[]
if (audioList.length === 0) continue
const isEnd = audioList.at(-1) === 'stream_end'
inputContext.llmEnd = audioList.at(-1) === 'stream_end';
if (isEnd) audioList.pop()
if (inputContext.llmEnd) audioList.pop()
const newList = audioList.slice(index)
if (newList.length === 0 && isEnd) break
if (newList.length === 0 && inputContext.llmEnd) return;
if (newList.length === 0) continue
for (let i = index; i < audioList.length; i++) {
@@ -358,18 +421,14 @@
// TODO: test
// inputContext.ttsAudios.push(
// ...newList.map((path) => {
// const audio = new Audio(`http://192.168.1.57:6767/${path.split('\\').pop()}`)
// const audio = new Audio(`http://10.90.120.45:6767/${path.split('\\').pop()}`)
// audio.load()
// return audio
// })
// )
runAudioPlay()
if (isEnd) break
}
inputContext.llmEnd = true
}
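// Single-flight guard for the TTS playback loop below; cleared when the audio queue drains or input ends.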
let isPlayRunning = false
@@ -380,7 +439,7 @@ async function runAudioPlay() {
const audio = inputContext.ttsAudios.shift()
if (!audio) {
isPlayRunning = false
videos[0].value!.pause()
toggleVideo(videoElements.silence.ele);
inputContext.llmEnd && (microphoneState.value = 'input')
return
}
@@ -390,9 +449,12 @@
}
await audio.play()
inputContext.playingAudio = audio
loadVideo(role!.playUrl)
videos[1].value!.loop = true
videos[1].value!.muted = true
videoElements.say.load.then(async () => {
if (videoElements.say.ele.paused) {
toggleVideo(videoElements.say.ele);
videoElements.say.ele.play()
}
});
microphoneState.value = 'reply'
}
@@ -416,8 +478,7 @@ async function down() {
class="d-flex justify-center align-center"
:style="{ background: '#000' }"
>
<video id="videoElement" ref="videoElement" :src="url" class="video-ele active"></video>
<video id="videoElement2" ref="videoElement2" class="video-ele2"></video>
<canvas id="can" ref="can" style="width: 100%; height: 100%; aspect-ratio: 9/16; "></canvas>
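<!-- Frames from the hidden <video> elements are drawn onto this canvas, avoiding the flash that swapping <video> tags can cause. -->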
</div>
<div class="voice">
@@ -465,7 +526,7 @@ async function down() {
</v-chip>
</div>
<v-snackbar v-model="errorSnackbar" multi-line :timeout="3000">
<v-snackbar v-model="errorSnackbar" multi-line :timeout="6000">
{{ errorMsg }}
<template #actions>
@@ -503,18 +564,6 @@ async function down() {
border-radius: 36%;
}
.video-ele,
.video-ele2 {
position: absolute;
width: 100%;
height: 100%;
opacity: 0;
}
.video-ele.active,
.video-ele2.active {
opacity: 1;
}
.q-list {
position: fixed;
bottom: 0;
......
@@ -2,7 +2,7 @@ import { defineStore } from 'pinia'
type IVideo = {
list: {
playUrl: string
say: string
url: string
poster: string
name: string
@@ -17,9 +17,9 @@ const useVideoStore = defineStore('video', {
list: [
{
url: new URL('/libai/wait.mp4', import.meta.url).href,
say: new URL('/libai/10.mp4', import.meta.url).href,
poster: new URL('/libai/poster.jpg', import.meta.url).href,
name: '李白',
playUrl: new URL('/libai/10.mp4', import.meta.url).href,
qa: [
{
url: new URL('/libai/1.mp4', import.meta.url).href,
@@ -74,9 +74,9 @@ const useVideoStore = defineStore('video', {
]
},
{
url: new URL('/suhe/wait.mov', import.meta.url).href,
url: new URL('/suhe/silence.mov', import.meta.url).href,
say: new URL('/suhe/say.mov', import.meta.url).href,
poster: new URL('/suhe/poster.jpg', import.meta.url).href,
playUrl: new URL('/suhe/5.mov', import.meta.url).href,
name: '苏荷',
qa: [
{
......