Commit 52ba82c4 authored by ali

feat: support interrupting the photo digital human

parent fe0d5ad2
@@ -323,6 +323,27 @@ export class PhotoRole extends EventEmitter {
     return resp
   }
 
+  private async _stopLive(taskId: string) {
+    const resp = (await http({
+      method: 'POST',
+      url: `${this.host}/live/stop`,
+      data: { taskId }
+    })) as {
+      code: number
+      data: {
+        code: number
+        taskId: string
+      }
+      msg?: string
+    }
+
+    if (resp.code && resp.code !== 200) {
+      throw new Error(resp.msg)
+    }
+
+    return resp
+  }
+
   private _checkStatus(
     taskId: string,
     checkStatus: 'init' | 'ready' | 'wait' | 'closing' | 'pushing'
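Note on the new `_stopLive`: it posts the session's `taskId` to `/live/stop` and treats any truthy business `code` other than 200 as a failure. A standalone sketch of the same exchange (the class goes through the project's `http` helper; `fetch`, the `host` argument, and the function name here are illustrative stand-ins):

```ts
interface StopLiveResponse {
  code: number
  data: { code: number; taskId: string }
  msg?: string
}

// Minimal stand-in for the /live/stop call; mirrors the guard in the diff.
async function stopLive(host: string, taskId: string): Promise<StopLiveResponse> {
  const res = await fetch(`${host}/live/stop`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ taskId })
  })
  const resp = (await res.json()) as StopLiveResponse
  if (resp.code && resp.code !== 200) {
    throw new Error(resp.msg) // non-200 business code: surface the server message
  }
  return resp
}
```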
@@ -487,10 +508,14 @@ export class PhotoRole extends EventEmitter {
     return (data as SdkConfigDataParams).baseUrl
   }
 
-  destroy() {
-    this._webRTCContainer && document.body.removeChild(this._webRTCContainer)
+  async destroy() {
+    this._liveTaskQueue.length = 0;
+    await this._stopLive(this.sessionId);
+    this._liveStatus = 'closing';
+    // this.answerArgs = null;
+    // this._webRTCContainer && document.body.removeChild(this._webRTCContainer)
     this._rtc?.stopPlay()
-    this._rtc?.destroyed()
+    // this._rtc?.destroyed()
     clearTimeout(this._pollTimeout)
     return {
       code: 1
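`destroy()` is now async, and the ordering inside it matters: the pending task queue is emptied before the server-side stop, so no queued clip can start while `/live/stop` is in flight, and `_liveStatus` only flips to `'closing'` after the server confirms. Callers therefore have to `await` it, as `endAudioInput` does below. A hedged caller sketch (the import path and wrapper are assumptions, not part of this commit):

```ts
import { PhotoRole } from './photo' // assumed module path

// Interrupt the avatar and report whether teardown succeeded.
async function interruptAvatar(role: PhotoRole): Promise<boolean> {
  try {
    const { code } = await role.destroy() // queue cleared + /live/stop awaited inside
    return code === 1
  } catch (err) {
    // _stopLive throws on a non-200 business code; treat it as a failed interrupt.
    console.error('failed to stop live session', err)
    return false
  }
}
```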
@@ -27,6 +27,22 @@ const videoElement = ref<HTMLVideoElement | null>(null)
 const can = ref<HTMLCanvasElement | null>(null)
 let photoRole: PhotoRole | null = null
 let flvPlayer: flvjs.Player | null = null
+const inputContext: {
+  mediaStream?: MediaStream
+  audioContext?: AudioContext
+  audioContext2?: AudioContext
+  scriptProcessorNode?: ScriptProcessorNode
+  model?: Model
+  ws?: WebSocket
+  voskWs?: WebSocket
+  asrPartial: string
+  answerArray: { text: string; isLast: boolean }[]
+  steps: Promise<string>[]
+} = {
+  asrPartial: '',
+  answerArray: [],
+  steps: []
+}
 
 onMounted(() => {
   init()
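Note: `answerArray` and `steps` move from module-level `const`s (removed further down) into `inputContext`, so all per-utterance state lives on one object and can be reset when the user interrupts. A hypothetical helper showing the reset this enables; `endAudioInput` below does the same inline:

```ts
// Clears everything a cancelled utterance may have left behind.
function resetUtteranceState(ctx: {
  asrPartial: string
  answerArray: { text: string; isLast: boolean }[]
  steps: Promise<string>[]
}) {
  ctx.asrPartial = ''          // partial ASR transcript
  ctx.answerArray.length = 0   // unspoken TTS segments
  ctx.steps.length = 0         // pending step promises
}
```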
@@ -113,7 +129,6 @@ function draw(
   }
 }
 
-// eslint-disable-next-line no-unused-vars
 async function initPlayer(videoEle: HTMLVideoElement) {
   flvPlayer = flvjs.createPlayer(
     {
@@ -191,19 +206,6 @@ function analyzeMicrophoneVolume(stream: MediaStream, callback: (number) => void
   inputContext.scriptProcessorNode = recordEventNode
 }
 
-const inputContext: {
-  mediaStream?: MediaStream
-  audioContext?: AudioContext
-  audioContext2?: AudioContext
-  scriptProcessorNode?: ScriptProcessorNode
-  model?: Model
-  ws?: WebSocket
-  voskWs?: WebSocket
-  asrPartial: string
-} = {
-  asrPartial: ''
-}
-
 async function startVoskWasmAudioInput() {
   if (microphoneState.value === 'loading') return
@@ -356,7 +358,7 @@ function initLLMSocket(): Promise<WebSocket> {
   })
 }
 
-function endAudioInput() {
+async function endAudioInput() {
   microphoneState.value = 'waitInput'
   inputContext.mediaStream?.getTracks().forEach((track) => track.stop())
   inputContext.audioContext?.close()
@@ -367,16 +369,17 @@ function endAudioInput() {
     inputContext.voskWs.send('{"eof" : 1}')
     inputContext.voskWs.close()
   }
   inputContext.asrPartial = ''
+  inputContext.answerArray.length = 0
+  await photoRole?.destroy();
 }
 
-const answerArray: { text: string; isLast: boolean }[] = []
-const steps: Promise<string>[] = []
 const checkSteps = async () => {
   let count = 0
-  for (let i = 0; i < steps.length; i++) {
+  for (let i = 0; i < inputContext.steps.length; i++) {
     try {
       const res = await Promise.race([
-        steps[i],
+        inputContext.steps[i],
         new Promise((resolve) => setTimeout(() => resolve(false), 10))
      ])
      if (res === false) continue
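`checkSteps` keeps its existing trick of racing each step promise against a 10 ms timer to probe whether it has settled, now reading from `inputContext.steps`. A generic version of that probe; the diff uses `false` as the sentinel, which would be ambiguous if a step could legitimately resolve to `false`, so this sketch substitutes a unique symbol:

```ts
// Resolves true if `p` settles within `ms` milliseconds, false otherwise.
async function hasSettled<T>(p: Promise<T>, ms = 10): Promise<boolean> {
  const pending = Symbol('pending')
  try {
    const winner = await Promise.race([
      p,
      new Promise<symbol>((resolve) => setTimeout(() => resolve(pending), ms))
    ])
    return winner !== pending
  } catch {
    return true // a rejection also means the promise has settled
  }
}
```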
@@ -403,7 +406,7 @@ const createStep = () => {
   return { pose, stepResolve, stepReject }
 }
 
 async function onQ(question: string) {
-  console.log('---------------->question: ', question)
+  console.log('----------------> question: ', question)
 
   microphoneState.value = 'loading'
@@ -414,10 +417,10 @@ async function onQ(question: string) {
   let answer = ''
   let isTime = true
   let sliceAnswerLength = 10
-  answerArray.length = 0
-  steps.length = 0
+  inputContext.answerArray.length = 0
+  inputContext.steps.length = 0
   const { pose, stepResolve, stepReject } = createStep()
-  steps.push(pose)
+  inputContext.steps.push(pose)
   photoRole!.answerArgs = new PhotoAnswer()
 
   ws.onmessage = (message) => {
@@ -429,9 +432,9 @@ async function onQ(question: string) {
     }
 
     if (event === 'stream_end') {
-      answerArray.push({ text: sliceAnswer, isLast: true })
+      inputContext.answerArray.push({ text: sliceAnswer, isLast: true })
       sliceAnswer = ''
-      runTTSTask(answerArray)
+      runTTSTask()
       inputContext.ws?.close()
       console.log('----------------> answer: ', answer)
       stepResolve('chat')
@@ -451,8 +454,8 @@ async function onQ(question: string) {
       if (/[。,?!;,.?!;]/.test(t) && sliceAnswer.length >= sliceAnswerLength) {
         console.timeEnd('sliceAnswer')
         sliceAnswerLength = settings.llmToTTSSliceLength
-        answerArray.push({ text: sliceAnswer, isLast: true })
-        runTTSTask(answerArray)
+        inputContext.answerArray.push({ text: sliceAnswer, isLast: true })
+        runTTSTask()
         sliceAnswer = ''
         isTime = true
       }
@@ -462,21 +465,20 @@ async function onQ(question: string) {
     }
   }
 
   console.log('----------------> Asr:', question)
   ws.send(JSON.stringify({ prompt: question, historys_list: [] }))
 }
 
 let isTTSRunning = false
-async function runTTSTask(tasks: { text: string; isLast: boolean }[]) {
+async function runTTSTask() {
   if (isTTSRunning) return
   isTTSRunning = true
 
   const { pose, stepResolve, stepReject } = createStep()
-  steps.push(pose)
+  inputContext.steps.push(pose)
 
   try {
-    while (tasks.length) {
-      const task = tasks.shift()
+    while (inputContext.answerArray.length) {
+      const task = inputContext.answerArray.shift()
       if (!task) break
       if (task.text.trim().length < 1) continue
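With the `tasks` parameter gone, `runTTSTask` is a single-consumer drain over the shared `inputContext.answerArray`: producers push segments and invoke it, and `isTTSRunning` turns concurrent invocations into no-ops while one loop is active; segments pushed mid-drain are picked up by the same loop via `shift()`. The pattern in isolation (generic names, not the component's):

```ts
let running = false
const queue: { text: string; isLast: boolean }[] = []

// Drain the shared queue; extra invocations while a drain is active return
// immediately, so segments are always consumed in order by a single loop.
async function drainQueue(speak: (text: string) => Promise<void>) {
  if (running) return
  running = true
  try {
    while (queue.length) {
      const task = queue.shift()
      if (!task) break
      if (task.text.trim().length < 1) continue
      await speak(task.text)
    }
  } finally {
    running = false
  }
}
```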
@@ -486,11 +488,18 @@ async function runTTSTask(tasks: { text: string; isLast: boolean }[]) {
         text: task.text,
         audio_path: settings.userData
       })
+      console.log('----------------> TTS:', res[0].text)
       console.timeEnd(task.text + ' TTS: ')
-      console.log('---------------->', res[0].text)
       const audioPath = await uploadFile({ filePath: res[0].text })
+
+      // @ts-ignore
+      if (microphoneState.value === 'input') {
+        break
+      }
+
       photoRole?.enQueue({
         taskId: photoRole.sessionId,
         audioUrl: `https://resources.laihua.com/${audioPath}`,
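The new guard is the heart of the interrupt feature on the TTS side: synthesis for the current segment still completes, but its audio is never enqueued once `microphoneState` has flipped back to `'input'`; whatever remains in the queue is cleared by `endAudioInput`. The shape of the loop with illustrative stand-ins for the component's helpers:

```ts
type MicState = 'waitInput' | 'loading' | 'input' | 'reply' | 'disabled'

async function speakSegments(
  segments: string[],
  getMicState: () => MicState,
  synthesize: (text: string) => Promise<string>, // TTS + upload, returns an audio URL
  enqueue: (audioUrl: string) => void            // push to the avatar's play queue
) {
  for (const text of segments) {
    const audioUrl = await synthesize(text)
    // User started talking again while TTS ran: drop this and all later segments.
    if (getMicState() === 'input') break
    enqueue(audioUrl)
  }
}
```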
@@ -537,6 +546,14 @@ async function runAudioPlay() {
   }
 
   await audio.play()
 }
 
+async function down() {
+  if (microphoneState.value === 'reply') {
+    await endAudioInput()
+  }
+
+  startVoskWsAudioInput()
+}
+
 </script>
 
 <template>
@@ -563,10 +580,9 @@ async function runAudioPlay() {
             size="x-large"
             :disabled="
               microphoneState === 'loading' ||
-              microphoneState === 'disabled' ||
-              microphoneState === 'reply'
+              microphoneState === 'disabled'
             "
-            @pointerdown="startVoskWsAudioInput"
+            @pointerdown="down"
           >
             <v-icon v-if="microphoneState === 'waitInput'" icon="mdi-microphone"></v-icon>
             <v-icon v-if="microphoneState === 'loading'" icon="mdi-microphone-settings"></v-icon>
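Dropping `microphoneState === 'reply'` from `:disabled` is what makes barge-in possible: the mic button stays active while the avatar is speaking, and `@pointerdown` now routes through `down()`, which tears the current reply down before listening again. The whole path as a free-standing sketch (the callbacks stand in for the component's real functions):

```ts
async function bargeIn(
  isReplying: () => boolean,          // microphoneState === 'reply'
  endAudioInput: () => Promise<void>, // stops ASR, clears queues, awaits photoRole.destroy()
  startListening: () => void          // startVoskWsAudioInput()
) {
  if (isReplying()) {
    await endAudioInput() // must complete before a new capture session starts
  }
  startListening()
}
```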