feat: 照片数字人支持中断功能

52ba82c4 · ali · fe0d5ad2 · 52ba82c4 · 52ba82c4
Commit 52ba82c4 authored Dec 26, 2023 by ali
Show whitespace changes
Inline Side-by-side

Showing with 79 additions and 38 deletions

PhotoRole.ts src/renderer/plugins/live/PhotoRole.ts +28 -3

ShowPhoto.vue src/renderer/screens/ShowPhoto.vue +51 -35

No files found.
--- a/src/renderer/plugins/live/PhotoRole.ts
+++ b/src/renderer/plugins/live/PhotoRole.ts
@@ -323,6 +323,27 @@ export class PhotoRole extends EventEmitter {
    return resp
  }
+  private async _stopLive(taskId: string) { // POSTs { taskId } to `${this.host}/live/stop`; returns the response, throws when its code is set and not 200
+    const resp = (await http({
+      method: 'POST',
+      url: `${this.host}/live/stop`,
+      data: { taskId }
+    })) as { // the response shape is asserted by this cast, not checked at runtime
+      code: number
+      data: {
+        code: number
+        taskId: string
+      }
+      msg?: string
+    }
+    if (resp.code && resp.code !== 200) { // a falsy code (0 or missing) passes this guard and is treated as success
+      throw new Error(resp.msg) // msg is optional in the asserted type, so the Error message may be undefined
+    }
+    return resp
+  }
  private _checkStatus(
    taskId: string,
    checkStatus: 'init' | 'ready' | 'wait' | 'closing' | 'pushing'
@@ -487,10 +508,14 @@ export class PhotoRole extends EventEmitter {
    return (data as SdkConfigDataParams).baseUrl
  }
-  destroy() {
+  async destroy() {
-    this._webRTCContainer && document.body.removeChild(this._webRTCContainer)
+    this._liveTaskQueue.length = 0;
+    await this._stopLive(this.sessionId);
+    this._liveStatus = 'closing';
+    // this.answerArgs = null;
+    // this._webRTCContainer && document.body.removeChild(this._webRTCContainer)
    this._rtc?.stopPlay()
-    this._rtc?.destroyed()
+    // this._rtc?.destroyed()
    clearTimeout(this._pollTimeout)
    return {
      code: 1

--- a/src/renderer/screens/ShowPhoto.vue
+++ b/src/renderer/screens/ShowPhoto.vue
@@ -27,6 +27,22 @@ const videoElement = ref<HTMLVideoElement | null>(null)
 const can = ref<HTMLCanvasElement | null>(null)
 let photoRole: PhotoRole | null = null
 let flvPlayer: flvjs.Player | null = null
+const inputContext: { // Mutable state shared by the functions in this script: audio capture handles, sockets, and pending answer data
+  mediaStream?: MediaStream // its tracks are stopped in endAudioInput
+  audioContext?: AudioContext // closed in endAudioInput
+  audioContext2?: AudioContext
+  scriptProcessorNode?: ScriptProcessorNode // assigned in analyzeMicrophoneVolume
+  model?: Model
+  ws?: WebSocket // closed by onQ when the 'stream_end' event arrives
+  voskWs?: WebSocket // endAudioInput sends '{"eof" : 1}' on it and then closes it
+  asrPartial: string // reset to '' in endAudioInput
+  answerArray: { text: string; isLast: boolean }[] // answer slices pushed by onQ and shift()ed by runTTSTask; emptied in endAudioInput
+  steps: Promise<string>[] // step promises pushed by onQ and runTTSTask, iterated by checkSteps
+} = {
+  asrPartial: '',
+  answerArray: [],
+  steps: []
+}
 onMounted(() => {
  init()
@@ -113,7 +129,6 @@ function draw(
  }
 }
-// eslint-disable-next-line no-unused-vars
 async function initPlayer(videoEle: HTMLVideoElement) {
  flvPlayer = flvjs.createPlayer(
    {
@@ -191,19 +206,6 @@ function analyzeMicrophoneVolume(stream: MediaStream, callback: (number) => void
  inputContext.scriptProcessorNode = recordEventNode
 }
-const inputContext: {
-  mediaStream?: MediaStream
-  audioContext?: AudioContext
-  audioContext2?: AudioContext
-  scriptProcessorNode?: ScriptProcessorNode
-  model?: Model
-  ws?: WebSocket
-  voskWs?: WebSocket
-  asrPartial: string
-} = {
-  asrPartial: ''
-}
 async function startVoskWasmAudioInput() {
  if (microphoneState.value === 'loading') return
@@ -356,7 +358,7 @@ function initLLMSocket(): Promise<WebSocket> {
  })
 }
-function endAudioInput() {
+async function endAudioInput() {
  microphoneState.value = 'waitInput'
  inputContext.mediaStream?.getTracks().forEach((track) => track.stop())
  inputContext.audioContext?.close()
@@ -367,16 +369,17 @@ function endAudioInput() {
    inputContext.voskWs.send('{"eof" : 1}')
    inputContext.voskWs.close()
  }
+  inputContext.asrPartial = ''
+  inputContext.answerArray.length = 0
+  await photoRole?.destroy();
 }
-const answerArray: { text: string; isLast: boolean }[] = []
-const steps: Promise<string>[] = []
 const checkSteps = async () => {
  let count = 0
-  for (let i = 0; i < steps.length; i++) {
+  for (let i = 0; i < inputContext.steps.length; i++) {
    try {
      const res = await Promise.race([
-        steps[i],
+        inputContext.steps[i],
        new Promise((resolve) => setTimeout(() => resolve(false), 10))
      ])
      if (res === false) continue
@@ -403,7 +406,7 @@ const createStep = () => {
  return { pose, stepResolve, stepReject }
 }
 async function onQ(question: string) {
-  console.log('---------------->question: ', question)
+  console.log('----------------> question: ', question)
  microphoneState.value = 'loading'
@@ -414,10 +417,10 @@ async function onQ(question: string) {
  let answer = ''
  let isTime = true
  let sliceAnswerLength = 10
-  answerArray.length = 0
+  inputContext.answerArray.length = 0
-  steps.length = 0
+  inputContext.steps.length = 0
  const { pose, stepResolve, stepReject } = createStep()
-  steps.push(pose)
+  inputContext.steps.push(pose)
  photoRole!.answerArgs = new PhotoAnswer()
  ws.onmessage = (message) => {
@@ -429,9 +432,9 @@ async function onQ(question: string) {
      }
      if (event === 'stream_end') {
-        answerArray.push({ text: sliceAnswer, isLast: true })
+        inputContext.answerArray.push({ text: sliceAnswer, isLast: true })
        sliceAnswer = ''
-        runTTSTask(answerArray)
+        runTTSTask()
        inputContext.ws?.close()
        console.log('----------------> answer: ', answer)
        stepResolve('chat')
@@ -451,8 +454,8 @@ async function onQ(question: string) {
        if (/[。，？！；,.?!;]/.test(t) && sliceAnswer.length >= sliceAnswerLength) {
          console.timeEnd('sliceAnswer')
          sliceAnswerLength = settings.llmToTTSSliceLength
-          answerArray.push({ text: sliceAnswer, isLast: true })
+          inputContext.answerArray.push({ text: sliceAnswer, isLast: true })
-          runTTSTask(answerArray)
+          runTTSTask()
          sliceAnswer = ''
          isTime = true
        }
@@ -462,21 +465,20 @@ async function onQ(question: string) {
    }
  }
-  console.log('----------------> Asr:', question)
  ws.send(JSON.stringify({ prompt: question, historys_list: [] }))
 }
 let isTTSRunning = false
-async function runTTSTask(tasks: { text: string; isLast: boolean }[]) {
+async function runTTSTask() {
  if (isTTSRunning) return
  isTTSRunning = true
  const { pose, stepResolve, stepReject } = createStep()
-  steps.push(pose)
+  inputContext.steps.push(pose)
  try {
-    while (tasks.length) {
+    while (inputContext.answerArray.length) {
-      const task = tasks.shift()
+      const task = inputContext.answerArray.shift()
      if (!task) break
      if (task.text.trim().length < 1) continue
@@ -486,11 +488,18 @@ async function runTTSTask(tasks: { text: string; isLast: boolean }[]) {
        text: task.text,
        audio_path: settings.userData
      })
      console.log('----------------> TTS:', res[0].text)
      console.timeEnd(task.text + ' TTS: ')
      console.log('---------------->', res[0].text)
      const audioPath = await uploadFile({ filePath: res[0].text })
+      // @ts-ignore
+      if (microphoneState.value === 'input') {
+        break
+      }
      photoRole?.enQueue({
        taskId: photoRole.sessionId,
        audioUrl: `https://resources.laihua.com/${audioPath}`,
@@ -537,6 +546,14 @@ async function runAudioPlay() {
  }
  await audio.play()
 }
+async function down() { // Bound to the microphone button's @pointerdown in the template; starts voice input, interrupting a reply first if needed
+  if (microphoneState.value === 'reply') { // pressed while the state is 'reply': tear the current session down before listening again
+    await endAudioInput() // clears input state and awaits photoRole?.destroy()
+  }
+  startVoskWsAudioInput() // NOTE(review): result is not awaited; if this returns a promise, confirm a rejection cannot go unhandled
+}
 </script>
 <template>
@@ -563,10 +580,9 @@ async function runAudioPlay() {
      size="x-large"
      :disabled="
        microphoneState === 'loading' ||
-        microphoneState === 'disabled' ||
+        microphoneState === 'disabled'
-        microphoneState === 'reply'
      "
-      @pointerdown="startVoskWsAudioInput"
+      @pointerdown="down"
    >
      <v-icon v-if="microphoneState === 'waitInput'" icon="mdi-microphone"></v-icon>
      <v-icon v-if="microphoneState === 'loading'" icon="mdi-microphone-settings"></v-icon>