fix: 修复照片数字人重复播放问题

ca96fb29 · ali · 7db30264 · ca96fb29 · ca96fb29 · ca96fb29
Commit ca96fb29 authored Jan 02, 2024 by ali
Hide whitespace changes
Inline Side-by-side

Showing with 47 additions and 53 deletions

HwWebRTC.ts src/renderer/plugins/live/HwWebRTC.ts +0 -0

PhotoRole.ts src/renderer/plugins/live/PhotoRole.ts +4 -3

ShowPhoto.vue src/renderer/screens/ShowPhoto.vue +43 -50

No files found.
--- a/src/renderer/plugins/live/HwWebRTC.ts
+++ b/src/renderer/plugins/live/HwWebRTC.ts
--- a/src/renderer/plugins/live/PhotoRole.ts
+++ b/src/renderer/plugins/live/PhotoRole.ts
@@ -26,6 +26,7 @@ type SdkConfigDataParams = {
 }

 export class PhotoAnswer {
+  id = guid()
  question = ''
  answer = ''
  /** 将答案分割，一段一段合成数字人直播流 */
@@ -139,14 +140,14 @@ export class PhotoRole extends EventEmitter {
    try {
      while (this._liveTaskQueue.length) {
        const task = this._liveTaskQueue.shift() as LiveOptions
-        console.time(task.text)
+        console.time(task.audioUrl as string)
        if (this._liveStatus === 'closing') await this.initLive()

        console.log('----------------> append', task)

-        await this._appendLive(task)
+        await this._createLive(task)

-        console.timeEnd(task.text)
+        console.timeEnd(task.audioUrl as string)
      }
    } catch (error) {
      console.error(error)

--- a/src/renderer/screens/ShowPhoto.vue
+++ b/src/renderer/screens/ShowPhoto.vue
@@ -43,8 +43,15 @@ const inputContext: {
  steps: []
 }

+router.beforeEach((g) => {
+  if (!g.query.url) return router.push('/error')
+})
+
 onMounted(() => {
-  init()
+  init().catch((error) => {
+    microphoneState.value = 'waitInput'
+    showError(`init：${error}`)
+  })
  document.body.style.overflow = 'hidden'
 })

@@ -78,12 +85,7 @@ async function init() {
  photoRole = new PhotoRole(settings.liveHost, `${item?.liveUrl}`, canvasEle)
  photoRole.on('asyncAnswer', onAsyncAnswer)

-  try {
-    await photoRole.init()
-  } catch (error) {
-    console.error(error)
-    return
-  }
+  await photoRole.init()

  microphoneState.value = 'waitInput'

@@ -110,10 +112,6 @@ async function onAsyncAnswer(ans: PhotoAnswer) {
  }
 }

-router.beforeEach((g) => {
-  if (!g.query.url) return router.push('/error')
-})
-
 function analyzeMicrophoneVolume(stream: MediaStream, callback: (number) => void) {
  const audioContext = new AudioContext()
  const analyser = audioContext.createAnalyser()
@@ -178,7 +176,7 @@ async function startVoskWsAudioInput() {
      return
    }

-    postAudio(audioDataChunk)
+    sendAudio(audioDataChunk)
  }

  await analyzeMicrophoneVolume(mediaStream, (val) => {
@@ -191,17 +189,16 @@ async function startVoskWsAudioInput() {
  inputContext.mediaStream = mediaStream
 }

-function postAudio(audioDataChunk) {
-  if (!inputContext.voskWs) return
+function sendAudio(audioDataChunk) {
+  if (!inputContext.voskWs || inputContext.voskWs.readyState !== WebSocket.OPEN) return

-  if (inputContext.voskWs.readyState === WebSocket.OPEN) {
-    const inputData = audioDataChunk.inputBuffer.getChannelData(0) || new Float32Array(bufferSize)
-    const targetBuffer = new Int16Array(inputData.length)
-    for (let index = inputData.length; index > 0; index--) {
-      targetBuffer[index] = 32767 * Math.min(1, inputData[index])
-    }
-    inputContext.voskWs.send(targetBuffer.buffer)
+  const inputData = audioDataChunk.inputBuffer.getChannelData(0) || new Float32Array(bufferSize)
+  const targetBuffer = new Int16Array(inputData.length)
+  for (let index = inputData.length; index > 0; index--) {
+    // WebSocket 服务器只能处理 16 位整数，而 inputData 是一个 Float32Array 数组，其中的元素是 32 位浮点数。所以需要将 inputData 中的每个元素乘以 32767，以将其转换为 16 位整数。
+    targetBuffer[index] = 32767 * Math.min(1, inputData[index])
  }
+  inputContext.voskWs.send(targetBuffer.buffer)
 }

 function initVoskWS() {
@@ -243,7 +240,7 @@ async function llmEnd() {
    })
  ).json()

-  console.log('---------------->', resp)
+  console.log('----------------> llmEnd: ', resp)
 }

 async function endAudioInput() {
@@ -275,7 +272,7 @@ const checkSteps = async () => {
        inputContext.steps[i],
        new Promise((resolve) => setTimeout(() => resolve(false), 10))
      ])
-      if (res === false) continue
+      if (!res) continue
    } catch (e) {
      console.error(e)
    }
@@ -319,13 +316,15 @@ async function onQ(question: string) {

 async function llmLoop(question: string) {
  microphoneState.value = 'loading'
+
+  const answer = new PhotoAnswer()
  const resp = await (
    await fetch(`${settings.llmUrl}/api/v1/generate`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
-      body: JSON.stringify({ question }),
+      body: JSON.stringify({ generator_id: answer.id, question }),
      mode: 'cors'
    })
  ).json()
@@ -334,7 +333,7 @@ async function llmLoop(question: string) {
    throw new Error(`status_code: ${resp.results[0].status_code}; ${JSON.stringify(resp.results)}`)
  }

-  photoRole!.answerArgs = new PhotoAnswer()
+  photoRole!.answerArgs = answer
  // @ts-ignore
  photoRole!.off('asyncAnswer', onAsyncAnswer)
  photoRole!.on('asyncAnswer', onAsyncAnswer)
@@ -355,7 +354,7 @@ async function llmLoop(question: string) {
          'Content-Type': 'application/json'
        },
        mode: 'cors',
-        body: JSON.stringify({ question })
+        body: JSON.stringify({ generator_id: answer.id, question })
      })
    ).json()

@@ -366,37 +365,31 @@ async function llmLoop(question: string) {
    if (isEnd) audioList.pop()

    const newList = audioList.slice(index)
-    if (newList.length === 0 && isEnd) break
-    if (newList.length === 0) continue

-    for (let i = index; i < audioList.length; i++) {
-      console.log(results[0].text[i] + ':' + audioList[i])
-      photoRole!.answerArgs!.answer += results[0].text[i]
-      photoRole!.answerArgs!._typingAnswer.push(...results[0].text[i].split(''))
-    }
+    if (newList.length === 0 && isEnd) return
+    if (newList.length === 0) continue

-    index += newList.length
+    for (let i = 0; i < newList.length; i++) {
+      // @ts-ignore
+      if (microphoneState.value === 'input' || microphoneState.value === 'waitInput') {
+        return
+      }

-    const audioPaths = await Promise.all(
-      newList.map((path) => {
-        return uploadFile({ filePath: path })
-      })
-    )
+      console.log(results[0].text[index + i] + ':' + newList[i])
+      photoRole!.answerArgs!.answer += newList[i]
+      photoRole!.answerArgs!._typingAnswer.push(...newList[i].split(''))

-    // @ts-ignore
-    if (microphoneState.value === 'input' || microphoneState.value === 'waitInput') {
-      break
-    }
-
-    audioPaths.forEach((audioPath) => {
+      const path = await uploadFile({ filePath: newList[i] })
      photoRole?.enQueue({
        taskId: photoRole.sessionId,
-        audioUrl: `https://resources.laihua.com/${audioPath}`,
-        isLast: isEnd
+        audioUrl: `https://resources.laihua.com/${path}`,
+        isLast: true
      })
-    })
+    }
+
+    index += newList.length

-    if (isEnd) break
+    if (isEnd) return
  }
 }

@@ -477,7 +470,7 @@ async function down() {
    </v-btn>
  </div>

-  <v-snackbar v-model="errorSnackbar" multi-line :timeout="3000">
+  <v-snackbar v-model="errorSnackbar" multi-line :timeout="6000">
    {{ errorMsg }}

    <template #actions>