Commit 71728fa5 authored by ali

feat: rewrite the llm websocket client as an llm loop post

parent 19dfc9af
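
The change drops the streaming WebSocket client (initLLMSocket / ws.onmessage) in both pages and replaces it with plain HTTP calls that the renderer polls in a loop. Below is a minimal sketch of the new flow, assuming only the endpoints and payloads visible in the diff (/api/v1/generate, /api/v1/audio and /api/v1/interrupt on settings.llmUrl); the helper names startGeneration, pollAudioList and interrupt are illustrative and not part of the commit:

async function startGeneration(llmUrl: string, question: string): Promise<void> {
  // Kick off generation; the diff treats status_code 100 as "accepted/running".
  const resp = await (await fetch(`${llmUrl}/api/v1/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ question }),
    mode: 'cors'
  })).json()
  if (resp.results[0].status_code !== 100) throw new Error(JSON.stringify(resp.results))
}

async function pollAudioList(llmUrl: string, question: string, onChunk: (paths: string[]) => void) {
  // Poll /api/v1/audio; the server returns a cumulative audio_list and appends
  // 'stream_end' as the last entry once the answer is complete.
  let index = 0
  for (;;) {
    await new Promise((resolve) => setTimeout(resolve, 100))
    const { results } = await (await fetch(`${llmUrl}/api/v1/audio`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ question }),
      mode: 'cors'
    })).json()
    const audioList = results[0].audio_list as string[]
    const isEnd = audioList.at(-1) === 'stream_end'
    if (isEnd) audioList.pop()
    const newList = audioList.slice(index)
    if (newList.length) onChunk(newList) // hand the new wav paths to TTS playback / photoRole
    index += newList.length
    if (isEnd) break
  }
}

async function interrupt(llmUrl: string): Promise<void> {
  // Cancelling a reply is now an explicit POST instead of closing a socket.
  await fetch(`${llmUrl}/api/v1/interrupt`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ end: 1 }),
    mode: 'cors'
  })
}

llmLoop() in each page inlines this loop: the photo page pushes each new clip to photoRole.enQueue, the other page queues HTMLAudioElements for runAudioPlay, and endAudioInput() now awaits llmEnd() (the interrupt call) before tearing down the input stream.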
......@@ -118,17 +118,22 @@ export default class IPCs {
static initializeChildWindow(window: BrowserWindow) {
ipcMain.on('fileUpload', async (event, path: string) => {
const content = IPCs.readFile(path)
const formData = new FormData()
const blob = new Blob([content], { type: 'audio/wav' })
formData.append('file', blob)
const response = await http({
url: 'https://beta.laihua.com/api/upload/file',
method: 'POST',
data: formData
})
window.webContents.send('msgReceivedFileUploadResponse', response)
try {
const content = IPCs.readFile(path)
const formData = new FormData()
const blob = new Blob([content], { type: 'audio/wav' })
formData.append('file', blob)
const response = await http({
url: 'https://beta.laihua.com/api/upload/file',
method: 'POST',
data: formData
})
window.webContents.send('msgReceivedFileUploadResponse', response)
} catch (error) {
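        // Report the failure back to the renderer instead of failing silently.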
window.webContents.send('msgReceivedFileUploadResponse', { code: 500, message: JSON.stringify(error) })
}
})
}
}
......@@ -26,7 +26,6 @@ const microphoneState = ref<'waitInput' | 'input' | 'loading' | 'disabled' | 're
const videoElement = ref<HTMLVideoElement | null>(null)
const can = ref<HTMLCanvasElement | null>(null)
let photoRole: PhotoRole | null = null
let flvPlayer: flvjs.Player | null = null
const inputContext: {
mediaStream?: MediaStream
audioContext?: AudioContext
......@@ -79,8 +78,6 @@ async function init() {
photoRole = new PhotoRole(settings.liveHost, `${item?.liveUrl}`, canvasEle)
photoRole.on('asyncAnswer', onAsyncAnswer)
// initPlayer(videoEle);
try {
await photoRole.init()
} catch (error) {
......@@ -113,78 +110,10 @@ async function onAsyncAnswer(ans: PhotoAnswer) {
}
}
function draw(
ctx: CanvasRenderingContext2D,
img: HTMLImageElement,
liveVideo?: HTMLVideoElement,
videoInfo?: {
center: {
x: number
y: number
}
width: number
height: number
r_w: number
r_h: number
}
) {
ctx.clearRect(0, 0, img.naturalWidth, img.naturalHeight)
ctx.drawImage(img, 0, 0, img.naturalWidth, img.naturalHeight)
if (liveVideo && videoInfo) {
const { center, r_w, r_h } = videoInfo
ctx.drawImage(liveVideo, center.x - r_w / 2, center.y - r_h / 2, r_w, r_h)
}
}
async function initPlayer(videoEle: HTMLVideoElement) {
flvPlayer = flvjs.createPlayer(
{
url: 'http://127.0.0.1:7001/live/movie.flv',
type: 'flv',
isLive: true,
cors: true
},
{
// enableWorker: true,
enableStashBuffer: false,
stashInitialSize: 128
}
)
flvPlayer.attachMediaElement(videoEle)
flvPlayer.load()
await flvPlayer.play()
}
router.beforeEach((g) => {
if (!g.query.url) return router.push('/error')
})
async function initVosk({
result,
partialResult
}: {
result?: (string) => void
partialResult?: (string) => void
}) {
const channel = new MessageChannel()
const model = await settings.downLoadVoskModel()
const recognizer = new model.KaldiRecognizer(sampleRate)
model.registerPort(channel.port1)
recognizer.setWords(true)
recognizer.on('result', (message) => {
result && result((message as ServerMessageResult).result.text)
})
recognizer.on('partialresult', (message) => {
partialResult && partialResult((message as ServerMessagePartialResult).result.partial)
})
return { recognizer, channel }
}
function analyzeMicrophoneVolume(stream: MediaStream, callback: (number) => void) {
const audioContext = new AudioContext()
const analyser = audioContext.createAnalyser()
......@@ -214,62 +143,6 @@ function analyzeMicrophoneVolume(stream: MediaStream, callback: (number) => void
inputContext.scriptProcessorNode = recordEventNode
}
async function startVoskWasmAudioInput() {
if (microphoneState.value === 'loading') return
if (microphoneState.value === 'input') {
endAudioInput()
return
}
microphoneState.value = 'loading'
const { recognizer, channel } = await initVosk({
result: onQ,
partialResult: (text) => {
// console.log('----------------> partialResult:', text)
}
})
sampleRate = 48000
const mediaStream = await navigator.mediaDevices.getUserMedia({
video: false,
audio: {
echoCancellation: true,
noiseSuppression: true,
channelCount: 1,
sampleRate
}
})
const audioContext = new AudioContext()
await audioContext.audioWorklet.addModule(
new URL('/vosk/recognizer-processor.js', import.meta.url)
)
const recognizerProcessor = new AudioWorkletNode(audioContext, 'recognizer-processor', {
channelCount: 1,
numberOfInputs: 1,
numberOfOutputs: 1
})
recognizerProcessor.port.postMessage({ action: 'init', recognizerId: recognizer.id }, [
channel.port2
])
recognizerProcessor.connect(audioContext.destination)
const source = audioContext.createMediaStreamSource(mediaStream)
source.connect(recognizerProcessor)
await analyzeMicrophoneVolume(mediaStream, (val) => {
recordVolume.value = val
})
microphoneState.value = 'input'
inputContext.mediaStream = mediaStream
inputContext.audioContext = audioContext
}
async function startVoskWsAudioInput() {
if (microphoneState.value === 'loading') return
......@@ -358,15 +231,21 @@ function initVoskWS() {
})
}
function initLLMSocket(): Promise<WebSocket> {
const ws = new WebSocket(settings.llmUrl)
return new Promise((resolve, reject) => {
ws.onopen = () => resolve(ws)
ws.onerror = reject
})
async function llmEnd() {
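  // Ask the LLM service to drop the in-flight generation; called from endAudioInput() when a reply is cancelled.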
  const resp = await (await fetch(`${settings.llmUrl}/api/v1/interrupt`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ end: 1 }),
    mode: 'cors'
  })).json()
console.log('---------------->', resp);
}
async function endAudioInput() {
await llmEnd()
microphoneState.value = 'waitInput'
inputContext.ws?.close()
inputContext.mediaStream?.getTracks().forEach((track) => track.stop())
......@@ -422,132 +301,93 @@ async function onQ(question: string) {
microphoneState.value = 'loading'
try {
const ws = await initLLMSocket()
const { pose, stepResolve, stepReject } = createStep()
const messageTimeout = setTimeout(async () => {
showError('llm:timeout!')
await endAudioInput()
microphoneState.value = 'waitInput'
}, 10000)
let sliceAnswer = ''
let answer = ''
let isTime = true
let sliceAnswerLength = 10
inputContext.ws = ws
inputContext.answerArray.length = 0
inputContext.steps.length = 0
inputContext.steps.push(pose)
photoRole!.answerArgs = new PhotoAnswer()
photoRole!.on('asyncAnswer', onAsyncAnswer)
ws.onmessage = (message) => {
clearTimeout(messageTimeout)
try {
let { text, event } = JSON.parse(message.data) as {
event: string
message_num: number
text: string
}
if (event === 'stream_end') {
inputContext.answerArray.push({ text: sliceAnswer, isLast: true })
sliceAnswer = ''
runTTSTask()
inputContext.ws?.close()
console.log('----------------> answer: ', answer)
stepResolve('chat')
return
}
text = text.replace(/\u0000/g, '').trim()
answer += text
photoRole!.answerArgs!.answer += answer
photoRole!.answerArgs!._typingAnswer.push(answer)
isTime && console.time('sliceAnswer')
isTime = false
const textArr = text.split('')
for (let i = 0; i < textArr.length; i++) {
const t = textArr[i]
sliceAnswer += t
if (/[。,?!;,.?!;]/.test(t) && sliceAnswer.length >= sliceAnswerLength) {
console.timeEnd('sliceAnswer')
sliceAnswerLength = settings.llmToTTSSliceLength
inputContext.answerArray.push({ text: sliceAnswer, isLast: true })
runTTSTask()
sliceAnswer = ''
isTime = true
}
}
} catch (error) {
showError('llm:' + error)
endAudioInput().then(() => {
microphoneState.value = 'waitInput'
})
stepReject(JSON.stringify(error))
}
}
const { pose, stepResolve, stepReject } = createStep()
inputContext.steps.length = 0
inputContext.steps.push(pose)
ws.send(JSON.stringify({ prompt: question, historys_list: [] }))
try {
await llmLoop(question);
stepResolve('llm')
} catch (error) {
console.error(error)
microphoneState.value = 'input'
showError(`llm${JSON.stringify(error)}`)
    showError(`llm:${error}`)
}
}
let isTTSRunning = false
async function runTTSTask() {
if (isTTSRunning) return
isTTSRunning = true
async function llmLoop(question: string) {
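  // Submit the question once, then poll /api/v1/audio until the returned audio list ends with 'stream_end', feeding each new clip to photoRole.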
microphoneState.value = 'loading'
  const resp = await (await fetch(`${settings.llmUrl}/api/v1/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ question }),
    mode: 'cors'
  })).json()
const { pose, stepResolve, stepReject } = createStep()
inputContext.steps.push(pose)
if (resp.results[0].status_code !== 100) {
throw new Error(`status_code: ${resp.results[0].status_code}; ${ JSON.stringify(resp.results) }`);
}
try {
while (inputContext.answerArray.length) {
const task = inputContext.answerArray.shift()
if (!task) break
if (task.text.trim().length < 1) continue
console.time(task.text + ' TTS: ')
const res = await localTTS({
url: settings.ttsHost,
text: task.text,
audio_path: settings.userData
})
inputContext.steps.length = 0
photoRole!.answerArgs = new PhotoAnswer()
photoRole!.on('asyncAnswer', onAsyncAnswer)
let index = 0;
while (true) {
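    // Keep polling until the user interrupts (microphoneState flips to 'input'/'waitInput') or the server signals the end of the stream.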
// @ts-ignore
if (microphoneState.value === 'input' || microphoneState.value === 'waitInput') {
break
}
console.log('----------------> TTS:', res[0].text)
console.timeEnd(task.text + ' TTS: ')
console.log('---------------->', res[0].text)
await new Promise( resolve => setTimeout(resolve, 100))
const audioPath = await uploadFile({ filePath: res[0].text })
      const { results } = await (await fetch(`${settings.llmUrl}/api/v1/audio`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        mode: 'cors',
        body: JSON.stringify({ question })
      })).json()
// @ts-ignore
if (microphoneState.value === 'input') {
break
}
const audioList = results[0].audio_list as string[];
if (audioList.length === 0) continue;
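      // A trailing 'stream_end' entry marks the answer as complete; strip it before slicing off the not-yet-played clips.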
const isEnd = audioList.at(-1) === 'stream_end';
if(isEnd) audioList.pop();
const newList = audioList.slice(index);
if (newList.length === 0 && isEnd) break;
if (newList.length === 0) continue;
for (let i = index; i < audioList.length; i++) {
console.log(results[0].text[i] +':'+ audioList[i]);
photoRole!.answerArgs!.answer += results[0].text[i]
photoRole!.answerArgs!._typingAnswer.push(...results[0].text[i].split(''))
}
index += newList.length;
const audioPaths = await Promise.all(newList.map(path => {
return uploadFile({ filePath: path })
}))
// @ts-ignore
if (microphoneState.value === 'input' || microphoneState.value === 'waitInput') {
break
}
audioPaths.forEach(audioPath => {
photoRole?.enQueue({
taskId: photoRole.sessionId,
audioUrl: `https://resources.laihua.com/${audioPath}`,
isLast: task.isLast
isLast: isEnd
})
}
} catch (error) {
showError('tts:' + error)
endAudioInput().then(() => {
microphoneState.value = 'waitInput'
})
stepReject(JSON.stringify(error))
}
isTTSRunning = false
stepResolve('TTS')
if (isEnd) break;
}
}
function uploadFile({ filePath }: { filePath: string }) {
......@@ -565,24 +405,6 @@ function uploadFile({ filePath }: { filePath: string }) {
})
}
const ttsAudios: HTMLAudioElement[] = []
let isPlayRunning = false
async function runAudioPlay() {
if (isPlayRunning) return
isPlayRunning = true
const audio = ttsAudios.shift()
if (!audio) {
isPlayRunning = false
return
}
audio.onended = () => {
isPlayRunning = false
runAudioPlay()
}
await audio.play()
}
async function down() {
if (microphoneState.value === 'reply') {
await endAudioInput()
......
......@@ -34,11 +34,13 @@ const inputContext: {
ws?: WebSocket
voskWs?: WebSocket
asrPartial: string
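  // llmEnd: set by llmLoop() once the full answer has arrived; runAudioPlay() only returns the UI to 'input' after that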
llmEnd: boolean
ttsAudios: HTMLAudioElement[]
playingAudio?: HTMLAudioElement
answerArray: string[]
} = {
asrPartial: '',
answerArray: []
llmEnd: false,
ttsAudios: []
}
onMounted(() => {
......@@ -56,30 +58,6 @@ const showError = (msg: string) => {
errorMsg.value = msg
}
async function initVosk({
result,
partialResult
}: {
result?: (string) => void
partialResult?: (string) => void
}) {
const channel = new MessageChannel()
const model = await settings.downLoadVoskModel()
const recognizer = new model.KaldiRecognizer(sampleRate)
model.registerPort(channel.port1)
recognizer.setWords(true)
recognizer.on('result', (message) => {
result && result((message as ServerMessageResult).result.text)
})
recognizer.on('partialresult', (message) => {
partialResult && partialResult((message as ServerMessagePartialResult).result.partial)
})
return { recognizer, channel }
}
function analyzeMicrophoneVolume(stream: MediaStream, callback: (number) => void) {
const audioContext = new AudioContext()
const analyser = audioContext.createAnalyser()
......@@ -109,62 +87,6 @@ function analyzeMicrophoneVolume(stream: MediaStream, callback: (number) => void
inputContext.scriptProcessorNode = recordEventNode
}
async function startVoskWasmAudioInput() {
if (microphoneState.value === 'loading') return
if (microphoneState.value === 'input') {
endAudioInput()
return
}
microphoneState.value = 'loading'
const { recognizer, channel } = await initVosk({
result: onQ,
partialResult: (text) => {
// console.log('----------------> partialResult:', text)
}
})
sampleRate = 48000
const mediaStream = await navigator.mediaDevices.getUserMedia({
video: false,
audio: {
echoCancellation: true,
noiseSuppression: true,
channelCount: 1,
sampleRate
}
})
const audioContext = new AudioContext()
await audioContext.audioWorklet.addModule(
new URL('/vosk/recognizer-processor.js', import.meta.url)
)
const recognizerProcessor = new AudioWorkletNode(audioContext, 'recognizer-processor', {
channelCount: 1,
numberOfInputs: 1,
numberOfOutputs: 1
})
recognizerProcessor.port.postMessage({ action: 'init', recognizerId: recognizer.id }, [
channel.port2
])
recognizerProcessor.connect(audioContext.destination)
const source = audioContext.createMediaStreamSource(mediaStream)
source.connect(recognizerProcessor)
await analyzeMicrophoneVolume(mediaStream, (val) => {
recordVolume.value = val
})
microphoneState.value = 'input'
inputContext.mediaStream = mediaStream
inputContext.audioContext = audioContext
}
async function startVoskWsAudioInput() {
if (microphoneState.value === 'loading') return
......@@ -253,15 +175,21 @@ function initVoskWS() {
})
}
function initLLMSocket(): Promise<WebSocket> {
const ws = new WebSocket(settings.llmUrl)
return new Promise((resolve, reject) => {
ws.onopen = () => resolve(ws)
ws.onerror = reject
})
async function llmEnd() {
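  // Ask the LLM service to drop the in-flight generation; endAudioInput() awaits this before tearing down the microphone.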
  const resp = await (await fetch(`${settings.llmUrl}/api/v1/interrupt`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ end: 1 }),
    mode: 'cors'
  })).json()
console.log('---------------->', resp);
}
function endAudioInput() {
async function endAudioInput() {
await llmEnd()
microphoneState.value = 'waitInput'
inputContext.ws?.close()
inputContext.mediaStream?.getTracks().forEach((track) => track.stop())
......@@ -273,12 +201,11 @@ function endAudioInput() {
inputContext.voskWs.send('{"eof" : 1}')
inputContext.voskWs.close()
}
ttsAudios.length = 0
inputContext.ttsAudios.length = 0
inputContext.playingAudio?.pause()
videos[1].value?.pause()
videos[0].value?.pause()
isPlayRunning = false
inputContext.answerArray.length = 0
}
const canplay = () => {
......@@ -347,126 +274,104 @@ async function onQ(question: string) {
return
}
  // No video link matched, fall back to the LLM directly
try {
const ws = await initLLMSocket()
const messageTimeout = setTimeout(() => {
showError('llm:timeout!')
endAudioInput()
microphoneState.value = 'waitInput'
}, 10000)
let sliceAnswer = ''
let answer = ''
let isTime = true
let sliceAnswerLength = 10
inputContext.ws = ws
ws.onmessage = (message) => {
clearTimeout(messageTimeout)
if (microphoneState.value === 'input') {
return
}
try {
let { text, event } = JSON.parse(message.data) as {
event: string
message_num: number
text: string
}
if (event === 'stream_end') {
inputContext.answerArray.push(sliceAnswer)
runTTSTask()
sliceAnswer = ''
inputContext.ws?.close()
console.log('----------------> answer: ', answer)
return
}
text = text.replace(/\u0000/g, '').trim()
answer += text
isTime && console.time('sliceAnswer')
isTime = false
const textArr = text.split('')
for (let i = 0; i < textArr.length; i++) {
const t = textArr[i]
sliceAnswer += t
if (/[。,?!;,.?!;]/.test(t) && sliceAnswer.length >= sliceAnswerLength) {
console.timeEnd('sliceAnswer')
sliceAnswerLength = settings.llmToTTSSliceLength
inputContext.answerArray.push(sliceAnswer)
runTTSTask()
sliceAnswer = ''
isTime = true
}
}
} catch (error) {
console.error(error)
showError(`message:${error}`)
microphoneState.value = 'waitInput'
}
}
ws.send(JSON.stringify({ prompt: question, historys_list: [] }))
    // No video link matched, fall back to the LLM directly
await llmLoop(question);
} catch (error) {
console.error(error)
microphoneState.value = 'input'
showError(`llm:${JSON.stringify(error)}`)
showError(`llm:${error}`)
}
}
let isTTSRunning = false
async function runTTSTask() {
if (isTTSRunning) return
isTTSRunning = true
async function llmLoop(question: string) {
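  // Same polling flow as above: submit the question, poll /api/v1/audio until 'stream_end', and queue each returned wav for local playback.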
if (!role) return;
microphoneState.value = 'loading'
try {
while (inputContext.answerArray.length) {
const task = inputContext.answerArray.shift()
if (!task) break
if (task.trim().length < 1) continue
    const resp = await (await fetch(`${settings.llmUrl}/api/v1/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ question }),
      mode: 'cors'
    })).json()
console.time(task + ' TTS: ')
if (resp.results[0].status_code !== 100) {
throw new Error(`status_code: ${resp.results[0].status_code}; ${ JSON.stringify(resp.results) }`);
}
const res = await localTTS({
url: settings.ttsHost,
text: task,
audio_path: settings.userData
})
inputContext.llmEnd = false;
let index = 0;
console.log('----------------> TTS:', res[0].text)
console.timeEnd(task + ' TTS: ')
while (true) {
// @ts-ignore
if (microphoneState.value === 'input' || microphoneState.value === 'waitInput') {
break
}
// @ts-ignore
if (microphoneState.value === 'input') {
break
}
await new Promise( resolve => setTimeout(resolve, 100))
const audio = new Audio(`file://${res[0].text}`)
audio.load()
ttsAudios.push(audio)
runAudioPlay()
      const { results } = await (await fetch(`${settings.llmUrl}/api/v1/audio`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        mode: 'cors',
        body: JSON.stringify({ question })
      })).json()
const audioList = results[0].audio_list as string[];
if (audioList.length === 0) continue;
const isEnd = audioList.at(-1) === 'stream_end';
if(isEnd) audioList.pop();
const newList = audioList.slice(index);
if (newList.length === 0 && isEnd) break;
if (newList.length === 0) continue;
for (let i = index; i < audioList.length; i++) {
console.log(results[0].text[i] +':'+ audioList[i]);
}
} catch (error) {
showError(`tts:${error}`)
microphoneState.value = 'waitInput'
console.error(error)
index += newList.length;
// @ts-ignore
if (microphoneState.value === 'input' || microphoneState.value === 'waitInput') {
break
}
// inputContext.ttsAudios.push(...newList.map(path => {
// const audio = new Audio(`file://${path}`)
// audio.load()
// return audio;
// }))
// TODO: test
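      // Temporary: fetch the generated wav files from a hard-coded local static server rather than via file:// (see the commented-out variant above).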
inputContext.ttsAudios.push(...newList.map(path => {
const audio = new Audio(`http://192.168.1.57:6767/${path.split('\\').pop()}`)
audio.load()
return audio;
}))
runAudioPlay()
if (isEnd) break;
}
isTTSRunning = false
inputContext.llmEnd = true;
}
const ttsAudios: HTMLAudioElement[] = []
let isPlayRunning = false
async function runAudioPlay() {
if (isPlayRunning) return
isPlayRunning = true
const audio = ttsAudios.shift()
const audio = inputContext.ttsAudios.shift()
if (!audio) {
isPlayRunning = false
videos[0].value!.pause()
!isTTSRunning && (microphoneState.value = 'input')
inputContext.llmEnd && (microphoneState.value = 'input')
return
}
audio.onended = () => {
......@@ -482,23 +387,9 @@ async function runAudioPlay() {
microphoneState.value = 'reply'
}
// eslint-disable-next-line no-unused-vars
async function xfTTS(text: string) {
const tone = settings.source.find(({ sourceId }) => settings.selectSource === sourceId)
if (!tone) return
const res = await audioAiTTS({
host: settings.ttsHost,
text,
speed: 3,
speaker: tone.sourceId,
provider: tone.provider
})
console.log('----------------> tts:', res)
}
async function down() {
if (microphoneState.value === 'reply') {
endAudioInput()
await endAudioInput()
}
try {
......