feat: 更换 icon；修复视频数字人切换视频闪烁问题，回答内容分割不准确问题，优化回答内容麦克风状态联动；新增 llmToTTSSliceLength 配置控制回答内容分割字数

2a5e9aa6 · ali · fd503c93 · fd503c93 · 2a5e9aa6 · 2a5e9aa6
Commit 2a5e9aa6 authored Dec 06, 2023 by ali
5 changed files
--- a/buildAssets/icons/icon.png
+++ b/buildAssets/icons/icon.png
--- a/src/renderer/components/layout/HeaderLayout.vue
+++ b/src/renderer/components/layout/HeaderLayout.vue
@@ -84,6 +84,11 @@ if (setting.asr.value === 'vosk_asr') {
 async function changeOpenDevTools() {
  await window.mainApi.send('openDevTools', setting.isOpenDevTools.value)
 }
+function clear() { // Handler for the "清除缓存并刷新" button: wipe local storage, then reload.
+  localStorage.clear() // removes every key stored in localStorage for this origin
+  location.reload() // reload the page after the storage has been emptied
+}
 </script>
 <template>
  <v-app-bar color="#d71b1b" density="compact" class="header">
@@ -181,6 +186,27 @@ async function changeOpenDevTools() {
                  :model-value="setting.llmUrl"
                ></v-text-field>
+                <v-slider
+                  v-model="setting.llmToTTSSliceLength.value"
+                  label="TTS 分句长度"
+                  class="align-center"
+                  :max="100"
+                  :min="0"
+                  hide-details
+                  :step="1"
+                >
+                  <template #append>
+                    <v-text-field
+                      v-model="setting.llmToTTSSliceLength.value"
+                      hide-details
+                      single-line
+                      density="compact"
+                      type="number"
+                      style="width: 80px"
+                    ></v-text-field>
+                  </template>
+                </v-slider>
                <v-switch
                  v-model="setting.isFullscreen.value"
                  hide-details
@@ -202,6 +228,7 @@ async function changeOpenDevTools() {
            <v-card-actions>
              <v-spacer></v-spacer>
+              <v-btn color="#d71b1b" text="清除缓存并刷新" @click="clear"></v-btn>
              <v-btn text="关闭" @click="isActive.value = false"></v-btn>
            </v-card-actions>
          </v-card>

--- a/src/renderer/screens/ShowPhoto.vue
+++ b/src/renderer/screens/ShowPhoto.vue
@@ -252,7 +252,7 @@ async function startVoskWsAudioInput() {
    return
  }
-  await initVoskWS();
+  await initVoskWS()
  sampleRate = 8000
  const mediaStream = await navigator.mediaDevices.getUserMedia({
    audio: {
@@ -369,14 +369,18 @@ async function onAsr(question: string) {
      answer += text
      isTime && console.time('sliceAnswer')
      isTime = false
-      sliceAnswer += text
-      if (/[。，？！；,.?!;]/.test(text) && sliceAnswer.length >= 20) {
+      const textArr = text.split('');
-        console.timeEnd('sliceAnswer')
+      for (let i = 0; i < textArr.length; i++) {
-        answerArray.push(sliceAnswer)
+        const t = textArr[i];
-        runTTSTask(answerArray)
+        sliceAnswer += t
-        sliceAnswer = ''
+        if (/[。，？！；,.?!;]/.test(t) && sliceAnswer.length >= settings.llmToTTSSliceLength) {
-        isTime = true
+          console.timeEnd('sliceAnswer')
+          answerArray.push(sliceAnswer)
+          runTTSTask(answerArray)
+          sliceAnswer = ''
+          isTime = true
+        }
      }
    } catch (error) {
      console.log('返回答案错误 -----> ' + JSON.stringify(error))

--- a/src/renderer/screens/ShowVideo.vue
+++ b/src/renderer/screens/ShowVideo.vue
@@ -24,8 +24,7 @@ const role = useVideo.list.find((i) => i.url === url)
 const microphoneState = ref<'waitInput' | 'input' | 'loading' | 'disabled' | 'reply'>('waitInput')
 const videoElement = ref<HTMLVideoElement | null>(null)
 const videoElement2 = ref<HTMLVideoElement | null>(null)
-const videos = [videoElement, videoElement2];
+const videos = [videoElement, videoElement2]
 onMounted(() => {
  // init();
@@ -166,7 +165,7 @@ async function startVoskWsAudioInput() {
  }
  initVoskWS()
-  sampleRate = 8000
+  sampleRate = 16000
  const mediaStream = await navigator.mediaDevices.getUserMedia({
    audio: {
      echoCancellation: true,
@@ -184,11 +183,15 @@ async function startVoskWsAudioInput() {
  processor.connect(audioContext.destination)
  processor.onaudioprocess = (audioDataChunk) => {
-    if (microphoneState.value === 'loading' || microphoneState.value === 'disabled' || microphoneState.value === 'reply') {
+    if (
-      return;
+      microphoneState.value === 'loading' ||
+      microphoneState.value === 'disabled' ||
+      microphoneState.value === 'reply'
+    ) {
+      return
    }
-    postAudio(audioDataChunk);
+    postAudio(audioDataChunk)
  }
  await analyzeMicrophoneVolume(mediaStream, (val) => {
@@ -255,24 +258,24 @@ function endAudioInput() {
 }
 const canplay = () => {
-  videos[1].value!.style.opacity = '1';
+  videos[1].value!.style.opacity = '1' // 'canplay' listener body: make the element in slot 1 visible
-  videos[0].value!.style.opacity = '0';
+  videos[0].value!.style.opacity = '0' // hide the element in slot 0
-  videos[0].value!.pause();
+  videos[0].value!.pause() // stop playback on the element that was just hidden
-  videos[1].value!.play();
+  videos[1].value!.play() // start playback on the element that was just shown
-  videos[1].value!.removeEventListener('canplay', canplay);
+  videos[1].value!.removeEventListener('canplay', canplay) // detach this listener from the slot-1 element
-  videos.unshift(videos.pop()!);
+  videos.unshift(videos.pop()!) // move the last array entry to the front, so the element just shown becomes slot 0
 }
 function loadVideo(url: string) {
   videos[1].value!.src = url
-  videos[1].value!.style.opacity = '0';
+  videos[1].value!.style.opacity = '0' // loadVideo: after assigning `url` as src, keep the slot-1 element transparent
-  videos[1].value!.addEventListener('canplay', canplay);
+  videos[1].value!.addEventListener('canplay', canplay) // run `canplay` when the slot-1 element fires its 'canplay' event
 }
 async function onAsr(question: string) {
  console.log('---------------->', question)
-  if (!role) return;
+  if (!role) return
-  microphoneState.value = 'loading';
+  microphoneState.value = 'loading'
  question = question.replace(/\s/g, '')
  for (let i = 0; i < role.qa.length; i++) {
@@ -280,13 +283,13 @@ async function onAsr(question: string) {
    console.log(question + ' : ' + q)
    if (q.includes(question)) {
      loadVideo(url)
-      microphoneState.value = 'reply';
+      microphoneState.value = 'reply'
      const videoEle = videos[1].value
      videoEle!.loop = false
      videoEle!.muted = false
      videoEle!.onended = () => {
-        videoEle!.onended = null;
+        videoEle!.onended = null
-        microphoneState.value = 'input';
+        microphoneState.value = 'input'
        // 是否需要初始化
      }
      return
@@ -324,18 +327,23 @@ async function onAsr(question: string) {
      answer += text
      isTime && console.time('sliceAnswer')
      isTime = false
-      sliceAnswer += text
-      if (/[。，？！；,.?!;]/.test(text) && sliceAnswer.length >= 10) {
+      const textArr = text.split('');
-        console.timeEnd('sliceAnswer')
+      for (let i = 0; i < textArr.length; i++) {
-        answerArray.push(sliceAnswer)
+        const t = textArr[i];
-        runTTSTask(answerArray)
+        sliceAnswer += t
-        sliceAnswer = ''
+        if (/[。，？！；,.?!;]/.test(t) && sliceAnswer.length >= settings.llmToTTSSliceLength) {
-        isTime = true
+          console.timeEnd('sliceAnswer')
+          answerArray.push(sliceAnswer)
+          runTTSTask(answerArray)
+          sliceAnswer = ''
+          isTime = true
+        }
      }
    } catch (error) {
      console.log('返回答案错误 -----> ' + JSON.stringify(error))
-      microphoneState.value = 'input';
+      microphoneState.value = 'input'
    }
  }
@@ -360,11 +368,11 @@ async function runTTSTask(tasks: string[]) {
    while (tasks.length) {
      const task = tasks.shift()
      if (!task) break
-      if (task.length < 1) continue
+      if (task.trim().length < 1) continue
      console.time(task + ' TTS: ')
-      microphoneState.value = 'loading';
+      microphoneState.value = 'loading'
      const res = await localTTS({
        url: settings.ttsHost,
        text: task,
@@ -395,21 +403,21 @@ async function runAudioPlay() {
  const audio = ttsAudios.shift()
  if (!audio) {
-    isPlayRunning = false;
+    isPlayRunning = false
-    videos[0].value!.pause();
+    videos[0].value!.pause()
-    !isTTSRunning && (microphoneState.value = 'input');
+    !isTTSRunning && (microphoneState.value = 'input')
    return
  }
  audio.onended = () => {
    isPlayRunning = false
    runAudioPlay()
  }
-  await audio.play();
+  await audio.play()
  loadVideo(new URL('/libai/10.mp4', import.meta.url).href)
  videos[1].value!.loop = true
  videos[1].value!.muted = true
-  microphoneState.value = 'reply';
+  microphoneState.value = 'reply'
 }
 // eslint-disable-next-line no-unused-vars
@@ -425,7 +433,6 @@ async function xfTTS(text: string) {
  })
  console.log('----------------> tts:', res)
 }
 </script>
 <template>
@@ -444,7 +451,11 @@ async function xfTTS(text: string) {
      color="#fff"
      variant="elevated"
      size="x-large"
-      :disabled="microphoneState === 'loading' || microphoneState === 'disabled' || microphoneState === 'reply'"
+      :disabled="
+        microphoneState === 'loading' ||
+        microphoneState === 'disabled' ||
+        microphoneState === 'reply'
+      "
      @pointerdown="startVoskWsAudioInput"
    >
      <v-icon v-if="microphoneState === 'waitInput'" icon="mdi-microphone"></v-icon>
@@ -512,13 +523,15 @@ async function xfTTS(text: string) {
  border-radius: 36%;
 }
-.video-ele, .video-ele2 {
+.video-ele,
+.video-ele2 {
  position: absolute;
  width: 100%;
  height: 100%;
  opacity: 0;
 }
-.video-ele.active, .video-ele2.active {
+.video-ele.active,
+.video-ele2.active {
  opacity: 1;
 }

--- a/src/renderer/store/settings.ts
+++ b/src/renderer/store/settings.ts
@@ -25,6 +25,7 @@ export type ISettings = {
  isFullscreen: 'yes' | 'no'
  isOpenDevTools: boolean
  llmUrl: string
+  llmToTTSSliceLength: number
  voskWsLUrl: string
 }
@@ -58,7 +59,8 @@ const useSettingsStore = defineStore('settings', {
      selectSource: '',
      isFullscreen: 'no',
      isOpenDevTools: false,
-      llmUrl: 'ws://127.0.0.1:9001/api/v1/stream',
+      llmUrl: 'ws://127.0.0.1:9899/api/v1/stream',
+      llmToTTSSliceLength: 20,
      voskWsLUrl: 'ws://127.0.0.1:2700'
    }) as ISettings,
  getters: {},