Commit 93479e9e authored by ali's avatar ali

feat: 照片数字人直连 asr_ws 服务

parent 36619ae0
......@@ -41,7 +41,8 @@ const isCurrentRoute = (path: string): boolean => {
// ASR engines offered in the settings dropdown; commented-out entries are
// implemented elsewhere but currently disabled in the UI.
const asrItems = ref([
// 'Web Speech API',
'vosk_asr',
'xf_asr',
'vosk_ws'
// 'Whisper Api'
])
// Currently selected ASR engine, initialized from the persisted settings store.
const asrSelect = ref(setting.asr)
......@@ -76,7 +77,9 @@ async function changeVoskModel() {
await settings.downLoadVoskModel()
voskModelLoading.value = false
}
changeVoskModel()
// Pre-download the local vosk model only when the wasm-based ASR is the
// active engine; 'vosk_ws' streams to a server and needs no local model.
if (setting.asr.value === 'vosk_asr') {
  changeVoskModel()
}
async function changeOpenDevTools() {
await window.mainApi.send('openDevTools', setting.isOpenDevTools.value)
......@@ -153,7 +156,17 @@ async function changeOpenDevTools() {
></v-select>
</template>
<template v-if="setting.asr.value === 'vosk_ws'">
<v-text-field
label="ASR 地址"
:rules="[(value) => !!value || 'ASR 地址必填']"
hide-details="auto"
:model-value="setting.voskWsLUrl"
></v-text-field>
</template>
<v-text-field
style="margin-top: 22px"
label="TTS 地址"
:rules="[(value) => !!value || 'TTS 地址必填']"
hide-details="auto"
......
<!-- eslint-disable no-unused-vars -->
<!-- eslint-disable camelcase -->
<script setup lang="ts">
import { onMounted, ref } from 'vue'
......@@ -14,7 +15,8 @@ import flvjs from 'flv.js'
const router = useRouter()
const route = useRoute()
const { settings } = useStore()
const sampleRate = 48000
let sampleRate = 48000
const bufferSize = 8192
const iconMicrophone = new URL('/images/microphone-input.svg', import.meta.url).href
const recordVolume = ref(0)
......@@ -180,9 +182,13 @@ const inputContext: {
scriptProcessorNode?: ScriptProcessorNode
model?: Model
ws?: WebSocket
} = {}
voskWs?: WebSocket
asrPartial: string
} = {
asrPartial: ''
}
async function startAudioInput() {
async function startVoskWasmAudioInput() {
if (microphoneState.value === 'loading') return
if (microphoneState.value === 'input') {
......@@ -199,6 +205,7 @@ async function startAudioInput() {
}
})
sampleRate = 48000
const mediaStream = await navigator.mediaDevices.getUserMedia({
video: false,
audio: {
......@@ -237,6 +244,84 @@ async function startAudioInput() {
inputContext.audioContext = audioContext
}
async function startVoskWsAudioInput() {
if (microphoneState.value === 'loading') return
if (microphoneState.value === 'input') {
endAudioInput()
return
}
initVoskWS()
sampleRate = 8000
const mediaStream = await navigator.mediaDevices.getUserMedia({
audio: {
echoCancellation: true,
noiseSuppression: true,
channelCount: 1,
sampleRate
},
video: false
})
const audioContext = new AudioContext({ sampleRate })
const source = audioContext.createMediaStreamSource(mediaStream)
const processor = audioContext.createScriptProcessor()
source.connect(processor)
processor.connect(audioContext.destination)
processor.onaudioprocess = (audioDataChunk) => postAudio(audioDataChunk)
await analyzeMicrophoneVolume(mediaStream, (val) => {
recordVolume.value = val
})
microphoneState.value = 'input'
inputContext.audioContext = audioContext
inputContext.mediaStream = mediaStream
}
/**
 * Convert one ScriptProcessor chunk to 16-bit little-endian PCM and send
 * it over the vosk WebSocket. Silently drops the chunk when the socket is
 * missing or not yet OPEN (e.g. while connecting or after close).
 *
 * @param audioDataChunk AudioProcessingEvent from onaudioprocess.
 */
function postAudio(audioDataChunk) {
  if (!inputContext.voskWs) return
  if (inputContext.voskWs.readyState === WebSocket.OPEN) {
    // Mono float samples in [-1, 1]; fall back to a silent buffer if absent.
    const inputData = audioDataChunk.inputBuffer.getChannelData(0) || new Float32Array(bufferSize)
    const targetBuffer = new Int16Array(inputData.length)
    // Fix: the original loop ran from `length` down to 1 — it wrote one
    // slot past the end of targetBuffer, read inputData[length]
    // (undefined → NaN → 0) and never filled index 0, zeroing the first
    // sample of every chunk. Also clamp negatives so values below -1
    // cannot wrap when scaled to the Int16 range.
    for (let index = 0; index < inputData.length; index++) {
      targetBuffer[index] = 32767 * Math.max(-1, Math.min(1, inputData[index]))
    }
    inputContext.voskWs.send(targetBuffer.buffer)
  }
}
/**
 * Connect to the configured vosk ASR WebSocket endpoint and install its
 * lifecycle handlers. The returned Promise resolves with the socket once
 * the connection opens, and rejects on a connection error.
 *
 * Side effects: stores the socket in `inputContext.voskWs` and resets
 * `inputContext.asrPartial`.
 */
function initVoskWS() {
  return new Promise((resolve, reject) => {
    const ws = new WebSocket(settings.voskWsLUrl)
    ws.binaryType = 'arraybuffer'
    inputContext.voskWs = ws
    inputContext.asrPartial = ''

    ws.onopen = () => resolve(inputContext.voskWs)
    ws.onerror = (event) => reject(new Error(JSON.stringify(event)))

    ws.onmessage = (event) => {
      if (!event.data) return
      const parsed = JSON.parse(event.data)
      // Interim hypothesis; the literal 'the' is skipped (presumably a
      // recognizer artifact on silence — TODO confirm against the server).
      if (parsed.partial && parsed.partial !== 'the') {
        inputContext.asrPartial = parsed.partial + '|'
      }
      // if (parsed.result) console.log(parsed.result);
      // Final utterance text: hand the recognized sentence to the pipeline.
      if (parsed.text) {
        inputContext.asrPartial = parsed.text
        onAsr(inputContext.asrPartial)
      }
    }
  })
}
function endAudioInput() {
microphoneState.value = 'waitInput'
inputContext.mediaStream?.getTracks().forEach((track) => track.stop())
......@@ -244,7 +329,10 @@ function endAudioInput() {
inputContext.audioContext2?.close()
inputContext.scriptProcessorNode && (inputContext.scriptProcessorNode.onaudioprocess = null)
inputContext.model?.terminate()
// inputContext.ws?.close()
if (inputContext.voskWs) {
inputContext.voskWs.send('{"eof" : 1}')
inputContext.voskWs.close()
}
}
async function onAsr(question: string) {
......@@ -394,7 +482,7 @@ async function xfTTS(text: string) {
variant="elevated"
size="x-large"
:disabled="microphoneState === 'loading' || microphoneState === 'disabled'"
@pointerdown="startAudioInput"
@pointerdown="startVoskWsAudioInput"
>
<v-icon v-if="microphoneState === 'waitInput'" icon="mdi-microphone"></v-icon>
<v-icon v-if="microphoneState === 'loading'" icon="mdi-microphone-settings"></v-icon>
......
......@@ -8,7 +8,7 @@ export type ISettings = {
filePath: string
userData: string
appData: string
asr: 'vosk_asr' | 'xf_asr'
asr: 'vosk_asr' | 'xf_asr' | 'vosk_ws'
voskModels: string[]
voskSelectModel: string
tts: 'xf_tts' | 'local_tts'
......@@ -35,7 +35,7 @@ const useSettingsStore = defineStore('settings', {
filePath: '',
userData: '',
appData: '',
asr: 'vosk_asr',
asr: 'vosk_ws',
tts: 'local_tts',
voskModels: [
new URL('/vosk/models/vosk-model-small-ca-0.4.tar.gz', import.meta.url).href,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment