import { ref, onUnmounted } from 'vue'
import { $api } from '../common/globalFunction'

export function useAudioRecorder() {
	const isRecording = ref(false)
	const isStopping = ref(false)
	const isSocketConnected = ref(false)
	const recordingDuration = ref(0)
	const audioDataForDisplay = ref(new Array(16).fill(0))
	const volumeLevel = ref(0)
	const recognizedText = ref('')
	const lastFinalText = ref('')

	let audioStream = null
	let audioContext = null
	let audioInput = null
	let scriptProcessor = null
	let websocket = null
	let durationTimer = null

	// Generate a 32-character hex ID (UUID v4 with the dashes stripped),
	// used as task_id / message_id in the transcription protocol
	const generateUUID = () => {
		return ([1e7] + -1e3 + -4e3 + -8e3 + -1e11)
			.replace(/[018]/g, c =>
				(c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16)
			)
			.replace(/-/g, '')
	}

	// Ask the backend for a pre-signed WebSocket URL for speech recognition
	const fetchWsUrl = async () => {
		const res = await $api.createRequest('/app/speech/getToken')
		if (res.code !== 200) throw new Error('无法获取语音识别 wsUrl')
		return res.msg
	}

	function extractWsParams(wsUrl) {
		const url = new URL(wsUrl)
		const appkey = url.searchParams.get('appkey')
		const token = url.searchParams.get('token')
		return { appkey, token }
	}

	const connectWebSocket = async () => {
		const wsUrl = await fetchWsUrl()
		const { appkey } = extractWsParams(wsUrl)

		return new Promise((resolve, reject) => {
			websocket = new WebSocket(wsUrl)
			websocket.binaryType = 'arraybuffer'

			websocket.onopen = () => {
				isSocketConnected.value = true
				// Send the StartTranscription message (modeled on demo.html)
				const startTranscriptionMessage = {
					header: {
						appkey: appkey, // not strictly required; may be empty or carried by the wsUrl
						namespace: 'SpeechTranscriber',
						name: 'StartTranscription',
						task_id: generateUUID(),
						message_id: generateUUID()
					},
					payload: {
						format: 'pcm',
						sample_rate: 16000,
						enable_intermediate_result: true,
						enable_punctuation_prediction: true,
						enable_inverse_text_normalization: true
					}
				}
				websocket.send(JSON.stringify(startTranscriptionMessage))
				resolve()
			}

			websocket.onerror = (e) => {
				isSocketConnected.value = false
				reject(e)
			}

			websocket.onclose = () => {
				isSocketConnected.value = false
			}

			websocket.onmessage = (e) => {
				const msg = JSON.parse(e.data)
				const name = msg?.header?.name
				const payload = msg?.payload

				switch (name) {
					case 'TranscriptionResultChanged': {
						// Intermediate result (optionally use stash_result.unfixedText for finer granularity)
						const text = payload?.unfixed_result || payload?.result || ''
						lastFinalText.value = text
						break
					}
					case 'SentenceBegin': {
						// Optional: a new sentence has started; reset per-sentence state here if needed
						break
					}
					case 'SentenceEnd': {
						const text = payload?.result || ''
						const confidence = payload?.confidence || 0
						// Only keep results the recognizer is reasonably confident about
						if (text && confidence > 0.5) {
							recognizedText.value += text
							lastFinalText.value = ''
						}
						break
					}
					case 'TranscriptionStarted': {
						// The transcription task has been accepted by the server
						break
					}
					case 'TranscriptionCompleted': {
						lastFinalText.value = ''
						break
					}
					case 'TaskFailed': {
						console.error('识别失败:', msg?.header?.status_text)
						break
					}
					default:
						console.log('未知消息类型:', name, msg)
						break
				}
			}
		})
	}

	const startRecording = async () => {
		if (isRecording.value) return

		// #ifdef MP-WEIXIN
		$api.msg('小程序暂不支持语音识别功能')
		return
		// #endif

		// #ifdef H5
		try {
			if (typeof navigator === 'undefined' || !navigator.mediaDevices) {
				$api.msg('当前环境不支持录音功能')
				return
			}

			recognizedText.value = ''
			lastFinalText.value = ''

			await connectWebSocket()

			audioStream = await navigator.mediaDevices.getUserMedia({ audio: true })
			audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 })
			audioInput = audioContext.createMediaStreamSource(audioStream)
			scriptProcessor = audioContext.createScriptProcessor(2048, 1, 1)

			scriptProcessor.onaudioprocess = (event) => {
				const input = event.inputBuffer.getChannelData(0)
				const pcm = new Int16Array(input.length)
				let sum = 0
				// Convert float samples in [-1, 1] to 16-bit PCM, accumulating energy for RMS volume
				for (let i = 0; i < input.length; ++i) {
					const s = Math.max(-1, Math.min(1, input[i]))
					pcm[i] = s * 0x7FFF
					sum += s * s
				}
				volumeLevel.value = Math.sqrt(sum / input.length)
				audioDataForDisplay.value = Array(16).fill(volumeLevel.value)
				if (websocket?.readyState === WebSocket.OPEN) {
					websocket.send(pcm.buffer)
				}
			}

			audioInput.connect(scriptProcessor)
			scriptProcessor.connect(audioContext.destination)

			isRecording.value = true
			recordingDuration.value = 0
			durationTimer = setInterval(() => recordingDuration.value++, 1000)
		} catch (err) {
			console.error('启动失败:', err)
			cleanup()
		}
		// #endif
	}

	const stopRecording = () => {
		if (!isRecording.value || isStopping.value) return
		isStopping.value = true
		if (websocket?.readyState === WebSocket.OPEN) {
			websocket.send(JSON.stringify({
				header: {
					namespace: 'SpeechTranscriber',
					name: 'StopTranscription',
					message_id: generateUUID()
				}
			}))
			// Close right away; any final result still in flight is discarded
			websocket.close()
		}
		cleanup()
		isStopping.value = false
	}

	const cancelRecording = () => {
		if (!isRecording.value || isStopping.value) return
		isStopping.value = true
		websocket?.close()
		cleanup()
		isStopping.value = false
	}

	const cleanup = () => {
		clearInterval(durationTimer)
		scriptProcessor?.disconnect()
		audioInput?.disconnect()
		audioStream?.getTracks().forEach(track => track.stop())
		audioContext?.close()
		audioStream = null
		audioContext = null
		audioInput = null
		scriptProcessor = null
		websocket = null
		isRecording.value = false
		isSocketConnected.value = false
	}

	onUnmounted(() => {
		if (isRecording.value) stopRecording()
	})

	return {
		isRecording,
		isStopping,
		isSocketConnected,
		recordingDuration,
		audioDataForDisplay,
		volumeLevel,
		recognizedText,
		lastFinalText,
		startRecording,
		stopRecording,
		cancelRecording
	}
}
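/*
 * Usage sketch (hypothetical, not part of this module): a uni-app H5 component
 * consuming the composable. The import path and template markup below are
 * illustrative assumptions about the surrounding project, not code from it.
 * Note the design: `recognizedText` accumulates finalized sentences while
 * `lastFinalText` holds the current intermediate hypothesis, so a live
 * transcript is the concatenation of the two.
 *
 * <script setup>
 * import { useAudioRecorder } from '@/composables/useAudioRecorder' // assumed path
 *
 * const {
 *   isRecording,
 *   recognizedText,
 *   lastFinalText,
 *   startRecording,
 *   stopRecording,
 *   cancelRecording
 * } = useAudioRecorder()
 * </script>
 *
 * <template>
 *   <view>
 *     <!-- finalized text plus the in-flight intermediate result -->
 *     <text>{{ recognizedText + lastFinalText }}</text>
 *     <button v-if="!isRecording" @click="startRecording">Start</button>
 *     <button v-else @click="stopRecording">Stop</button>
 *     <button v-if="isRecording" @click="cancelRecording">Cancel</button>
 *   </view>
 * </template>
 */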