flat:语音功能优化

2025-07-22 15:20:21 +08:00
parent ea04387b58
commit 58c36c01a0
11 changed files with 229 additions and 479 deletions
--- a/hook/useRealtimeRecorder.js
+++ b/hook/useRealtimeRecorder.js
@@ -1,387 +1,246 @@
 import {
    ref,
    onUnmounted
-} from 'vue';
+} from 'vue'
+import {
+    $api,

-function mergeText(prevText, newText) {
-    if (newText.startsWith(prevText)) {
-        return newText; // 直接替换，避免重复拼接
+} from '../common/globalFunction';
+
+import config from '@/config'
+
+export function useAudioRecorder() {
+    const isRecording = ref(false)
+    const isStopping = ref(false)
+    const isSocketConnected = ref(false)
+    const recordingDuration = ref(0)
+
+    const audioDataForDisplay = ref(new Array(16).fill(0))
+    const volumeLevel = ref(0)
+
+    const recognizedText = ref('')
+    const lastFinalText = ref('')
+
+    let audioStream = null
+    let audioContext = null
+    let audioInput = null
+    let scriptProcessor = null
+    let websocket = null
+    let durationTimer = null
+
+    // Builds a random 32-character lowercase hex identifier. Every 0, 1 and 8
+    // digit of the UUID v4 template is replaced by a random hex digit (the
+    // literal 4 is kept and each 8 becomes 8-b), then the dashes are removed.
+    const generateUUID = () => {
+        const template = '10000000-1000-4000-8000-100000000000'
+        const randomizeDigit = (digit) => {
+            const [randomByte] = crypto.getRandomValues(new Uint8Array(1))
+            return (digit ^ (randomByte & (15 >> (digit / 4)))).toString(16)
+        }
+        return template.replace(/[018]/g, randomizeDigit).replace(/-/g, '')
    }
-    return prevText + newText; // 兼容意外情况
-}

-export function useAudioRecorder(wsUrl) {
-    // 状态变量
-    const isRecording = ref(false);
-    const isStopping = ref(false);
-    const isSocketConnected = ref(false);
-    const recordingDuration = ref(0);
-    const audioDataForDisplay = ref(new Array(16).fill(0.01));
-    const volumeLevel = ref(0);
+    /**
+     * Requests the speech-recognition WebSocket URL from the backend.
+     *
+     * @returns {Promise<string>} The URL carried in the response's `msg` field.
+     * @throws {Error} When the response code is not 200 or `msg` is not a
+     *     non-empty string.
+     */
+    const fetchWsUrl = async () => {
+        const res = await $api.createRequest('/app/speech/getToken')
+        const wsUrl = res?.msg
+        // Reject a missing URL here so callers get this error instead of an
+        // unrelated "Invalid URL" failure when the value is parsed later.
+        if (res?.code !== 200 || typeof wsUrl !== 'string' || wsUrl === '') {
+            throw new Error('无法获取语音识别 wsUrl')
+        }
+        return wsUrl
+    }

-    // 音频相关
-    const audioContext = ref(null);
-    const mediaStream = ref(null);
-    const workletNode = ref(null);
-    const analyser = ref(null);
-
-    // 网络相关
-    const socket = ref(null);
-
-    // 配置常量
-    const SAMPLE_RATE = 16000;
-    const SILENCE_THRESHOLD = 0.05; // 静音阈值 (0-1)
-    const SILENCE_DURATION = 100; // 静音持续时间(ms)后切片
-    const MIN_SOUND_DURATION = 200; // 最小有效声音持续时间(ms)
-
-    // 音频处理变量
-    const lastSoundTime = ref(0);
-    const audioChunks = ref([]);
-    const currentChunkStartTime = ref(0);
-    const silenceStartTime = ref(0);
-
-    // 语音识别结果
-    const recognizedText = ref('');
-    const lastFinalText = ref(''); // 保存最终确认的文本
-
-    // AudioWorklet处理器代码
-    const workletProcessorCode = `
-    class AudioProcessor extends AudioWorkletProcessor {
-        constructor(options) {
-            super();
-            this.silenceThreshold = options.processorOptions.silenceThreshold;
-            this.sampleRate = options.processorOptions.sampleRate;
-            this.samplesPerChunk = Math.floor(this.sampleRate * 0.05); // 50ms的块
-            this.buffer = new Int16Array(this.samplesPerChunk);
-            this.index = 0;
-            this.lastUpdate = 0;
-        }
-        
-        calculateVolume(inputs) {
-            const input = inputs[0];
-            if (!input || input.length === 0) return 0;
-            
-            let sum = 0;
-            const inputChannel = input[0];
-            for (let i = 0; i < inputChannel.length; i++) {
-                sum += inputChannel[i] * inputChannel[i];
-            }
-            return Math.sqrt(sum / inputChannel.length);
-        }
-        
-        process(inputs) {
-            const now = currentTime;
-            const volume = this.calculateVolume(inputs);
-            
-            // 每50ms发送一次分析数据
-            if (now - this.lastUpdate > 0.05) {
-                this.lastUpdate = now;
-                
-                // 简单的频率分析 (模拟16个频段)
-                const simulatedFreqData = [];
-                for (let i = 0; i < 16; i++) {
-                    simulatedFreqData.push(
-                        Math.min(1, volume * 10 + (Math.random() * 0.2 - 0.1))
-                    );
-                }
-                
-                this.port.postMessage({
-                    type: 'analysis',
-                    volume: volume,
-                    frequencyData: simulatedFreqData,
-                    isSilent: volume < this.silenceThreshold,
-                    timestamp: now
-                });
-            }
-            
-            // 原始音频处理
-            const input = inputs[0];
-            if (input && input.length > 0) {
-                const inputChannel = input[0];
-                for (let i = 0; i < inputChannel.length; i++) {
-                    this.buffer[this.index++] = Math.max(-32768, Math.min(32767, inputChannel[i] * 32767));
-                    
-                    if (this.index >= this.samplesPerChunk) {
-                        this.port.postMessage({
-                            type: 'audio',
-                            audioData: this.buffer.buffer,
-                            timestamp: now
-                        }, [this.buffer.buffer]);
-                        
-                        this.buffer = new Int16Array(this.samplesPerChunk);
-                        this.index = 0;
-                    }
-                }
-            }
-            return true;
+    /**
+     * Reads the `appkey` and `token` query parameters from a WebSocket URL.
+     *
+     * @param {string} wsUrl - Absolute URL; `new URL` throws if it cannot be parsed.
+     * @returns {{appkey: (string|null), token: (string|null)}} Each value is
+     *     null when the parameter is absent.
+     */
+    function extractWsParams(wsUrl) {
+        const { searchParams: query } = new URL(wsUrl)
+        return {
+            appkey: query.get('appkey'),
+            token: query.get('token')
        }
    }
-    registerProcessor('audio-processor', AudioProcessor);
-    `;

-    // 初始化WebSocket连接
-    const initSocket = (wsUrl) => {
+
+    const connectWebSocket = async () => {
+        const wsUrl = await fetchWsUrl()
+        const {
+            appkey,
+            token
+        } = extractWsParams(wsUrl)
        return new Promise((resolve, reject) => {
-            socket.value = new WebSocket(wsUrl);
+            websocket = new WebSocket(wsUrl)
+            websocket.binaryType = 'arraybuffer'

-            socket.value.onopen = () => {
-                console.log('open')
-                isSocketConnected.value = true;
-                resolve();
-            };
+            websocket.onopen = () => {
+                isSocketConnected.value = true

-            socket.value.onerror = (error) => {
-                reject(error);
-            };
+                // Send the StartTranscription message (see demo.html)
+                const startTranscriptionMessage = {
+                    header: {
+                        appkey: appkey, // Does not affect usage; may be left empty or carried in via wsUrl
+                        namespace: 'SpeechTranscriber',
+                        name: 'StartTranscription',
+                        task_id: generateUUID(),
+                        message_id: generateUUID()
+                    },
+                    payload: {
+                        format: 'pcm',
+                        sample_rate: 16000,
+                        enable_intermediate_result: true,
+                        enable_punctuation_prediction: true,
+                        enable_inverse_text_normalization: true
+                    }
+                }
+                websocket.send(JSON.stringify(startTranscriptionMessage))
+                resolve()
+            }

-            socket.value.onclose = () => {
-                isSocketConnected.value = false;
-            };
+            websocket.onerror = (e) => {
+                isSocketConnected.value = false
+                reject(e)
+            }

-            socket.value.onmessage = handleMessage;
-        });
-    };
+            websocket.onclose = () => {
+                isSocketConnected.value = false
+            }

-    const handleMessage = (values) => {
-        try {
-            const data = JSON.parse(event.data);
-            if (data.text) {
-                const {
-                    asrEnd,
-                    text
-                } = data
-                if (asrEnd === 'true') {
-                    recognizedText.value += data.text;
-                } else {
-                    lastFinalText.value = '';
+            websocket.onmessage = (e) => {
+                const msg = JSON.parse(e.data)
+                const name = msg?.header?.name
+                const payload = msg?.payload
+
+                switch (name) {
+                    case 'TranscriptionResultChanged': {
+                        // Interim recognized text (optional: stash_result.unfixedText is more precise)
+                        const text = payload?.unfixed_result || payload?.result || ''
+                        lastFinalText.value = text
+                        break
+                    }
+                    case 'SentenceBegin': {
+                        // Optional: a new sentence begins; reset state here
+                        // console.log('new sentence recognition started')
+                        break
+                    }
+                    case 'SentenceEnd': {
+                        const text = payload?.result || ''
+                        const confidence = payload?.confidence || 0
+                        if (text && confidence > 0.5) {
+                            recognizedText.value += text
+                            lastFinalText.value = ''
+                            // console.log('recognition finished:', {
+                            //     text,
+                            //     confidence
+                            // })
+                        }
+                        break
+                    }
+                    case 'TranscriptionStarted': {
+                        // console.log('recognition task started')
+                        break
+                    }
+                    case 'TranscriptionCompleted': {
+                        lastFinalText.value = ''
+                        // console.log('all recognition finished')
+                        break
+                    }
+                    case 'TaskFailed': {
+                        console.error('识别失败:', msg?.header?.status_text)
+                        break
+                    }
+                    default:
+                        console.log('未知消息类型:', name, msg)
+                        break
                }
            }
-        } catch (error) {
-            console.error('解析识别结果失败:', error);
+        })
+    }
+
+    /**
+     * Starts a recording session: clears the previous recognition text, opens
+     * the recognition WebSocket, creates a 16 kHz AudioContext on the microphone
+     * stream and sends 16-bit PCM frames to the socket while updating the
+     * volume refs. Failures are logged and followed by cleanup(); nothing is
+     * rethrown. Does nothing when a recording is already active.
+     */
+    const startRecording = async () => {
+        if (isRecording.value) return
+
+        // Converts one buffer of float samples to 16-bit PCM, publishes its RMS
+        // level, and forwards the PCM bytes while the socket is open.
+        const handleAudioProcess = (event) => {
+            const samples = event.inputBuffer.getChannelData(0)
+            const frameLength = samples.length
+            const pcm = new Int16Array(frameLength)
+            let squareSum = 0
+            for (let idx = 0; idx < frameLength; idx += 1) {
+                const clamped = Math.max(-1, Math.min(1, samples[idx]))
+                pcm[idx] = clamped * 0x7FFF
+                squareSum += clamped * clamped
+            }
+
+            volumeLevel.value = Math.sqrt(squareSum / frameLength)
+            audioDataForDisplay.value = Array(16).fill(volumeLevel.value)
+
+            if (websocket?.readyState === WebSocket.OPEN) {
+                websocket.send(pcm.buffer)
+            }
+        }
+
+        try {
+            recognizedText.value = ''
+            lastFinalText.value = ''
+            await connectWebSocket()
+
+            const constraints = {
+                audio: true
+            }
+            audioStream = await navigator.mediaDevices.getUserMedia(constraints)
+
+            const AudioContextClass = window.AudioContext || window.webkitAudioContext
+            audioContext = new AudioContextClass({
+                sampleRate: 16000
+            })
+            audioInput = audioContext.createMediaStreamSource(audioStream)
+            scriptProcessor = audioContext.createScriptProcessor(2048, 1, 1)
+            scriptProcessor.onaudioprocess = handleAudioProcess
+
+            audioInput.connect(scriptProcessor)
+            scriptProcessor.connect(audioContext.destination)
+
+            isRecording.value = true
+            recordingDuration.value = 0
+            durationTimer = setInterval(() => {
+                recordingDuration.value += 1
+            }, 1000)
+        } catch (failure) {
+            console.error('启动失败:', failure)
+            cleanup()
        }
    }

-    // 处理音频切片
-    const processAudioChunk = (isSilent) => {
-        const now = Date.now();
+    // Ends the active session: when the socket is open it sends a
+    // StopTranscription message and closes the socket, then releases resources
+    // via cleanup(). Does nothing if no recording is active or a stop/cancel is
+    // already in progress.
+    const stopRecording = () => {
+        if (!isRecording.value) return
+        if (isStopping.value) return
+        isStopping.value = true

-        if (!isSilent) {
-            // 检测到声音
-            lastSoundTime.value = now;
-
-            if (silenceStartTime.value > 0) {
-                // 从静音恢复到有声音
-                silenceStartTime.value = 0;
-            }
-        } else {
-            // 静音状态
-            if (silenceStartTime.value === 0) {
-                silenceStartTime.value = now;
-            }
-
-            // 检查是否达到静音切片条件
-            if (now - silenceStartTime.value >= SILENCE_DURATION &&
-                now - currentChunkStartTime.value >= MIN_SOUND_DURATION) {
-                sendCurrentChunk();
-            }
-        }
-    };
-
-    // 发送当前音频块
-    const sendCurrentChunk = () => {
-        if (audioChunks.value.length === 0 || !socket.value || socket.value.readyState !== WebSocket.OPEN) {
-            return;
-        }
-        try {
-            // 合并所有块
-            const totalBytes = audioChunks.value.reduce((total, chunk) => total + chunk.byteLength, 0);
-            const combined = new Int16Array(totalBytes / 2);
-            let offset = 0;
-
-            audioChunks.value.forEach(chunk => {
-                const samples = new Int16Array(chunk);
-                combined.set(samples, offset);
-                offset += samples.length;
-            });
-
-            // 发送合并后的数据
-            socket.value.send(combined.buffer);
-            audioChunks.value = [];
-
-            // 记录新块的开始时间
-            currentChunkStartTime.value = Date.now();
-            silenceStartTime.value = 0;
-        } catch (error) {
-            console.error('发送音频数据时出错:', error);
-        }
-    };
-
-    // 开始录音
-    const startRecording = async () => {
-        if (isRecording.value) return;
-
-        try {
-            // 重置状态
-            recognizedText.value = '';
-            lastFinalText.value = '';
-            // 重置状态
-            recordingDuration.value = 0;
-            audioChunks.value = [];
-            lastSoundTime.value = 0;
-            currentChunkStartTime.value = Date.now();
-            silenceStartTime.value = 0;
-
-            // 初始化WebSocket
-            await initSocket(wsUrl);
-
-            // 获取音频流
-            mediaStream.value = await navigator.mediaDevices.getUserMedia({
-                audio: {
-                    sampleRate: SAMPLE_RATE,
-                    channelCount: 1,
-                    echoCancellation: true,
-                    noiseSuppression: true,
-                    autoGainControl: false
-                },
-                video: false
-            });
-
-            // 创建音频上下文
-            audioContext.value = new(window.AudioContext || window.webkitAudioContext)({
-                sampleRate: SAMPLE_RATE
-            });
-
-            // 注册AudioWorklet
-            const blob = new Blob([workletProcessorCode], {
-                type: 'application/javascript'
-            });
-            const workletUrl = URL.createObjectURL(blob);
-            await audioContext.value.audioWorklet.addModule(workletUrl);
-            URL.revokeObjectURL(workletUrl);
-
-            // 创建AudioWorkletNode
-            workletNode.value = new AudioWorkletNode(audioContext.value, 'audio-processor', {
-                processorOptions: {
-                    silenceThreshold: SILENCE_THRESHOLD,
-                    sampleRate: SAMPLE_RATE
+        if (websocket?.readyState === WebSocket.OPEN) {
+            const stopMessage = {
+                header: {
+                    namespace: 'SpeechTranscriber',
+                    name: 'StopTranscription',
+                    message_id: generateUUID()
                }
-            });
-
-            // 处理音频数据
-            workletNode.value.port.onmessage = (e) => {
-                if (e.data.type === 'audio') {
-                    audioChunks.value.push(e.data.audioData);
-                } else if (e.data.type === 'analysis') {
-                    audioDataForDisplay.value = e.data.frequencyData;
-                    volumeLevel.value = e.data.volume;
-                    processAudioChunk(e.data.isSilent);
-                }
-            };
-
-            // 连接音频节点
-            const source = audioContext.value.createMediaStreamSource(mediaStream.value);
-            source.connect(workletNode.value);
-            workletNode.value.connect(audioContext.value.destination);
-
-            isRecording.value = true;
-
-        } catch (error) {
-            console.error('启动录音失败:', error);
-            cleanup();
-            throw error;
+            }
+            websocket.send(JSON.stringify(stopMessage))
+            websocket.close()
        }
-    };

-    // 停止录音
-    const stopRecording = async () => {
-        if (!isRecording.value || isStopping.value) return;
+        cleanup()
+        isStopping.value = false
+    }

-        isStopping.value = true;
+    // Aborts the current session without sending a stop message: closes the
+    // socket if one exists and releases resources via cleanup(). Does nothing
+    // when no recording is active or a stop/cancel is already in progress.
+    const cancelRecording = () => {
+        const idle = !isRecording.value
+        if (idle || isStopping.value) return
+        isStopping.value = true
+        if (websocket != null) {
+            websocket.close()
+        }
+        cleanup()
+        isStopping.value = false
+    }

-        try {
-            // 发送最后一个音频块（无论是否静音）
-            sendCurrentChunk();
-
-            // 发送结束标记
-            if (socket.value?.readyState === WebSocket.OPEN) {
-                socket.value.send(JSON.stringify({
-                    action: 'end',
-                    duration: recordingDuration.value
-                }));
-
-                await new Promise(resolve => {
-                    if (socket.value.bufferedAmount === 0) {
-                        resolve();
-                    } else {
-                        const timer = setInterval(() => {
-                            if (socket.value.bufferedAmount === 0) {
-                                clearInterval(timer);
-                                resolve();
-                            }
-                        }, 50);
-                    }
-                });
-                socket.value.close();
-            }
-
-            cleanup();
-
-        } catch (error) {
-            console.error('停止录音时出错:', error);
-            throw error;
-        } finally {
-            isStopping.value = false;
-        }
-    };
-
-    // 清理资源
    const cleanup = () => {
-        if (mediaStream.value) {
-            mediaStream.value.getTracks().forEach(track => track.stop());
-            mediaStream.value = null;
-        }
+        // Releases everything a session holds (timer, audio graph, microphone
+        // tracks, socket) and resets the recording/connection flags.
+        clearInterval(durationTimer)
+        durationTimer = null

-        if (workletNode.value) {
-            workletNode.value.disconnect();
-            workletNode.value = null;
-        }
+        scriptProcessor?.disconnect()
+        audioInput?.disconnect()
+        audioStream?.getTracks().forEach(track => track.stop())
+        audioContext?.close()
+        // Close the socket before dropping the reference; otherwise a start
+        // that fails after connecting (e.g. microphone access rejected) would
+        // leave it open with no way to reach it. Closing twice is harmless.
+        websocket?.close()

-        if (audioContext.value && audioContext.value.state !== 'closed') {
-            audioContext.value.close();
-            audioContext.value = null;
-        }
+        audioStream = null
+        audioContext = null
+        audioInput = null
+        scriptProcessor = null
+        websocket = null

-        audioChunks.value = [];
-        isRecording.value = false;
-        isSocketConnected.value = false;
-    };
-
-    /// 取消录音
-    const cancelRecording = async () => {
-        if (!isRecording.value || isStopping.value) return;
-        isStopping.value = true;
-        try {
-            if (socket.value?.readyState === WebSocket.OPEN) {
-                console.log('发送结束标记...');
-                socket.value.send(JSON.stringify({
-                    action: 'cancel'
-                }));
-                socket.value.close();
-            }
-            cleanup()
-        } catch (error) {
-            console.error('取消录音时出错:', error);
-            throw error;
-        } finally {
-            isStopping.value = false;
-        }
-    };
+        isRecording.value = false
+        isSocketConnected.value = false
+    }

    onUnmounted(() => {
-        if (isRecording.value) {
-            stopRecording();
-        }
-    });
+        // Shut down a session that is still active when the component unmounts.
+        if (!isRecording.value) return
+        stopRecording()
+    })

    return {
        isRecording,
@@ -390,10 +249,10 @@ export function useAudioRecorder(wsUrl) {
        recordingDuration,
        audioDataForDisplay,
        volumeLevel,
-        startRecording,
-        stopRecording,
        recognizedText,
        lastFinalText,
+        startRecording,
+        stopRecording,
        cancelRecording
-    };
+    }
 }