feat: add speech recognition SDK + file detection

Apcallover
2025-12-19 10:25:10 +08:00
parent 4c29882f36
commit 4befbb05cc
9 changed files with 1080 additions and 1141 deletions


@@ -3,255 +3,344 @@ import {
onUnmounted
} from 'vue'
import {
$api
} from '../common/globalFunction'; // request wrapper
import config from '@/config'
// open-source ASR
export function useAudioRecorder() {
// --- State ---
const isRecording = ref(false)
const isStopping = ref(false)
const isSocketConnected = ref(false)
const recordingDuration = ref(0)
const audioDataForDisplay = ref(new Array(16).fill(0))
const volumeLevel = ref(0) // 0-100
const recognizedText = ref('')
const lastFinalText = ref('')
// --- Internal variables ---
let socketTask = null
let durationTimer = null
// --- APP / mini-program variables ---
let recorderManager = null;
// --- H5 variables ---
let audioContext = null;
let scriptProcessor = null;
let mediaStreamSource = null;
let h5Stream = null;
// --- Recorder config ---
const RECORD_CONFIG = {
duration: 600000, // max 10 minutes
sampleRate: 16000,
numberOfChannels: 1,
format: 'pcm',
frameSize: 4096
}
/**
 * Build the WebSocket URL (with auth token)
 */
const getWsUrl = async () => {
let wsUrl = config.vioceBaseURl
// append the token as a query parameter
const token = uni.getStorageSync('token') || '';
if (token) {
const separator = wsUrl.includes('?') ? '&' : '?';
wsUrl = `${wsUrl}${separator}token=${encodeURIComponent(token)}`;
}
return wsUrl;
}
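// Example (sketch; the actual value of config.vioceBaseURl is project config,
// shown here hypothetically):
//   config.vioceBaseURl = 'wss://example.com/asr'
//   await getWsUrl()  // -> 'wss://example.com/asr?token=<urlencoded token>'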
/**
 * Start recording (entry point)
 */
const startRecording = async () => {
if (isRecording.value) return
try {
recognizedText.value = ''
volumeLevel.value = 0
// #ifdef H5
if (location.protocol !== 'https:' && location.hostname !== 'localhost') {
uni.showToast({
title: 'H5 recording requires HTTPS',
icon: 'none'
});
return;
}
// #endif
const url = await getWsUrl()
console.log('Connecting to ASR:', url)
await connectSocket(url);
} catch (err) {
console.error('Failed to start:', err);
uni.showToast({
title: 'Failed to start: ' + (err.message || ''),
icon: 'none'
});
cleanup();
}
}
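// Note: browsers only expose navigator.mediaDevices.getUserMedia in secure
// contexts (HTTPS or localhost), which is what the protocol check above
// guards against before attempting H5 capture.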
/**
 * Connect the WebSocket
 */
const connectSocket = (url) => {
return new Promise((resolve, reject) => {
socketTask = uni.connectSocket({
url: url,
success: () => console.log('Socket connection request sent'),
fail: (err) => reject(err)
});
socketTask.onOpen((res) => {
console.log('WebSocket connected');
isSocketConnected.value = true;
// #ifdef H5
startH5Recording().then(() => resolve()).catch(err => {
socketTask.close();
reject(err);
});
// #endif
// #ifndef H5
startAppRecording();
resolve();
// #endif
});
socketTask.onMessage((res) => {
// receive recognized text
if (res.data) {
recognizedText.value = res.data;
}
});
socketTask.onError((err) => {
console.error('Socket error:', err);
isSocketConnected.value = false;
stopRecording();
});
socketTask.onClose(() => {
isSocketConnected.value = false;
console.log('Socket closed');
});
})
}
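// Note: onMessage assigns res.data to recognizedText wholesale (no appending),
// so the backend is assumed to push the full transcript as a plain-text frame
// on every update.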
const startH5Recording = async () => {
try {
// 1. get the microphone stream
const stream = await navigator.mediaDevices.getUserMedia({
audio: true
});
h5Stream = stream;
// 2. create an AudioContext at the target sample rate
const AudioContext = window.AudioContext || window.webkitAudioContext;
audioContext = new AudioContext({
sampleRate: 16000
});
mediaStreamSource = audioContext.createMediaStreamSource(stream);
scriptProcessor = audioContext.createScriptProcessor(4096, 1, 1);
scriptProcessor.onaudioprocess = (event) => {
if (!isSocketConnected.value || !socketTask) return;
const inputData = event.inputBuffer.getChannelData(0);
calculateVolume(inputData, true);
// convert Float32 samples to 16-bit little-endian PCM
const buffer = new ArrayBuffer(inputData.length * 2);
const view = new DataView(buffer);
for (let i = 0; i < inputData.length; i++) {
let s = Math.max(-1, Math.min(1, inputData[i]));
view.setInt16(i * 2, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
}
socketTask.send({
data: buffer,
fail: (e) => console.error('Failed to send audio', e)
});
};
mediaStreamSource.connect(scriptProcessor);
scriptProcessor.connect(audioContext.destination);
isRecording.value = true;
recordingDuration.value = 0;
durationTimer = setInterval(() => recordingDuration.value++, 1000);
console.log('H5 recording started');
} catch (err) {
console.error('Failed to start H5 recording:', err);
throw err;
}
}
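// The onaudioprocess handler converts Web Audio Float32 samples in [-1, 1] to
// 16-bit little-endian PCM. The asymmetric scaling (0x8000 for negatives,
// 0x7FFF for positives) keeps results inside the signed 16-bit range.
// createScriptProcessor is deprecated in the Web Audio spec; an
// AudioWorkletNode would be the modern replacement if a refactor is feasible.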
const stopH5Resources = () => {
if (scriptProcessor) scriptProcessor.disconnect();
if (mediaStreamSource) mediaStreamSource.disconnect();
if (audioContext) audioContext.close();
if (h5Stream) h5Stream.getTracks().forEach(track => track.stop());
scriptProcessor = null;
mediaStreamSource = null;
audioContext = null;
h5Stream = null;
}
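// track.stop() is what actually releases the microphone (and the browser's
// recording indicator); closing the AudioContext alone does not end capture.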
const startAppRecording = () => {
recorderManager = uni.getRecorderManager();
recorderManager.onFrameRecorded((res) => {
const {
frameBuffer
} = res;
calculateVolume(frameBuffer, false);
if (isSocketConnected.value && socketTask) {
socketTask.send({
data: frameBuffer
});
}
});
recorderManager.onStart(() => {
console.log('APP recording started');
isRecording.value = true;
recordingDuration.value = 0;
durationTimer = setInterval(() => recordingDuration.value++, 1000);
});
recorderManager.onError((err) => {
console.error('APP recording error:', err);
cleanup();
});
recorderManager.start(RECORD_CONFIG);
}
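// onFrameRecorded only fires because RECORD_CONFIG specifies frameSize; each
// callback delivers frameBuffer as an ArrayBuffer of raw PCM, forwarded to
// the socket unchanged.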
const stopHardwareResource = () => {
// APP / mini-program stop
if (recorderManager) {
recorderManager.stop();
}
// H5 stop
// #ifdef H5
stopH5Resources();
// #endif
}
/**
 * Stop recording (all platforms)
 */
const stopRecording = () => {
// stop APP recording
if (recorderManager) {
recorderManager.stop();
}
// release H5 recording resources
// #ifdef H5
stopH5Resources();
// #endif
// close the socket
if (socketTask) {
socketTask.close();
}
cleanup();
}
const cancelRecording = () => {
if (!isRecording.value) return;
console.log('Recording cancelled, result discarded');
// 1. stop hardware capture
stopHardwareResource();
// 2. force-close the socket
if (socketTask) {
socketTask.close();
}
// 3. key step: discard any recognized text
recognizedText.value = '';
// 4. reset state
cleanup();
}
/**
 * Reset shared state
 */
const cleanup = () => {
clearInterval(durationTimer);
isRecording.value = false;
isSocketConnected.value = false;
socketTask = null;
recorderManager = null;
volumeLevel.value = 0;
}
/**
 * Compute volume level (handles Float32 input and Int16/ArrayBuffer input)
 */
const calculateVolume = (data, isFloat32) => {
let sum = 0;
let length = 0;
if (isFloat32) {
length = data.length;
// sample every 10th value to keep the audio callback cheap
for (let i = 0; i < length; i += 10) {
sum += Math.abs(data[i]);
}
volumeLevel.value = Math.min(100, Math.floor((sum / (length / 10)) * 100 * 3));
} else {
const int16Data = new Int16Array(data);
length = int16Data.length;
for (let i = 0; i < length; i += 10) {
sum += Math.abs(int16Data[i]);
}
const avg = sum / (length / 10);
volumeLevel.value = Math.min(100, Math.floor((avg / 10000) * 100));
}
}
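// Worked example (assumed input levels): Float32 samples averaging |x| ≈ 0.1
// give min(100, floor(0.1 * 100 * 3)) = 30; Int16 samples averaging |x| ≈ 5000
// give min(100, floor(5000 / 10000 * 100)) = 50.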
onUnmounted(() => {
if (isRecording.value) {
stopRecording();
}
})
return {
isRecording,
isStopping,
isSocketConnected,
recordingDuration,
audioDataForDisplay,
volumeLevel,
recognizedText,
lastFinalText,
startRecording,
stopRecording,
cancelRecording
}
}
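// Usage sketch (assumes a typical uni-app page; the import path and template
// bindings are illustrative, not part of this commit):
//   import { useAudioRecorder } from '@/hooks/useAudioRecorder'
//   const { isRecording, volumeLevel, recognizedText,
//           startRecording, stopRecording, cancelRecording } = useAudioRecorder()
//   // <button @touchstart="startRecording" @touchend="stopRecording">Hold to talk</button>
//   // recognizedText updates reactively as socket messages arrive.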