feat : 语音转文字实现

2025-12-26 18:41:58 +08:00
parent e114675eba
commit f6b7755e32
2 changed files with 304 additions and 155 deletions
--- a/hook/useRealtimeRecorderOnce.js
+++ b/hook/useRealtimeRecorderOnce.js
@@ -2,44 +2,138 @@ import {
    ref,
    onUnmounted
 } from 'vue'
-import {
-    $api
-} from '../common/globalFunction';
 import config from '@/config'

 export function useRealtimeRecorderOnce() {
    // --- 状态定义 ---
    const isRecording = ref(false)
-    const isProcessing = ref(false) // 新增：处理录音数据状态
+    const isProcessing = ref(false)
    const recordingDuration = ref(0)
    const volumeLevel = ref(0) // 0-100
    const recognizedText = ref('')
-    const audioData = ref(null) // 新增：存储录音数据
-    const audioDataForDisplay = ref([]) // 新增：用于波形显示的数据
+    const audioData = ref(null)
+    const audioDataForDisplay = ref([])

    // --- 内部变量 ---
    let durationTimer = null

    // --- APP/小程序 变量 ---
    let recorderManager = null;
-    let appAudioChunks = []; // 新增：存储APP录音数据块
+    let appAudioChunks = [];

    // --- H5 变量 ---
    let audioContext = null;
-    let scriptProcessor = null;
-    let mediaStreamSource = null;
+    let mediaRecorder = null;
    let h5Stream = null;
-    let h5AudioChunks = []; // 新增：存储H5录音数据块
+    let h5AudioChunks = [];
+    let analyser = null;
+    let dataArray = null;

    // --- Recorder configuration ---
    const RECORD_CONFIG = { // NOTE(review): the code that consumes this object is not visible in this diff
        duration: 600000, // presumably milliseconds (10 minutes) — TODO confirm against the recorder API
        sampleRate: 16000, // same 16000 rate that the encodeWAV calls in this file pass explicitly
        numberOfChannels: 1,
-        format: 'pcm',
+        format: 'wav', // this commit switches the requested format from 'pcm' to 'wav'
+        encodeBitRate: 16000,
        frameSize: 4096
    }

+    /**
+     * --- WAV encoding helpers ---
+     *
+     * Builds a complete WAV file in memory: a 44-byte RIFF/WAVE header followed
+     * by the samples written as little-endian signed 16-bit integers.
+     *
+     * @param {ArrayLike<number>} samples - Sample values; each is clamped to [-1, 1] before scaling.
+     * @param {number} [sampleRate=16000] - Written to the header's sample-rate field.
+     * @param {number} [numChannels=1] - Written to the header's channel-count field.
+     * @param {number} [bitsPerSample=16] - Written to the header and used to size the buffer.
+     * @returns {ArrayBuffer} Buffer of 44 + samples.length * (bitsPerSample / 8) bytes.
+     *
+     * NOTE(review): the sample loop always writes 2 bytes per sample via setInt16,
+     * so only bitsPerSample = 16 matches the buffer size and header computed here.
+     */
+    const encodeWAV = (samples, sampleRate = 16000, numChannels = 1, bitsPerSample = 16) => {
+        const bytesPerSample = bitsPerSample / 8;
+        const blockAlign = numChannels * bytesPerSample;
+        const byteRate = sampleRate * blockAlign;
+        const dataSize = samples.length * bytesPerSample;
+        const buffer = new ArrayBuffer(44 + dataSize); // 44 header bytes + sample data
+        const view = new DataView(buffer);
+
+        // RIFF chunk descriptor
+        writeString(view, 0, 'RIFF');
+        view.setUint32(4, 36 + dataSize, true); // bytes that follow this field: 36 remaining header bytes + data
+        writeString(view, 8, 'WAVE');
+
+        // fmt sub-chunk
+        writeString(view, 12, 'fmt ');
+        view.setUint32(16, 16, true); // Subchunk1Size (16 for PCM)
+        view.setUint16(20, 1, true); // AudioFormat (1 for PCM)
+        view.setUint16(22, numChannels, true);
+        view.setUint32(24, sampleRate, true);
+        view.setUint32(28, byteRate, true);
+        view.setUint16(32, blockAlign, true);
+        view.setUint16(34, bitsPerSample, true);
+
+        // data sub-chunk
+        writeString(view, 36, 'data');
+        view.setUint32(40, dataSize, true);
+
+        // Write audio samples
+        const volume = 1; // fixed gain; multiplying by 1 leaves the samples unchanged
+        let offset = 44; // first byte after the header
+        for (let i = 0; i < samples.length; i++) {
+            let sample = Math.max(-1, Math.min(1, samples[i])); // clamp to [-1, 1]
+            sample = sample * volume;
+            view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true); // negatives scale by 0x8000, the rest by 0x7FFF
+            offset += 2;
+        }
+
+        return buffer;
+    }
+
+    const writeString = (view, offset, string) => { // Writes `string` into `view`, one byte per character, starting at `offset`.
+        for (let i = 0; i < string.length; i++) {
+            view.setUint8(offset + i, string.charCodeAt(i)); // stores the char code as a byte; callers in this file pass 4-character ASCII tags
+        }
+    }
+
+    const floatTo16BitPCM = (output, offset, input) => { // Writes `input` samples into `output` as 16-bit values starting at `offset`. NOTE(review): no caller is visible in this diff.
+        for (let i = 0; i < input.length; i++, offset += 2) { // advances 2 bytes per sample
+            const s = Math.max(-1, Math.min(1, input[i])); // clamp to [-1, 1]
+            output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true); // little-endian; negatives scale by 0x8000, the rest by 0x7FFF
+        }
+    }
+
+    /**
+     * --- Volume helpers ---
+     *
+     * Computes a 0-100 volume level from float samples: the RMS of the array
+     * is multiplied by 300, floored, and capped at 100.
+     *
+     * @param {Float32Array} float32Array - Samples to measure.
+     * @returns {number} 0, or an integer from 5 to 100.
+     *
+     * NOTE(review): for a zero-length array, sum / length is 0 / 0, so NaN is returned.
+     */
+    const calculateVolumeFromFloat32 = (float32Array) => {
+        let sum = 0;
+        const length = float32Array.length;
+        
+        // Compute the RMS (root mean square)
+        for (let i = 0; i < length; i++) {
+            sum += float32Array[i] * float32Array[i];
+        }
+        const rms = Math.sqrt(sum / length);
+        
+        // Convert to a 0-100 value
+        // Author's note: conversational speech typically has an RMS of 0.01-0.1; screaming can reach 0.3
+        let volume = Math.min(100, Math.floor(rms * 300));
+        
+        // Noise gate: levels below 5 are reported as 0
+        if (volume < 5) volume = 0;
+        
+        return volume;
+    }
+
+    const calculateVolumeFromInt16 = (int16Array) => { // Returns a 0-100 volume level computed from 16-bit samples.
+        let sum = 0;
+        const length = int16Array.length;
+        
+        /*
+         * Compute the RMS of the samples after scaling each one by 1 / 32768.
+         * NOTE(review): for a zero-length array, sum / length is 0 / 0, so the
+         * function returns NaN.
+         */
+        for (let i = 0; i < length; i++) {
+            const normalized = int16Array[i] / 32768; // normalize to [-1, 1]
+            sum += normalized * normalized;
+        }
+        const rms = Math.sqrt(sum / length);
+        
+        // Convert to a 0-100 value: RMS x 300, floored, capped at 100
+        let volume = Math.min(100, Math.floor(rms * 300));
+        
+        // Noise gate: levels below 5 are reported as 0
+        if (volume < 5) volume = 0;
+        
+        return volume;
+    }
+
    /**
     * 开始录音 (入口)
     */
@@ -90,51 +184,75 @@ export function useRealtimeRecorderOnce() {
    }

    /**
-     * H5录音实现
+     * H5 recording implementation - collects raw samples so the WAV file can be built manually.
+     *
+     * Requests the microphone, creates a 16000 Hz AudioContext, and wires the
+     * stream source into an AnalyserNode (read for the volume level) and a
+     * ScriptProcessor (buffer size 4096, 1 input and 1 output channel).
+     * While isRecording is true, each audio callback updates volumeLevel and
+     * pushes a copy of channel 0 onto h5AudioChunks.
+     * Failures are logged and rethrown to the caller.
     */
    const startH5Recording = async () => {
        try {
            // 1. Acquire the microphone stream
            const stream = await navigator.mediaDevices.getUserMedia({
-                audio: true
+                audio: {
+                    sampleRate: 16000,
+                    channelCount: 1,
+                    echoCancellation: true,
+                    noiseSuppression: true,
+                    autoGainControl: false
+                }
            });
            h5Stream = stream;

-            // 2. 创建 AudioContext
+            // 2. Create the AudioContext used to process the audio
            const AudioContext = window.AudioContext || window.webkitAudioContext;
            audioContext = new AudioContext({
-                sampleRate: 16000
+                sampleRate: 16000,
+                latencyHint: 'interactive'
            });

-            mediaStreamSource = audioContext.createMediaStreamSource(stream);
-            scriptProcessor = audioContext.createScriptProcessor(4096, 1, 1);
-
-            scriptProcessor.onaudioprocess = (event) => {
+            // Create the audio processing nodes
+            const source = audioContext.createMediaStreamSource(stream);
+            
+            // Create the analyser used for the volume calculation
+            analyser = audioContext.createAnalyser();
+            analyser.fftSize = 256;
+            analyser.smoothingTimeConstant = 0.8;
+            dataArray = new Float32Array(analyser.frequencyBinCount); // NOTE(review): frequencyBinCount is fftSize / 2 per the Web Audio API, so this holds 128 of the 256 time-domain samples — confirm that is intended
+            
+            source.connect(analyser);
+            
+            // Create the script processor used to collect the audio data
+            const processor = audioContext.createScriptProcessor(4096, 1, 1);
+            
+            // Accumulates every sample. NOTE(review): this array is only appended to; nothing in this function reads it
+            let audioSamples = [];
+            
+            processor.onaudioprocess = (e) => {
                if (!isRecording.value) return;
-
-                const inputData = event.inputBuffer.getChannelData(0);
-
-                calculateVolume(inputData, true);
-
-                // 保存音频数据
-                const buffer = new ArrayBuffer(inputData.length * 2);
-                const view = new DataView(buffer);
+                
+                // Read the input data (channel 0)
+                const inputData = e.inputBuffer.getChannelData(0);
+                
+                // Compute the volume from the analyser's time-domain data (not from inputData)
+                analyser.getFloatTimeDomainData(dataArray);
+                const volume = calculateVolumeFromFloat32(dataArray);
+                volumeLevel.value = volume;
+                
+                // Collect the audio samples one by one
                for (let i = 0; i < inputData.length; i++) {
-                    let s = Math.max(-1, Math.min(1, inputData[i]));
-                    view.setInt16(i * 2, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
+                    audioSamples.push(inputData[i]);
                }
-
-                // 保存到数组
+                
+                // Store a copy of the current chunk; the copy is what gets pushed to h5AudioChunks
+                const buffer = new Float32Array(inputData.length);
+                buffer.set(inputData);
                h5AudioChunks.push(buffer);
            };
-
-            mediaStreamSource.connect(scriptProcessor);
-            scriptProcessor.connect(audioContext.destination);
-
-            console.log('H5 录音已启动');
+            
+            source.connect(processor);
+            processor.connect(audioContext.destination);
+            
+            console.log('H5 16kHz WAV录音已启动');

        } catch (err) {
-            console.error('H5 录音启动失败:', err);
+            console.error('H5录音启动失败:', err);
            throw err;
        }
    }
@@ -143,14 +261,19 @@ export function useRealtimeRecorderOnce() {
     * 停止H5录音资源
     */
    const stopH5Resources = () => { // Closes the H5 audio context, stops the microphone tracks, then clears the shared H5 references.
-        if (scriptProcessor) scriptProcessor.disconnect();
-        if (mediaStreamSource) mediaStreamSource.disconnect();
-        if (audioContext) audioContext.close();
-        if (h5Stream) h5Stream.getTracks().forEach(track => track.stop());
+        /*
+         * Close the audio context unless it is already closed. No explicit
+         * disconnect() calls are made on individual nodes here.
+         * NOTE(review): close() is not awaited — per the Web Audio API it returns a Promise.
+         */
+        if (audioContext && audioContext.state !== 'closed') {
+            audioContext.close();
+        }
+        
+        // Stop every track of the microphone stream
+        if (h5Stream) {
+            h5Stream.getTracks().forEach(track => track.stop());
+        }

-        scriptProcessor = null;
-        mediaStreamSource = null;
        audioContext = null;
+        analyser = null;
+        dataArray = null;
        h5Stream = null;
    }

@@ -161,24 +284,29 @@ export function useRealtimeRecorderOnce() {
        recorderManager = uni.getRecorderManager();

        recorderManager.onFrameRecorded((res) => {
-            const {
-                frameBuffer
-            } = res;
-
-            calculateVolume(frameBuffer, false);
-
-            // 保存音频数据
+            const { frameBuffer } = res;
+            
            if (frameBuffer && frameBuffer.byteLength > 0) {
+                // 计算音量
+                const int16Data = new Int16Array(frameBuffer);
+                const volume = calculateVolumeFromInt16(int16Data);
+                volumeLevel.value = volume;
+                
+                // 保存音频数据
                appAudioChunks.push(frameBuffer);
            }
        });

        recorderManager.onStart(() => {
-            console.log('APP 录音已开始');
+            console.log('APP 16kHz WAV录音已开始');
        });

        recorderManager.onError((err) => {
-            console.error('APP 录音报错:', err);
+            console.error('APP录音报错:', err);
+            uni.showToast({
+                title: '录音失败: ' + err.errMsg,
+                icon: 'none'
+            });
            cleanup();
        });

@@ -193,17 +321,12 @@ export function useRealtimeRecorderOnce() {

        isRecording.value = false;
        clearInterval(durationTimer);
-        audioDataForDisplay.value = []; // 清空显示数据

        // 停止硬件录音
        stopHardwareResource();

        // 处理录音数据
        await processAudioData();
-
-        // 清理临时数据
-        appAudioChunks = [];
-        h5AudioChunks = [];
    }

    /**
@@ -250,90 +373,136 @@ export function useRealtimeRecorderOnce() {
        const updateInterval = setInterval(() => {
            if (!isRecording.value) {
                clearInterval(updateInterval);
+                audioDataForDisplay.value = [];
                return;
            }

-            // 生成模拟的音频数据显示数据（0-1之间的值）
-            const baseValue = volumeLevel.value / 100; // 基于音量计算基础值
+            // 生成波形数据，基于当前音量
+            const baseValue = volumeLevel.value / 100;
            const data = [];
            
-            // 生成31个数据点（对应WaveDisplay的31个波形条）
+            // 生成31个数据点
            for (let i = 0; i < 31; i++) {
-                // 模拟波形：中间高，两边低
-                const position = i / 30; // 0到1
+                // 使用正弦波生成波形效果，中间高两边低
+                const position = i / 30;
                const centerDistance = Math.abs(position - 0.5);
-                const waveValue = Math.sin(Date.now() / 100 + i * 0.5) * 0.3 + 0.5;
-                const volumeFactor = baseValue * 0.8 + 0.2; // 确保最小值为0.2
-                const finalValue = waveValue * (1 - centerDistance) * volumeFactor;
+                const waveValue = Math.sin(Date.now() / 200 + i * 0.3) * 0.4 + 0.5;
                
-                data.push(Math.max(0.1, Math.min(1, finalValue)));
+                // 音量因子确保最小显示高度
+                const volumeFactor = baseValue * 0.7 + 0.3;
+                
+                // 综合计算最终值
+                let finalValue = waveValue * (1 - centerDistance) * volumeFactor;
+                finalValue = Math.max(0.1, Math.min(1, finalValue));
+                
+                data.push(finalValue);
            }
            
            audioDataForDisplay.value = data;
-        }, 100); // 每100ms更新一次
+        }, 50); // 更快的刷新率，更流畅
    }

    /**
-     * 处理录音数据
+     * Processes the recorded chunks and builds a WAV file.
+     *
+     * Does nothing if a run is already in progress (isProcessing is true).
+     * H5 build: concatenates the Float32Array chunks in h5AudioChunks.
+     * Non-H5 build: views each chunk in appAudioChunks as Int16 samples,
+     * concatenates them and divides by 32768 to get floats.
+     * The samples are passed to encodeWAV with 16000, 1, 16, wrapped in an
+     * 'audio/wav' Blob, stored in audioData and handed to sendToASR.
+     * A missing Blob, or one of 44 bytes or fewer, throws inside the try, so it
+     * is logged and reported through the toast in the catch block.
+     * Both chunk arrays and isProcessing are reset in the finally block.
+     *
+     * NOTE(review): isProcessing is set to false before sendToASR is awaited, so
+     * the flag is already clear while that call is pending.
+     * NOTE(review): new Int16Array(chunk) needs an even byteLength — presumably
+     * the recorder always delivers whole 16-bit samples; TODO confirm.
+     * NOTE(review): Int16 to float (divide by 32768) and back in encodeWAV
+     * (multiply by 0x7FFF for non-negative values) is not an exact round trip
+     * for positive samples.
     */
    const processAudioData = async () => {
-        if (!isProcessing.value) {
-            isProcessing.value = true;
+        if (isProcessing.value) return;
+        
+        isProcessing.value = true;

-            try {
-                let audioBlob = null;
+        try {
+            let audioBlob = null;

-                // #ifdef H5
-                // 合并H5录音数据
-                if (h5AudioChunks.length > 0) {
-                    const totalLength = h5AudioChunks.reduce((acc, chunk) => acc + chunk.byteLength, 0);
-                    const combinedBuffer = new ArrayBuffer(totalLength);
-                    const combinedView = new Uint8Array(combinedBuffer);
-                    
-                    let offset = 0;
-                    h5AudioChunks.forEach(chunk => {
-                        const chunkView = new Uint8Array(chunk);
-                        combinedView.set(chunkView, offset);
-                        offset += chunk.byteLength;
-                    });
+            // #ifdef H5
+            // H5: merge all audio samples and generate the WAV
+            if (h5AudioChunks.length > 0) {
+                // Merge all Float32Array chunks
+                const totalLength = h5AudioChunks.reduce((sum, chunk) => sum + chunk.length, 0);
+                const mergedSamples = new Float32Array(totalLength);
+                
+                let offset = 0;
+                h5AudioChunks.forEach(chunk => {
+                    mergedSamples.set(chunk, offset);
+                    offset += chunk.length;
+                });

-                    audioBlob = new Blob([combinedBuffer], { type: 'audio/pcm' });
-                }
-                // #endif
-
-                // #ifndef H5
-                // 合并APP录音数据
-                if (appAudioChunks.length > 0) {
-                    const totalLength = appAudioChunks.reduce((acc, chunk) => acc + chunk.byteLength, 0);
-                    const combinedBuffer = new ArrayBuffer(totalLength);
-                    const combinedView = new Uint8Array(combinedBuffer);
-                    
-                    let offset = 0;
-                    appAudioChunks.forEach(chunk => {
-                        const chunkView = new Uint8Array(chunk);
-                        combinedView.set(chunkView, offset);
-                        offset += chunk.byteLength;
-                    });
-
-                    audioBlob = new Blob([combinedBuffer], { type: 'audio/pcm' });
-                }
-                // #endif
-
-                if (audioBlob) {
-                    audioData.value = audioBlob;
-                    // 发送到服务器进行识别
-                    await sendToASR(audioBlob);
-                }
-
-            } catch (error) {
-                console.error('处理音频数据失败:', error);
-                recognizedText.value = '音频处理失败，请重试';
-            } finally {
-                isProcessing.value = false;
+                // Generate the WAV file
+                const wavBuffer = encodeWAV(mergedSamples, 16000, 1, 16);
+                audioBlob = new Blob([wavBuffer], { type: 'audio/wav' });
+                
+                console.log(`H5生成WAV文件: ${audioBlob.size} bytes, 时长: ${mergedSamples.length / 16000}秒`);
            }
+            // #endif
+
+            // #ifndef H5
+            // APP / mini-program: merge the Int16 data and generate the WAV
+            if (appAudioChunks.length > 0) {
+                // Merge all chunks into one Int16Array (2 bytes per sample)
+                const totalLength = appAudioChunks.reduce((sum, chunk) => sum + chunk.byteLength / 2, 0);
+                const mergedInt16 = new Int16Array(totalLength);
+                
+                let offset = 0;
+                appAudioChunks.forEach(chunk => {
+                    const int16Data = new Int16Array(chunk);
+                    mergedInt16.set(int16Data, offset);
+                    offset += int16Data.length;
+                });
+
+                // Convert to Float32 for WAV generation
+                const floatSamples = new Float32Array(mergedInt16.length);
+                for (let i = 0; i < mergedInt16.length; i++) {
+                    floatSamples[i] = mergedInt16[i] / 32768;
+                }
+
+                // Generate the WAV file
+                const wavBuffer = encodeWAV(floatSamples, 16000, 1, 16);
+                audioBlob = new Blob([wavBuffer], { type: 'audio/wav' });
+                
+                console.log(`APP生成WAV文件: ${audioBlob.size} bytes, 时长: ${floatSamples.length / 16000}秒`);
+            }
+            // #endif
+
+            if (audioBlob && audioBlob.size > 44) { // require more than the 44-byte WAV header, i.e. some sample data
+                audioData.value = audioBlob;
+                
+                // Save the file for debugging (optional)
+                // debugSaveWavFile(audioBlob);
+                
+                // Send to the server for recognition; the processing flag is cleared before the request is awaited
+                isProcessing.value = false
+                await sendToASR(audioBlob);
+            } else {
+                throw new Error('录音数据为空或无效');
+            }
+
+        } catch (error) {
+            console.error('处理音频数据失败:', error);
+            uni.showToast({
+                title: '音频处理失败，请重试',
+                icon: 'none'
+            });
+        } finally {
+            isProcessing.value = false;
+            appAudioChunks = [];
+            h5AudioChunks = [];
        }
    }

+    /**
+     * Debug helper: downloads the given Blob as a WAV file.
+     *
+     * Creates an object URL for the blob, clicks a temporary <a> element whose
+     * download name is `recording_<timestamp>.wav`, then removes the element and
+     * revokes the URL. Uses `document`, so it only works where a DOM exists.
+     * The only reference visible in this diff is a commented-out call.
+     *
+     * @param {Blob} blob - Value passed to URL.createObjectURL.
+     */
+    const debugSaveWavFile = (blob) => {
+        const url = URL.createObjectURL(blob);
+        const a = document.createElement('a');
+        a.href = url;
+        a.download = `recording_${Date.now()}.wav`; // timestamp taken from Date.now()
+        document.body.appendChild(a);
+        a.click();
+        document.body.removeChild(a);
+        URL.revokeObjectURL(url);
+        console.log('WAV文件已保存用于调试');
+    }
+
    /**
     * 发送音频到ASR服务器
     */
@@ -341,15 +510,12 @@ export function useRealtimeRecorderOnce() {
        try {
            // 创建FormData
            const formData = new FormData();
-            formData.append('audio', audioBlob, 'recording.pcm');
+            formData.append('file', audioBlob, 'recording.wav');
            
            // 添加Token
            const token = uni.getStorageSync('token') || '';
-            if (token) {
-                formData.append('token', token);
-            }
-
-            const asrUrl = `${config.baseUrl}/app/asr/connect`
+            
+            const asrUrl = `${config.baseUrl}/app/speech/asr`
            
            const response = await fetch(asrUrl, {
                method: 'POST',
@@ -361,40 +527,19 @@ export function useRealtimeRecorderOnce() {

            if (response.ok) {
                const result = await response.json();
-                recognizedText.value = result.text || result.data || '';
+                if(result.code == 200){
+                    recognizedText.value = result.data || ''
+                }else{
+                    $api.msg(result.msg || '识别失败')
+                }
+                
            } else {
-                throw new Error(`ASR请求失败: ${response.status}`);
+                const errorText = await response.text();
+                throw new Error(`ASR请求失败: ${response.status} - ${errorText}`);
            }

        } catch (error) {
            console.error('ASR识别失败:', error);
-            recognizedText.value = '语音识别失败，请重试';
-        }
-    }
-
-    /**
-     * 计算音量 (兼容 Float32 和 Int16/ArrayBuffer)
-     */
-    const calculateVolume = (data, isFloat32) => {
-        let sum = 0;
-        let length = 0;
-
-        if (isFloat32) {
-            length = data.length;
-            for (let i = 0; i < length; i += 10) {
-                sum += Math.abs(data[i]);
-            }
-            const calculatedVolume = Math.min(100, Math.floor((sum / (length / 10)) * 100 * 3));
-            volumeLevel.value = calculatedVolume;
-        } else {
-            const int16Data = new Int16Array(data);
-            length = int16Data.length;
-            for (let i = 0; i < length; i += 10) {
-                sum += Math.abs(int16Data[i]);
-            }
-            const avg = sum / (length / 10);
-            const calculatedVolume = Math.min(100, Math.floor((avg / 10000) * 100));
-            volumeLevel.value = calculatedVolume;
        }
    }

@@ -408,7 +553,10 @@ export function useRealtimeRecorderOnce() {
        recordingDuration.value = 0;
        volumeLevel.value = 0;
        audioDataForDisplay.value = [];
-        recorderManager = null;
+        
+        if (recorderManager) {
+            recorderManager = null;
+        }
    }

    onUnmounted(() => {
@@ -425,7 +573,7 @@ export function useRealtimeRecorderOnce() {
        volumeLevel,
        recognizedText,
        audioData,
-        audioDataForDisplay, // 新增：返回给WaveDisplay组件使用
+        audioDataForDisplay,
        startRecording,
        stopRecording,
        cancelRecording