diff --git a/hook/useRealtimeRecorderOnce.js b/hook/useRealtimeRecorderOnce.js
index f3ce5be..3dbcd3d 100644
--- a/hook/useRealtimeRecorderOnce.js
+++ b/hook/useRealtimeRecorderOnce.js
@@ -2,44 +2,138 @@ import { ref, onUnmounted } from 'vue'
-import {
-	$api
-} from '../common/globalFunction';
 import config from '@/config'
 
 export function useRealtimeRecorderOnce() {
 	// --- State ---
 	const isRecording = ref(false)
-	const isProcessing = ref(false) // New: busy flag while the recording is processed
+	const isProcessing = ref(false)
 	const recordingDuration = ref(0)
 	const volumeLevel = ref(0) // 0-100
 	const recognizedText = ref('')
-	const audioData = ref(null) // New: stores the recorded audio
-	const audioDataForDisplay = ref([]) // New: data for the waveform display
+	const audioData = ref(null)
+	const audioDataForDisplay = ref([])
 
 	// --- Internal variables ---
 	let durationTimer = null
 
 	// --- APP / mini-program variables ---
 	let recorderManager = null;
-	let appAudioChunks = []; // New: buffered APP audio chunks
+	let appAudioChunks = [];
 
 	// --- H5 variables ---
 	let audioContext = null;
-	let scriptProcessor = null;
-	let mediaStreamSource = null;
+	let mediaRecorder = null;
 	let h5Stream = null;
-	let h5AudioChunks = []; // New: buffered H5 audio chunks
+	let h5AudioChunks = [];
+	let analyser = null;
+	let dataArray = null;
 
 	// --- Configuration ---
 	const RECORD_CONFIG = {
 		duration: 600000,
 		sampleRate: 16000,
 		numberOfChannels: 1,
-		format: 'pcm',
+		format: 'wav',
+		encodeBitRate: 16000,
 		frameSize: 4096
 	}
 
+	// --- WAV encoding helpers ---
+	// Wraps raw Float32 samples in a 44-byte RIFF/WAVE header (PCM, little-endian).
+	const encodeWAV = (samples, sampleRate = 16000, numChannels = 1, bitsPerSample = 16) => {
+		const bytesPerSample = bitsPerSample / 8;
+		const blockAlign = numChannels * bytesPerSample;
+		const byteRate = sampleRate * blockAlign;
+		const dataSize = samples.length * bytesPerSample;
+		const buffer = new ArrayBuffer(44 + dataSize);
+		const view = new DataView(buffer);
+
+		// RIFF chunk descriptor
+		writeString(view, 0, 'RIFF');
+		view.setUint32(4, 36 + dataSize, true);
+		writeString(view, 8, 'WAVE');
+
+		// fmt sub-chunk
+		writeString(view, 12, 'fmt ');
+		view.setUint32(16, 16, true); // Subchunk1Size (16 for PCM)
+		view.setUint16(20, 1, true); // AudioFormat (1 for PCM)
+		view.setUint16(22, numChannels, true);
+		view.setUint32(24, sampleRate, true);
+		view.setUint32(28, byteRate, true);
+		view.setUint16(32, blockAlign, true);
+		view.setUint16(34, bitsPerSample, true);
+
+		// data sub-chunk
+		writeString(view, 36, 'data');
+		view.setUint32(40, dataSize, true);
+
+		// Write audio samples: clamp to [-1, 1] and scale to signed 16-bit
+		const volume = 1;
+		let offset = 44;
+		for (let i = 0; i < samples.length; i++) {
+			let sample = Math.max(-1, Math.min(1, samples[i]));
+			sample = sample * volume;
+			view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
+			offset += 2;
+		}
+
+		return buffer;
+	}
+
+	const writeString = (view, offset, string) => {
+		for (let i = 0; i < string.length; i++) {
+			view.setUint8(offset + i, string.charCodeAt(i));
+		}
+	}
+
+	const floatTo16BitPCM = (output, offset, input) => {
+		for (let i = 0; i < input.length; i++, offset += 2) {
+			const s = Math.max(-1, Math.min(1, input[i]));
+			output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
+		}
+	}
+
+	// --- Volume helpers ---
+	const calculateVolumeFromFloat32 = (float32Array) => {
+		let sum = 0;
+		const length = float32Array.length;
+
+		// Compute the RMS (root mean square)
+		for (let i = 0; i < length; i++) {
+			sum += float32Array[i] * float32Array[i];
+		}
+		const rms = Math.sqrt(sum / length);
+
+		// Map to a 0-100 value.
+		// Conversational speech RMS is typically 0.01-0.1; shouting can reach 0.3.
+		let volume = Math.min(100, Math.floor(rms * 300));
+
+		// Noise gate: treat very low levels as silence
+		if (volume < 5) volume = 0;
+
+		return volume;
+	}
+
+	const calculateVolumeFromInt16 = (int16Array) => {
+		let sum = 0;
+		const length = int16Array.length;
+
+		// Compute the RMS
+		for (let i = 0; i < length; i++) {
+			const normalized = int16Array[i] / 32768; // normalize to [-1, 1]
+			sum += normalized * normalized;
+		}
+		const rms = Math.sqrt(sum / length);
+
+		// Map to a 0-100 value
+		let volume = Math.min(100, Math.floor(rms * 300));
+
+		// Noise gate
+		if (volume < 5) volume = 0;
+
+		return volume;
+	}
+
 	/**
 	 * Start recording (entry point)
 	 */
@@ -90,51 +184,75 @@ export function useRealtimeRecorderOnce() {
 	}
 
 	/**
-	 * H5 recording implementation
+	 * H5 recording implementation - builds the WAV file manually
 	 */
 	const startH5Recording = async () => {
 		try {
 			// 1. Get the microphone stream
 			const stream = await navigator.mediaDevices.getUserMedia({
-				audio: true
+				audio: {
+					sampleRate: 16000,
+					channelCount: 1,
+					echoCancellation: true,
+					noiseSuppression: true,
+					autoGainControl: false
+				}
 			});
 			h5Stream = stream;
 
-			// 2. Create the AudioContext
+			// 2. Create an AudioContext for audio processing
 			const AudioContext = window.AudioContext || window.webkitAudioContext;
 			audioContext = new AudioContext({
-				sampleRate: 16000
+				sampleRate: 16000,
+				latencyHint: 'interactive'
 			});
 
-			mediaStreamSource = audioContext.createMediaStreamSource(stream);
-			scriptProcessor = audioContext.createScriptProcessor(4096, 1, 1);
-
-			scriptProcessor.onaudioprocess = (event) => {
+			// Create the audio source node
+			const source = audioContext.createMediaStreamSource(stream);
+
+			// Create an analyser for volume metering
+			analyser = audioContext.createAnalyser();
+			analyser.fftSize = 256;
+			analyser.smoothingTimeConstant = 0.8;
+			dataArray = new Float32Array(analyser.frequencyBinCount);
+
+			source.connect(analyser);
+
+			// Create a script processor to collect audio data
+			const processor = audioContext.createScriptProcessor(4096, 1, 1);
+
+			processor.onaudioprocess = (e) => {
 				if (!isRecording.value) return;
-
-				const inputData = event.inputBuffer.getChannelData(0);
-
-				calculateVolume(inputData, true);
-
-				// Save the audio data
-				const buffer = new ArrayBuffer(inputData.length * 2);
-				const view = new DataView(buffer);
-				for (let i = 0; i < inputData.length; i++) {
-					let s = Math.max(-1, Math.min(1, inputData[i]));
-					view.setInt16(i * 2, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
-				}
-
-				// Save to the array
+
+				// Read the input samples
+				const inputData = e.inputBuffer.getChannelData(0);
+
+				// Meter the volume
+				analyser.getFloatTimeDomainData(dataArray);
+				const volume = calculateVolumeFromFloat32(dataArray);
+				volumeLevel.value = volume;
+
+				// Store a copy of the current audio chunk
+				const buffer = new Float32Array(inputData.length);
+				buffer.set(inputData);
 				h5AudioChunks.push(buffer);
 			};
-
-			mediaStreamSource.connect(scriptProcessor);
-			scriptProcessor.connect(audioContext.destination);
-
-			console.log('H5 recording started');
+
+			source.connect(processor);
+			processor.connect(audioContext.destination);
+
+			console.log('H5 16kHz WAV recording started');
 		} catch (err) {
 			console.error('H5 recording failed to start:', err);
 			throw err;
 		}
 	}
 
@@ -143,14 +261,19 @@ export function useRealtimeRecorderOnce() {
 	 * Stop and release H5 recording resources
 	 */
 	const stopH5Resources = () => {
-		if (scriptProcessor) scriptProcessor.disconnect();
-		if (mediaStreamSource) mediaStreamSource.disconnect();
-		if (audioContext) audioContext.close();
-		if (h5Stream) h5Stream.getTracks().forEach(track => track.stop());
+		// Close the AudioContext (this also disconnects its nodes)
+		if (audioContext && audioContext.state !== 'closed') {
+			audioContext.close();
+		}
+
+		// Stop the microphone tracks
+		if (h5Stream) {
+			h5Stream.getTracks().forEach(track => track.stop());
+		}
 
-		scriptProcessor = null;
-		mediaStreamSource = null;
 		audioContext = null;
+		analyser = null;
+		dataArray = null;
 		h5Stream = null;
 	}
 
@@ -161,24 +284,29 @@ export function useRealtimeRecorderOnce() {
 		recorderManager = uni.getRecorderManager();
 
 		recorderManager.onFrameRecorded((res) => {
-			const {
-				frameBuffer
-			} = res;
-
-			calculateVolume(frameBuffer, false);
-
-			// Save the audio data
+			const { frameBuffer } = res;
+			if (frameBuffer && frameBuffer.byteLength > 0) {
+				// Meter the volume
+				const int16Data = new Int16Array(frameBuffer);
+				const volume = calculateVolumeFromInt16(int16Data);
+				volumeLevel.value = volume;
+
+				// Save the audio frame
 				appAudioChunks.push(frameBuffer);
+			}
 		});
 
 		recorderManager.onStart(() => {
-			console.log('APP recording started');
+			console.log('APP 16kHz WAV recording started');
 		});
 
 		recorderManager.onError((err) => {
 			console.error('APP recording error:', err);
+			uni.showToast({
+				title: 'Recording failed: ' + err.errMsg,
+				icon: 'none'
+			});
 			cleanup();
 		});
 
@@ -193,17 +321,12 @@ export function useRealtimeRecorderOnce() {
 		isRecording.value = false;
 		clearInterval(durationTimer);
-		audioDataForDisplay.value = []; // clear the display data
 
 		// Stop the hardware recorder
 		stopHardwareResource();
 
 		// Process the recorded data
 		await processAudioData();
-
-		// Clean up temporary data
-		appAudioChunks = [];
-		h5AudioChunks = [];
 	}
 
 	/**
@@ -250,90 +373,136 @@ export function useRealtimeRecorderOnce() {
 		const updateInterval = setInterval(() => {
 			if (!isRecording.value) {
 				clearInterval(updateInterval);
+				audioDataForDisplay.value = [];
 				return;
 			}
 
-			// Generate simulated waveform display data (values in 0-1)
-			const baseValue = volumeLevel.value / 100; // base value derived from the volume
+			// Generate waveform data from the current volume level
+			const baseValue = volumeLevel.value / 100;
 			const data = [];
 
-			// Generate 31 data points (one per WaveDisplay bar)
+			// Generate 31 data points
 			for (let i = 0; i < 31; i++) {
-				// Simulated wave: high in the middle, low at the edges
-				const position = i / 30; // 0 to 1
+				// Sine-based wave shape: high in the middle, low at the edges
+				const position = i / 30;
 				const centerDistance = Math.abs(position - 0.5);
-				const waveValue = Math.sin(Date.now() / 100 + i * 0.5) * 0.3 + 0.5;
-				const volumeFactor = baseValue * 0.8 + 0.2; // keep a minimum of 0.2
-				const finalValue = waveValue * (1 - centerDistance) * volumeFactor;
+				const waveValue = Math.sin(Date.now() / 200 + i * 0.3) * 0.4 + 0.5;
 
-				data.push(Math.max(0.1, Math.min(1, finalValue)));
+				// The volume factor guarantees a minimum bar height
+				const volumeFactor = baseValue * 0.7 + 0.3;
+
+				// Combine into the final value
+				let finalValue = waveValue * (1 - centerDistance) * volumeFactor;
+				finalValue = Math.max(0.1, Math.min(1, finalValue));
+
+				data.push(finalValue);
 			}
 
 			audioDataForDisplay.value = data;
-		}, 100); // update every 100ms
+		}, 50); // faster refresh for a smoother waveform
 	}
 
 	/**
-	 * Process the recorded data
+	 * Process the recorded data and build the WAV file
 	 */
 	const processAudioData = async () => {
-		if (!isProcessing.value) {
-			isProcessing.value = true;
+		if (isProcessing.value) return;
+
+		isProcessing.value = true;
 
-			try {
-				let audioBlob = null;
+		try {
+			let audioBlob = null;
 
-				// #ifdef H5
-				// Merge the H5 recording data
-				if (h5AudioChunks.length > 0) {
-					const totalLength = h5AudioChunks.reduce((acc, chunk) => acc + chunk.byteLength, 0);
-					const combinedBuffer = new ArrayBuffer(totalLength);
-					const combinedView = new Uint8Array(combinedBuffer);
-
-					let offset = 0;
-					h5AudioChunks.forEach(chunk => {
-						const chunkView = new Uint8Array(chunk);
-						combinedView.set(chunkView, offset);
-						offset += chunk.byteLength;
-					});
+			// #ifdef H5
+			// H5: merge all audio samples and build the WAV
+			if (h5AudioChunks.length > 0) {
+				// Merge all Float32Array chunks
+				const totalLength = h5AudioChunks.reduce((sum, chunk) => sum + chunk.length, 0);
+				const mergedSamples = new Float32Array(totalLength);
+
+				let offset = 0;
+				h5AudioChunks.forEach(chunk => {
+					mergedSamples.set(chunk, offset);
+					offset += chunk.length;
+				});
 
-					audioBlob = new Blob([combinedBuffer], { type: 'audio/pcm' });
-				}
-				// #endif
-
-				// #ifndef H5
-				// Merge the APP recording data
-				if (appAudioChunks.length > 0) {
-					const totalLength = appAudioChunks.reduce((acc, chunk) => acc + chunk.byteLength, 0);
-					const combinedBuffer = new ArrayBuffer(totalLength);
-					const combinedView = new Uint8Array(combinedBuffer);
-
-					let offset = 0;
-					appAudioChunks.forEach(chunk => {
-						const chunkView = new Uint8Array(chunk);
-						combinedView.set(chunkView, offset);
-						offset += chunk.byteLength;
-					});
-
-					audioBlob = new Blob([combinedBuffer], { type: 'audio/pcm' });
-				}
-				// #endif
-
-				if (audioBlob) {
-					audioData.value = audioBlob;
-					// Send to the server for recognition
-					await sendToASR(audioBlob);
-				}
-
-			} catch (error) {
-				console.error('Failed to process audio data:', error);
-				recognizedText.value = 'Audio processing failed, please try again';
-			} finally {
-				isProcessing.value = false;
+				// Build the WAV file
+				const wavBuffer = encodeWAV(mergedSamples, 16000, 1, 16);
+				audioBlob = new Blob([wavBuffer], { type: 'audio/wav' });
+
+				console.log(`H5 WAV generated: ${audioBlob.size} bytes, duration: ${mergedSamples.length / 16000}s`);
 			}
+			// #endif
+
+			// #ifndef H5
+			// APP / mini-program: merge the Int16 frames and build the WAV
+			if (appAudioChunks.length > 0) {
+				// Merge all Int16Array chunks
+				const totalLength = appAudioChunks.reduce((sum, chunk) => sum + chunk.byteLength / 2, 0);
+				const mergedInt16 = new Int16Array(totalLength);
+
+				let offset = 0;
+				appAudioChunks.forEach(chunk => {
+					const int16Data = new Int16Array(chunk);
+					mergedInt16.set(int16Data, offset);
+					offset += int16Data.length;
+				});
+
+				// Convert to Float32 for WAV encoding
+				const floatSamples = new Float32Array(mergedInt16.length);
+				for (let i = 0; i < mergedInt16.length; i++) {
+					floatSamples[i] = mergedInt16[i] / 32768;
+				}
+
+				// Build the WAV file
+				const wavBuffer = encodeWAV(floatSamples, 16000, 1, 16);
+				audioBlob = new Blob([wavBuffer], { type: 'audio/wav' });
+
+				console.log(`APP WAV generated: ${audioBlob.size} bytes, duration: ${floatSamples.length / 16000}s`);
+			}
+			// #endif
+
+			if (audioBlob && audioBlob.size > 44) { // must be larger than the 44-byte WAV header
+				audioData.value = audioBlob;
+
+				// Optionally save the file for debugging
+				// debugSaveWavFile(audioBlob);
+
+				// Release the flag before ASR so the recognizedText watcher
+				// (which checks !isProcessing) can react to the result
+				isProcessing.value = false
+				await sendToASR(audioBlob);
+			} else {
+				throw new Error('Recording data is empty or invalid');
+			}
+
+		} catch (error) {
+			console.error('Failed to process audio data:', error);
+			uni.showToast({
+				title: 'Audio processing failed, please try again',
+				icon: 'none'
+			});
+		} finally {
+			isProcessing.value = false;
+			appAudioChunks = [];
+			h5AudioChunks = [];
 		}
 	}
 
+	/**
+	 * Debug helper: download the generated WAV file (H5 only)
+	 */
+	const debugSaveWavFile = (blob) => {
+		const url = URL.createObjectURL(blob);
+		const a = document.createElement('a');
+		a.href = url;
+		a.download = `recording_${Date.now()}.wav`;
+		document.body.appendChild(a);
+		a.click();
+		document.body.removeChild(a);
+		URL.revokeObjectURL(url);
+		console.log('WAV file saved for debugging');
+	}
+
 	/**
 	 * Send the audio to the ASR server
 	 */
@@ -341,15 +510,12 @@ export function useRealtimeRecorderOnce() {
 		try {
 			// Create the FormData payload
 			const formData = new FormData();
-			formData.append('audio', audioBlob, 'recording.pcm');
+			formData.append('file', audioBlob, 'recording.wav');
 
 			// Attach the token
 			const token = uni.getStorageSync('token') || '';
-			if (token) {
-				formData.append('token', token);
-			}
-
-			const asrUrl = `${config.baseUrl}/app/asr/connect`
+
+			const asrUrl = `${config.baseUrl}/app/speech/asr`
 
 			const response = await fetch(asrUrl, {
 				method: 'POST',
@@ -361,40 +527,19 @@ export function useRealtimeRecorderOnce() {
 			if (response.ok) {
 				const result = await response.json();
-				recognizedText.value = result.text || result.data || '';
+				if (result.code == 200) {
+					recognizedText.value = result.data || ''
+				} else {
+					// $api is no longer imported above, so report via uni.showToast
+					uni.showToast({ title: result.msg || 'Recognition failed', icon: 'none' })
+				}
+
 			} else {
-				throw new Error(`ASR request failed: ${response.status}`);
+				const errorText = await response.text();
+				throw new Error(`ASR request failed: ${response.status} - ${errorText}`);
 			}
 		} catch (error) {
 			console.error('ASR recognition failed:', error);
-			recognizedText.value = 'Speech recognition failed, please try again';
-		}
-	}
-
-	/**
-	 * Compute the volume (handles both Float32 and Int16/ArrayBuffer input)
-	 */
-	const calculateVolume = (data, isFloat32) => {
-		let sum = 0;
-		let length = 0;
-
-		if (isFloat32) {
-			length = data.length;
-			for (let i = 0; i < length; i += 10) {
-				sum += Math.abs(data[i]);
-			}
-			const calculatedVolume = Math.min(100, Math.floor((sum / (length / 10)) * 100 * 3));
-			volumeLevel.value = calculatedVolume;
-		} else {
-			const int16Data = new Int16Array(data);
-			length = int16Data.length;
-			for (let i = 0; i < length; i += 10) {
-				sum += Math.abs(int16Data[i]);
-			}
-			const avg = sum / (length / 10);
-			const calculatedVolume = Math.min(100, Math.floor((avg / 10000) * 100));
-			volumeLevel.value = calculatedVolume;
 		}
 	}
 
@@ -408,7 +553,10 @@ export function useRealtimeRecorderOnce() {
 		recordingDuration.value = 0;
 		volumeLevel.value = 0;
 		audioDataForDisplay.value = [];
-		recorderManager = null;
+
+		if (recorderManager) {
+			recorderManager = null;
+		}
 	}
 
 	onUnmounted(() => {
@@ -425,7 +573,7 @@ export function useRealtimeRecorderOnce() {
 		volumeLevel,
 		recognizedText,
 		audioData,
-		audioDataForDisplay, // New: returned for the WaveDisplay component
+		audioDataForDisplay,
 		startRecording,
 		stopRecording,
 		cancelRecording
diff --git a/pages/chat/components/ai-paging.vue b/pages/chat/components/ai-paging.vue
index a420cd4..2e9bbe4 100644
--- a/pages/chat/components/ai-paging.vue
+++ b/pages/chat/components/ai-paging.vue
@@ -300,6 +300,7 @@ const {
 } = useRealtimeRecorderOnce();
 
 watch(recognizedText, (newText) => {
+	console.log('ASR recognizedText:', newText)
 	if (newText && newText.trim() && !isProcessing.value) {
 		setTimeout(() => {
 			sendMessage(newText);
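
For reviewers, here is a minimal consumer sketch (not part of this patch) showing how the composable's public API wires together, mirroring the watcher pattern in ai-paging.vue; the '@/hook' path alias and the sendMessage helper are assumed to exist in the host page:

	// Hypothetical usage sketch - not in this diff
	import { watch } from 'vue'
	import { useRealtimeRecorderOnce } from '@/hook/useRealtimeRecorderOnce'

	const {
		isRecording,
		isProcessing,
		volumeLevel,         // 0-100, driven by the RMS helpers above
		recognizedText,      // set by sendToASR on a code-200 response
		audioDataForDisplay, // 31 values in [0.1, 1] for a waveform widget
		startRecording,
		stopRecording
	} = useRealtimeRecorderOnce()

	// processAudioData clears isProcessing just before awaiting sendToASR,
	// so this guard passes once a transcript arrives
	watch(recognizedText, (text) => {
		if (text && text.trim() && !isProcessing.value) {
			sendMessage(text) // assumed helper in the host component
		}
	})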
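
And a small sanity check for the header layout encodeWAV produces, useful while reviewing the byte offsets above; it assumes encodeWAV has been made reachable from a test (e.g. temporarily exported), which this patch does not do:

	// Hypothetical test: 1 second of 16kHz silence -> 44-byte header + 32000 data bytes
	const buf = encodeWAV(new Float32Array(16000), 16000, 1, 16)
	const view = new DataView(buf)
	const tag = (o) => String.fromCharCode(
		view.getUint8(o), view.getUint8(o + 1), view.getUint8(o + 2), view.getUint8(o + 3))
	console.assert(tag(0) === 'RIFF' && tag(8) === 'WAVE' && tag(36) === 'data')
	console.assert(view.getUint32(24, true) === 16000) // sample rate, little-endian
	console.assert(view.getUint32(40, true) === 32000) // dataSize = 16000 samples * 2 bytes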