feat : 语音转文字实现

This commit is contained in:
2025-12-26 18:41:58 +08:00
parent e114675eba
commit f6b7755e32
2 changed files with 304 additions and 155 deletions

View File

@@ -2,44 +2,138 @@ import {
ref, ref,
onUnmounted onUnmounted
} from 'vue' } from 'vue'
import {
$api
} from '../common/globalFunction';
import config from '@/config' import config from '@/config'
export function useRealtimeRecorderOnce() { export function useRealtimeRecorderOnce() {
// --- 状态定义 --- // --- 状态定义 ---
const isRecording = ref(false) const isRecording = ref(false)
const isProcessing = ref(false) // 新增:处理录音数据状态 const isProcessing = ref(false)
const recordingDuration = ref(0) const recordingDuration = ref(0)
const volumeLevel = ref(0) // 0-100 const volumeLevel = ref(0) // 0-100
const recognizedText = ref('') const recognizedText = ref('')
const audioData = ref(null) // 新增:存储录音数据 const audioData = ref(null)
const audioDataForDisplay = ref([]) // 新增:用于波形显示的数据 const audioDataForDisplay = ref([])
// --- 内部变量 --- // --- 内部变量 ---
let durationTimer = null let durationTimer = null
// --- APP/小程序 变量 --- // --- APP/小程序 变量 ---
let recorderManager = null; let recorderManager = null;
let appAudioChunks = []; // 新增存储APP录音数据块 let appAudioChunks = [];
// --- H5 变量 --- // --- H5 变量 ---
let audioContext = null; let audioContext = null;
let scriptProcessor = null; let mediaRecorder = null;
let mediaStreamSource = null;
let h5Stream = null; let h5Stream = null;
let h5AudioChunks = []; // 新增存储H5录音数据块 let h5AudioChunks = [];
let analyser = null;
let dataArray = null;
// --- 配置项 --- // --- 配置项 ---
const RECORD_CONFIG = { const RECORD_CONFIG = {
duration: 600000, duration: 600000,
sampleRate: 16000, sampleRate: 16000,
numberOfChannels: 1, numberOfChannels: 1,
format: 'pcm', format: 'wav',
encodeBitRate: 16000,
frameSize: 4096 frameSize: 4096
} }
// --- WAV文件头函数 ---
/**
 * Encode normalized float samples as an uncompressed PCM WAV file
 * (44-byte RIFF/WAVE header followed by little-endian sample data).
 *
 * @param {ArrayLike<number>} samples - Interleaved samples, nominally in [-1, 1].
 *   Out-of-range values are clamped; NaN/missing values are written as silence.
 * @param {number} [sampleRate=16000] - Sample rate written to the header, in Hz.
 * @param {number} [numChannels=1] - Channel count written to the header.
 * @param {number} [bitsPerSample=16] - PCM bit depth: 8 (unsigned), 16 or 32 (signed).
 * @returns {ArrayBuffer} The complete WAV file contents.
 * @throws {RangeError} If `bitsPerSample` is not 8, 16 or 32.
 */
const encodeWAV = (samples, sampleRate = 16000, numChannels = 1, bitsPerSample = 16) => {
  // The header and buffer size are derived from bitsPerSample, so the sample
  // writer below must honour it too; reject depths it cannot produce.
  if (bitsPerSample !== 8 && bitsPerSample !== 16 && bitsPerSample !== 32) {
    throw new RangeError(`Unsupported bitsPerSample: ${bitsPerSample} (expected 8, 16 or 32)`);
  }

  const bytesPerSample = bitsPerSample / 8;
  const blockAlign = numChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;
  const dataSize = samples.length * bytesPerSample;

  const buffer = new ArrayBuffer(44 + dataSize);
  const view = new DataView(buffer);

  // RIFF chunk descriptor
  writeString(view, 0, 'RIFF');
  view.setUint32(4, 36 + dataSize, true); // file size minus the 8-byte RIFF preamble
  writeString(view, 8, 'WAVE');

  // fmt sub-chunk
  writeString(view, 12, 'fmt ');
  view.setUint32(16, 16, true); // Subchunk1Size (16 for PCM)
  view.setUint16(20, 1, true); // AudioFormat (1 for PCM)
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);

  // data sub-chunk
  writeString(view, 36, 'data');
  view.setUint32(40, dataSize, true);

  // Sample payload: advance by the real sample width, not a fixed 2 bytes.
  let offset = 44;
  for (let i = 0; i < samples.length; i++, offset += bytesPerSample) {
    const clamped = Math.max(-1, Math.min(1, samples[i]));
    const sample = Number.isNaN(clamped) ? 0 : clamped;

    if (bitsPerSample === 16) {
      // Asymmetric scale so that -1 maps to -32768 and +1 to 32767.
      view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
    } else if (bitsPerSample === 8) {
      // 8-bit WAV PCM is unsigned with its midpoint at 128.
      view.setUint8(offset, Math.round((sample + 1) * 127.5));
    } else {
      view.setInt32(offset, sample < 0 ? sample * 0x80000000 : sample * 0x7FFFFFFF, true);
    }
  }

  return buffer;
}
/**
 * Write a string into a DataView one byte per UTF-16 code unit.
 * Intended for short ASCII tags such as 'RIFF' or 'data'.
 *
 * @param {DataView} view - Destination view.
 * @param {number} offset - Byte offset of the first character.
 * @param {string} string - Text to write.
 */
const writeString = (view, offset, string) => {
  let cursor = 0;
  while (cursor < string.length) {
    view.setUint8(offset + cursor, string.charCodeAt(cursor));
    cursor += 1;
  }
}
/**
 * Convert float samples to little-endian signed 16-bit PCM and write them
 * into a DataView, two bytes per sample.
 *
 * @param {DataView} output - Destination view.
 * @param {number} offset - Byte offset at which the first sample is written.
 * @param {ArrayLike<number>} input - Samples; values outside [-1, 1] are clamped.
 */
const floatTo16BitPCM = (output, offset, input) => {
  let writeAt = offset;
  for (let idx = 0; idx < input.length; idx += 1) {
    const clamped = Math.min(1, Math.max(-1, input[idx]));
    // Negative and positive halves use different scales so both ends of the
    // int16 range (-32768 and 32767) are reachable.
    const scale = clamped < 0 ? 0x8000 : 0x7FFF;
    output.setInt16(writeAt, clamped * scale, true);
    writeAt += 2;
  }
}
// --- 音量计算函数 ---
/**
 * Estimate a 0-100 volume level from float samples using their RMS.
 *
 * @param {ArrayLike<number>} float32Array - Samples, nominally in [-1, 1].
 * @returns {number} Integer level in [0, 100]; 0 for empty or NaN input and
 *   for anything that would score below 5.
 */
const calculateVolumeFromFloat32 = (float32Array) => {
  const length = float32Array.length;
  // Without this guard an empty buffer computes 0 / 0 and returns NaN.
  if (length === 0) return 0;

  // RMS (root mean square) of the samples
  let sum = 0;
  for (let i = 0; i < length; i++) {
    sum += float32Array[i] * float32Array[i];
  }
  const rms = Math.sqrt(sum / length);
  if (Number.isNaN(rms)) return 0;

  // Scale to 0-100. Per the original author's note, conversational speech
  // has an RMS of roughly 0.01-0.1 and shouting can reach about 0.3.
  const volume = Math.min(100, Math.floor(rms * 300));

  // Noise gate: levels below 5 are reported as silence.
  return volume < 5 ? 0 : volume;
}
/**
 * Estimate a 0-100 volume level from signed 16-bit PCM samples using their RMS.
 *
 * @param {ArrayLike<number>} int16Array - Samples in the int16 range.
 * @returns {number} Integer level in [0, 100]; 0 for empty or NaN input and
 *   for anything that would score below 5.
 */
const calculateVolumeFromInt16 = (int16Array) => {
  const length = int16Array.length;
  // Without this guard an empty frame computes 0 / 0 and returns NaN.
  if (length === 0) return 0;

  // RMS of the samples after normalizing each one to [-1, 1]
  let sum = 0;
  for (let i = 0; i < length; i++) {
    const normalized = int16Array[i] / 32768;
    sum += normalized * normalized;
  }
  const rms = Math.sqrt(sum / length);
  if (Number.isNaN(rms)) return 0;

  // Scale to 0-100, saturating at 100.
  const volume = Math.min(100, Math.floor(rms * 300));

  // Noise gate: levels below 5 are reported as silence.
  return volume < 5 ? 0 : volume;
}
/** /**
* 开始录音 (入口) * 开始录音 (入口)
*/ */
@@ -90,51 +184,75 @@ export function useRealtimeRecorderOnce() {
} }
/** /**
* H5录音实现 * H5录音实现 - 手动构建WAV文件
*/ */
const startH5Recording = async () => { const startH5Recording = async () => {
try { try {
// 1. 获取麦克风流 // 1. 获取麦克风流
const stream = await navigator.mediaDevices.getUserMedia({ const stream = await navigator.mediaDevices.getUserMedia({
audio: true audio: {
sampleRate: 16000,
channelCount: 1,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: false
}
}); });
h5Stream = stream; h5Stream = stream;
// 2. 创建 AudioContext // 2. 创建 AudioContext 用于处理音频
const AudioContext = window.AudioContext || window.webkitAudioContext; const AudioContext = window.AudioContext || window.webkitAudioContext;
audioContext = new AudioContext({ audioContext = new AudioContext({
sampleRate: 16000 sampleRate: 16000,
latencyHint: 'interactive'
}); });
mediaStreamSource = audioContext.createMediaStreamSource(stream); // 创建音频处理节点
scriptProcessor = audioContext.createScriptProcessor(4096, 1, 1); const source = audioContext.createMediaStreamSource(stream);
scriptProcessor.onaudioprocess = (event) => { // 创建分析器用于音量计算
analyser = audioContext.createAnalyser();
analyser.fftSize = 256;
analyser.smoothingTimeConstant = 0.8;
dataArray = new Float32Array(analyser.frequencyBinCount);
source.connect(analyser);
// 创建脚本处理器用于收集音频数据
const processor = audioContext.createScriptProcessor(4096, 1, 1);
// 存储所有音频样本
let audioSamples = [];
processor.onaudioprocess = (e) => {
if (!isRecording.value) return; if (!isRecording.value) return;
const inputData = event.inputBuffer.getChannelData(0); // 获取输入数据
const inputData = e.inputBuffer.getChannelData(0);
calculateVolume(inputData, true); // 计算音量
analyser.getFloatTimeDomainData(dataArray);
const volume = calculateVolumeFromFloat32(dataArray);
volumeLevel.value = volume;
// 保存音频数据 // 收集音频样本
const buffer = new ArrayBuffer(inputData.length * 2);
const view = new DataView(buffer);
for (let i = 0; i < inputData.length; i++) { for (let i = 0; i < inputData.length; i++) {
let s = Math.max(-1, Math.min(1, inputData[i])); audioSamples.push(inputData[i]);
view.setInt16(i * 2, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
} }
// 保存到数组 // 存储当前音频数据块
const buffer = new Float32Array(inputData.length);
buffer.set(inputData);
h5AudioChunks.push(buffer); h5AudioChunks.push(buffer);
}; };
mediaStreamSource.connect(scriptProcessor); source.connect(processor);
scriptProcessor.connect(audioContext.destination); processor.connect(audioContext.destination);
console.log('H5 录音已启动'); console.log('H5 16kHz WAV录音已启动');
} catch (err) { } catch (err) {
console.error('H5 录音启动失败:', err); console.error('H5录音启动失败:', err);
throw err; throw err;
} }
} }
@@ -143,14 +261,19 @@ export function useRealtimeRecorderOnce() {
* 停止H5录音资源 * 停止H5录音资源
*/ */
const stopH5Resources = () => { const stopH5Resources = () => {
if (scriptProcessor) scriptProcessor.disconnect(); // 断开所有连接
if (mediaStreamSource) mediaStreamSource.disconnect(); if (audioContext && audioContext.state !== 'closed') {
if (audioContext) audioContext.close(); audioContext.close();
if (h5Stream) h5Stream.getTracks().forEach(track => track.stop()); }
// 停止音轨
if (h5Stream) {
h5Stream.getTracks().forEach(track => track.stop());
}
scriptProcessor = null;
mediaStreamSource = null;
audioContext = null; audioContext = null;
analyser = null;
dataArray = null;
h5Stream = null; h5Stream = null;
} }
@@ -161,24 +284,29 @@ export function useRealtimeRecorderOnce() {
recorderManager = uni.getRecorderManager(); recorderManager = uni.getRecorderManager();
recorderManager.onFrameRecorded((res) => { recorderManager.onFrameRecorded((res) => {
const { const { frameBuffer } = res;
frameBuffer
} = res;
calculateVolume(frameBuffer, false);
// 保存音频数据
if (frameBuffer && frameBuffer.byteLength > 0) { if (frameBuffer && frameBuffer.byteLength > 0) {
// 计算音量
const int16Data = new Int16Array(frameBuffer);
const volume = calculateVolumeFromInt16(int16Data);
volumeLevel.value = volume;
// 保存音频数据
appAudioChunks.push(frameBuffer); appAudioChunks.push(frameBuffer);
} }
}); });
recorderManager.onStart(() => { recorderManager.onStart(() => {
console.log('APP 录音已开始'); console.log('APP 16kHz WAV录音已开始');
}); });
recorderManager.onError((err) => { recorderManager.onError((err) => {
console.error('APP 录音报错:', err); console.error('APP录音报错:', err);
uni.showToast({
title: '录音失败: ' + err.errMsg,
icon: 'none'
});
cleanup(); cleanup();
}); });
@@ -193,17 +321,12 @@ export function useRealtimeRecorderOnce() {
isRecording.value = false; isRecording.value = false;
clearInterval(durationTimer); clearInterval(durationTimer);
audioDataForDisplay.value = []; // 清空显示数据
// 停止硬件录音 // 停止硬件录音
stopHardwareResource(); stopHardwareResource();
// 处理录音数据 // 处理录音数据
await processAudioData(); await processAudioData();
// 清理临时数据
appAudioChunks = [];
h5AudioChunks = [];
} }
/** /**
@@ -250,90 +373,136 @@ export function useRealtimeRecorderOnce() {
const updateInterval = setInterval(() => { const updateInterval = setInterval(() => {
if (!isRecording.value) { if (!isRecording.value) {
clearInterval(updateInterval); clearInterval(updateInterval);
audioDataForDisplay.value = [];
return; return;
} }
// 生成模拟的音频数据显示数据0-1之间的值 // 生成波形数据,基于当前音量
const baseValue = volumeLevel.value / 100; // 基于音量计算基础值 const baseValue = volumeLevel.value / 100;
const data = []; const data = [];
// 生成31个数据点对应WaveDisplay的31个波形条 // 生成31个数据点
for (let i = 0; i < 31; i++) { for (let i = 0; i < 31; i++) {
// 模拟波形:中间高两边低 // 使用正弦波生成波形效果,中间高两边低
const position = i / 30; // 0到1 const position = i / 30;
const centerDistance = Math.abs(position - 0.5); const centerDistance = Math.abs(position - 0.5);
const waveValue = Math.sin(Date.now() / 100 + i * 0.5) * 0.3 + 0.5; const waveValue = Math.sin(Date.now() / 200 + i * 0.3) * 0.4 + 0.5;
const volumeFactor = baseValue * 0.8 + 0.2; // 确保最小值为0.2
const finalValue = waveValue * (1 - centerDistance) * volumeFactor;
data.push(Math.max(0.1, Math.min(1, finalValue))); // 音量因子确保最小显示高度
const volumeFactor = baseValue * 0.7 + 0.3;
// 综合计算最终值
let finalValue = waveValue * (1 - centerDistance) * volumeFactor;
finalValue = Math.max(0.1, Math.min(1, finalValue));
data.push(finalValue);
} }
audioDataForDisplay.value = data; audioDataForDisplay.value = data;
}, 100); // 每100ms更新一次 }, 50); // 更快的刷新率,更流畅
} }
/** /**
* 处理录音数据 * 处理录音数据并生成WAV文件
*/ */
const processAudioData = async () => { const processAudioData = async () => {
if (!isProcessing.value) { if (isProcessing.value) return;
isProcessing.value = true;
try { isProcessing.value = true;
let audioBlob = null;
// #ifdef H5 try {
// 合并H5录音数据 let audioBlob = null;
if (h5AudioChunks.length > 0) {
const totalLength = h5AudioChunks.reduce((acc, chunk) => acc + chunk.byteLength, 0);
const combinedBuffer = new ArrayBuffer(totalLength);
const combinedView = new Uint8Array(combinedBuffer);
let offset = 0; // #ifdef H5
h5AudioChunks.forEach(chunk => { // H5端合并所有音频样本并生成WAV
const chunkView = new Uint8Array(chunk); if (h5AudioChunks.length > 0) {
combinedView.set(chunkView, offset); // 合并所有Float32Array
offset += chunk.byteLength; const totalLength = h5AudioChunks.reduce((sum, chunk) => sum + chunk.length, 0);
}); const mergedSamples = new Float32Array(totalLength);
audioBlob = new Blob([combinedBuffer], { type: 'audio/pcm' }); let offset = 0;
} h5AudioChunks.forEach(chunk => {
// #endif mergedSamples.set(chunk, offset);
offset += chunk.length;
});
// #ifndef H5 // 生成WAV文件
// 合并APP录音数据 const wavBuffer = encodeWAV(mergedSamples, 16000, 1, 16);
if (appAudioChunks.length > 0) { audioBlob = new Blob([wavBuffer], { type: 'audio/wav' });
const totalLength = appAudioChunks.reduce((acc, chunk) => acc + chunk.byteLength, 0);
const combinedBuffer = new ArrayBuffer(totalLength);
const combinedView = new Uint8Array(combinedBuffer);
let offset = 0; console.log(`H5生成WAV文件: ${audioBlob.size} bytes, 时长: ${mergedSamples.length / 16000}`);
appAudioChunks.forEach(chunk => {
const chunkView = new Uint8Array(chunk);
combinedView.set(chunkView, offset);
offset += chunk.byteLength;
});
audioBlob = new Blob([combinedBuffer], { type: 'audio/pcm' });
}
// #endif
if (audioBlob) {
audioData.value = audioBlob;
// 发送到服务器进行识别
await sendToASR(audioBlob);
}
} catch (error) {
console.error('处理音频数据失败:', error);
recognizedText.value = '音频处理失败,请重试';
} finally {
isProcessing.value = false;
} }
// #endif
// #ifndef H5
// APP/小程序端合并Int16数据并生成WAV
if (appAudioChunks.length > 0) {
// 合并所有Int16Array
const totalLength = appAudioChunks.reduce((sum, chunk) => sum + chunk.byteLength / 2, 0);
const mergedInt16 = new Int16Array(totalLength);
let offset = 0;
appAudioChunks.forEach(chunk => {
const int16Data = new Int16Array(chunk);
mergedInt16.set(int16Data, offset);
offset += int16Data.length;
});
// 转换为Float32用于生成WAV
const floatSamples = new Float32Array(mergedInt16.length);
for (let i = 0; i < mergedInt16.length; i++) {
floatSamples[i] = mergedInt16[i] / 32768;
}
// 生成WAV文件
const wavBuffer = encodeWAV(floatSamples, 16000, 1, 16);
audioBlob = new Blob([wavBuffer], { type: 'audio/wav' });
console.log(`APP生成WAV文件: ${audioBlob.size} bytes, 时长: ${floatSamples.length / 16000}`);
}
// #endif
if (audioBlob && audioBlob.size > 44) { // 确保至少包含WAV头部
audioData.value = audioBlob;
// 保存文件用于调试(可选)
// debugSaveWavFile(audioBlob);
// 发送到服务器进行识别
isProcessing.value = false
await sendToASR(audioBlob);
} else {
throw new Error('录音数据为空或无效');
}
} catch (error) {
console.error('处理音频数据失败:', error);
uni.showToast({
title: '音频处理失败,请重试',
icon: 'none'
});
} finally {
isProcessing.value = false;
appAudioChunks = [];
h5AudioChunks = [];
} }
} }
/**
 * Debug helper: download the given blob through a temporary anchor element.
 * Uses `document`, so it only works where a DOM is available.
 *
 * @param {Blob} blob - Audio data to download; the file is named
 *   `recording_<timestamp>.wav`.
 */
const debugSaveWavFile = (blob) => {
  const objectUrl = URL.createObjectURL(blob);
  const link = Object.assign(document.createElement('a'), {
    href: objectUrl,
    download: `recording_${Date.now()}.wav`
  });

  // The anchor is attached only for the duration of the synthetic click.
  const { body } = document;
  body.appendChild(link);
  link.click();
  body.removeChild(link);

  URL.revokeObjectURL(objectUrl);
  console.log('WAV文件已保存用于调试');
}
/** /**
* 发送音频到ASR服务器 * 发送音频到ASR服务器
*/ */
@@ -341,15 +510,12 @@ export function useRealtimeRecorderOnce() {
try { try {
// 创建FormData // 创建FormData
const formData = new FormData(); const formData = new FormData();
formData.append('audio', audioBlob, 'recording.pcm'); formData.append('file', audioBlob, 'recording.wav');
// 添加Token // 添加Token
const token = uni.getStorageSync('token') || ''; const token = uni.getStorageSync('token') || '';
if (token) {
formData.append('token', token);
}
const asrUrl = `${config.baseUrl}/app/asr/connect` const asrUrl = `${config.baseUrl}/app/speech/asr`
const response = await fetch(asrUrl, { const response = await fetch(asrUrl, {
method: 'POST', method: 'POST',
@@ -361,40 +527,19 @@ export function useRealtimeRecorderOnce() {
if (response.ok) { if (response.ok) {
const result = await response.json(); const result = await response.json();
recognizedText.value = result.text || result.data || ''; if(result.code == 200){
recognizedText.value = result.data || ''
}else{
$api.msg(result.msg || '识别失败')
}
} else { } else {
throw new Error(`ASR请求失败: ${response.status}`); const errorText = await response.text();
throw new Error(`ASR请求失败: ${response.status} - ${errorText}`);
} }
} catch (error) { } catch (error) {
console.error('ASR识别失败:', error); console.error('ASR识别失败:', error);
recognizedText.value = '语音识别失败,请重试';
}
}
/**
* 计算音量 (兼容 Float32 和 Int16/ArrayBuffer)
*/
const calculateVolume = (data, isFloat32) => {
let sum = 0;
let length = 0;
if (isFloat32) {
length = data.length;
for (let i = 0; i < length; i += 10) {
sum += Math.abs(data[i]);
}
const calculatedVolume = Math.min(100, Math.floor((sum / (length / 10)) * 100 * 3));
volumeLevel.value = calculatedVolume;
} else {
const int16Data = new Int16Array(data);
length = int16Data.length;
for (let i = 0; i < length; i += 10) {
sum += Math.abs(int16Data[i]);
}
const avg = sum / (length / 10);
const calculatedVolume = Math.min(100, Math.floor((avg / 10000) * 100));
volumeLevel.value = calculatedVolume;
} }
} }
@@ -408,7 +553,10 @@ export function useRealtimeRecorderOnce() {
recordingDuration.value = 0; recordingDuration.value = 0;
volumeLevel.value = 0; volumeLevel.value = 0;
audioDataForDisplay.value = []; audioDataForDisplay.value = [];
recorderManager = null;
if (recorderManager) {
recorderManager = null;
}
} }
onUnmounted(() => { onUnmounted(() => {
@@ -425,7 +573,7 @@ export function useRealtimeRecorderOnce() {
volumeLevel, volumeLevel,
recognizedText, recognizedText,
audioData, audioData,
audioDataForDisplay, // 新增返回给WaveDisplay组件使用 audioDataForDisplay,
startRecording, startRecording,
stopRecording, stopRecording,
cancelRecording cancelRecording

View File

@@ -300,6 +300,7 @@ const {
} = useRealtimeRecorderOnce(); } = useRealtimeRecorderOnce();
watch(recognizedText, (newText) => { watch(recognizedText, (newText) => {
console.log(newText,'++++++++')
if (newText && newText.trim() && !isProcessing.value) { if (newText && newText.trim() && !isProcessing.value) {
setTimeout(() => { setTimeout(() => {
sendMessage(newText); sendMessage(newText);