flat:语音功能优化

This commit is contained in:
史典卓
2025-07-22 15:20:21 +08:00
parent ea04387b58
commit 58c36c01a0
11 changed files with 229 additions and 479 deletions

View File

@@ -1,387 +1,246 @@
import {
ref,
onUnmounted
} from 'vue';
} from 'vue'
import {
$api,
function mergeText(prevText, newText) {
if (newText.startsWith(prevText)) {
return newText; // 直接替换,避免重复拼接
} from '../common/globalFunction';
import config from '@/config'
export function useAudioRecorder() {
const isRecording = ref(false)
const isStopping = ref(false)
const isSocketConnected = ref(false)
const recordingDuration = ref(0)
const audioDataForDisplay = ref(new Array(16).fill(0))
const volumeLevel = ref(0)
const recognizedText = ref('')
const lastFinalText = ref('')
let audioStream = null
let audioContext = null
let audioInput = null
let scriptProcessor = null
let websocket = null
let durationTimer = null
/**
 * Build a random 32-character lowercase hex id laid out like a version-4
 * UUID with the dashes removed (the 13th digit is always `4`, the 17th is
 * one of `8`, `9`, `a`, `b`).
 *
 * @returns {string} 32 hex characters.
 */
const generateUUID = () => {
  // Same layout the expression `[1e7] + -1e3 + -4e3 + -8e3 + -1e11` yields.
  const template = '10000000-1000-4000-8000-100000000000'
  let id = ''
  for (const ch of template) {
    if (ch === '-') continue
    if (ch !== '0' && ch !== '1' && ch !== '8') {
      // The fixed version digit is copied through unchanged.
      id += ch
      continue
    }
    const digit = Number(ch)
    const randomByte = crypto.getRandomValues(new Uint8Array(1))[0]
    // '0' and '1' keep 4 random bits; '8' keeps only 2 so the result stays in 8..b.
    const mask = 15 >> (digit / 4)
    id += (digit ^ (randomByte & mask)).toString(16)
  }
  return id
}
return prevText + newText; // 兼容意外情况
}
export function useAudioRecorder(wsUrl) {
// 状态变量
const isRecording = ref(false);
const isStopping = ref(false);
const isSocketConnected = ref(false);
const recordingDuration = ref(0);
const audioDataForDisplay = ref(new Array(16).fill(0.01));
const volumeLevel = ref(0);
/**
 * Fetch the speech-recognition WebSocket URL.
 *
 * Calls `$api.createRequest('/app/speech/getToken')` and returns the `msg`
 * field of the response, which the rest of this module uses as the URL.
 *
 * @returns {Promise<string>} The WebSocket URL.
 * @throws {Error} When the response code is not 200 or no URL was returned.
 */
const fetchWsUrl = async () => {
  const res = await $api.createRequest('/app/speech/getToken')
  // Reject an empty/missing URL here; otherwise it only fails later inside
  // `new URL(...)` with an error that does not point at the real cause.
  if (res?.code !== 200 || !res.msg) throw new Error('无法获取语音识别 wsUrl')
  return res.msg
}
// 音频相关
const audioContext = ref(null);
const mediaStream = ref(null);
const workletNode = ref(null);
const analyser = ref(null);
// 网络相关
const socket = ref(null);
// 配置常量
const SAMPLE_RATE = 16000;
const SILENCE_THRESHOLD = 0.05; // 静音阈值 (0-1)
const SILENCE_DURATION = 100; // 静音持续时间(ms)后切片
const MIN_SOUND_DURATION = 200; // 最小有效声音持续时间(ms)
// 音频处理变量
const lastSoundTime = ref(0);
const audioChunks = ref([]);
const currentChunkStartTime = ref(0);
const silenceStartTime = ref(0);
// 语音识别结果
const recognizedText = ref('');
const lastFinalText = ref(''); // 保存最终确认的文本
// AudioWorklet处理器代码
const workletProcessorCode = `
class AudioProcessor extends AudioWorkletProcessor {
constructor(options) {
super();
this.silenceThreshold = options.processorOptions.silenceThreshold;
this.sampleRate = options.processorOptions.sampleRate;
this.samplesPerChunk = Math.floor(this.sampleRate * 0.05); // 50ms的块
this.buffer = new Int16Array(this.samplesPerChunk);
this.index = 0;
this.lastUpdate = 0;
}
calculateVolume(inputs) {
const input = inputs[0];
if (!input || input.length === 0) return 0;
let sum = 0;
const inputChannel = input[0];
for (let i = 0; i < inputChannel.length; i++) {
sum += inputChannel[i] * inputChannel[i];
}
return Math.sqrt(sum / inputChannel.length);
}
process(inputs) {
const now = currentTime;
const volume = this.calculateVolume(inputs);
// 每50ms发送一次分析数据
if (now - this.lastUpdate > 0.05) {
this.lastUpdate = now;
// 简单的频率分析 (模拟16个频段)
const simulatedFreqData = [];
for (let i = 0; i < 16; i++) {
simulatedFreqData.push(
Math.min(1, volume * 10 + (Math.random() * 0.2 - 0.1))
);
}
this.port.postMessage({
type: 'analysis',
volume: volume,
frequencyData: simulatedFreqData,
isSilent: volume < this.silenceThreshold,
timestamp: now
});
}
// 原始音频处理
const input = inputs[0];
if (input && input.length > 0) {
const inputChannel = input[0];
for (let i = 0; i < inputChannel.length; i++) {
this.buffer[this.index++] = Math.max(-32768, Math.min(32767, inputChannel[i] * 32767));
if (this.index >= this.samplesPerChunk) {
this.port.postMessage({
type: 'audio',
audioData: this.buffer.buffer,
timestamp: now
}, [this.buffer.buffer]);
this.buffer = new Int16Array(this.samplesPerChunk);
this.index = 0;
}
}
}
return true;
/**
 * Read the `appkey` and `token` query parameters from a WebSocket URL.
 *
 * @param {string} wsUrl - Absolute URL; `new URL` throws if it cannot be parsed.
 * @returns {{appkey: (string|null), token: (string|null)}} Each value is
 *   `null` when the parameter is absent.
 */
function extractWsParams(wsUrl) {
  const { searchParams } = new URL(wsUrl)
  return {
    appkey: searchParams.get('appkey'),
    token: searchParams.get('token')
  }
}
registerProcessor('audio-processor', AudioProcessor);
`;
// 初始化WebSocket连接
const initSocket = (wsUrl) => {
const connectWebSocket = async () => {
const wsUrl = await fetchWsUrl()
const {
appkey,
token
} = extractWsParams(wsUrl)
return new Promise((resolve, reject) => {
socket.value = new WebSocket(wsUrl);
websocket = new WebSocket(wsUrl)
websocket.binaryType = 'arraybuffer'
socket.value.onopen = () => {
console.log('open')
isSocketConnected.value = true;
resolve();
};
websocket.onopen = () => {
isSocketConnected.value = true
socket.value.onerror = (error) => {
reject(error);
};
// 发送 StartTranscription 消息(参考 demo.html
const startTranscriptionMessage = {
header: {
appkey: appkey, // 不影响使用,可留空或由 wsUrl 带入
namespace: 'SpeechTranscriber',
name: 'StartTranscription',
task_id: generateUUID(),
message_id: generateUUID()
},
payload: {
format: 'pcm',
sample_rate: 16000,
enable_intermediate_result: true,
enable_punctuation_prediction: true,
enable_inverse_text_normalization: true
}
}
websocket.send(JSON.stringify(startTranscriptionMessage))
resolve()
}
socket.value.onclose = () => {
isSocketConnected.value = false;
};
websocket.onerror = (e) => {
isSocketConnected.value = false
reject(e)
}
socket.value.onmessage = handleMessage;
});
};
websocket.onclose = () => {
isSocketConnected.value = false
}
const handleMessage = (values) => {
try {
const data = JSON.parse(event.data);
if (data.text) {
const {
asrEnd,
text
} = data
if (asrEnd === 'true') {
recognizedText.value += data.text;
} else {
lastFinalText.value = '';
websocket.onmessage = (e) => {
const msg = JSON.parse(e.data)
const name = msg?.header?.name
const payload = msg?.payload
switch (name) {
case 'TranscriptionResultChanged': {
// 中间识别文本(可选:使用 stash_result.unfixedText 更精确)
const text = payload?.unfixed_result || payload?.result || ''
lastFinalText.value = text
break
}
case 'SentenceBegin': {
// 可选:开始新的一句,重置状态
// console.log('开始新的句子识别')
break
}
case 'SentenceEnd': {
const text = payload?.result || ''
const confidence = payload?.confidence || 0
if (text && confidence > 0.5) {
recognizedText.value += text
lastFinalText.value = ''
// console.log('识别完成:', {
// text,
// confidence
// })
}
break
}
case 'TranscriptionStarted': {
// console.log('识别任务已开始')
break
}
case 'TranscriptionCompleted': {
lastFinalText.value = ''
// console.log('识别全部完成')
break
}
case 'TaskFailed': {
console.error('识别失败:', msg?.header?.status_text)
break
}
default:
console.log('未知消息类型:', name, msg)
break
}
}
} catch (error) {
console.error('解析识别结果失败:', error);
})
}
/**
 * Start a recording session and stream microphone audio for recognition.
 *
 * Clears the recognized text, opens the recognition socket through
 * `connectWebSocket()`, captures the microphone, converts every
 * ScriptProcessor buffer to 16-bit PCM and sends it while the socket is open.
 * Each buffer also updates `volumeLevel` (RMS of the samples) and
 * `audioDataForDisplay` (16 copies of that level). `recordingDuration` is
 * incremented once per second.
 *
 * Failures are logged and swallowed, so the returned promise still resolves;
 * anything acquired before the failure is released.
 *
 * @returns {Promise<void>}
 */
const startRecording = async () => {
  if (isRecording.value) return

  try {
    recognizedText.value = ''
    lastFinalText.value = ''

    await connectWebSocket()

    audioStream = await navigator.mediaDevices.getUserMedia({
      audio: true
    })
    audioContext = new (window.AudioContext || window.webkitAudioContext)({
      sampleRate: 16000
    })
    audioInput = audioContext.createMediaStreamSource(audioStream)
    scriptProcessor = audioContext.createScriptProcessor(2048, 1, 1)

    scriptProcessor.onaudioprocess = (event) => {
      const input = event.inputBuffer.getChannelData(0)
      const pcm = new Int16Array(input.length)
      let sum = 0
      for (let i = 0; i < input.length; ++i) {
        // Clamp to [-1, 1] before scaling to the signed 16-bit range.
        const s = Math.max(-1, Math.min(1, input[i]))
        pcm[i] = s * 0x7FFF
        sum += s * s
      }

      volumeLevel.value = Math.sqrt(sum / input.length)
      audioDataForDisplay.value = Array(16).fill(volumeLevel.value)

      if (websocket?.readyState === WebSocket.OPEN) {
        websocket.send(pcm.buffer)
      }
    }

    audioInput.connect(scriptProcessor)
    scriptProcessor.connect(audioContext.destination)

    isRecording.value = true
    recordingDuration.value = 0
    durationTimer = setInterval(() => recordingDuration.value++, 1000)
  } catch (err) {
    console.error('启动失败:', err)
    // The socket may already be open (e.g. microphone permission denied after
    // connecting). cleanup() only drops the reference, so close it first or
    // the connection leaks.
    websocket?.close()
    cleanup()
  }
}
// 处理音频切片
const processAudioChunk = (isSilent) => {
const now = Date.now();
const stopRecording = () => {
if (!isRecording.value || isStopping.value) return
isStopping.value = true
if (!isSilent) {
// 检测到声音
lastSoundTime.value = now;
if (silenceStartTime.value > 0) {
// 从静音恢复到有声音
silenceStartTime.value = 0;
}
} else {
// 静音状态
if (silenceStartTime.value === 0) {
silenceStartTime.value = now;
}
// 检查是否达到静音切片条件
if (now - silenceStartTime.value >= SILENCE_DURATION &&
now - currentChunkStartTime.value >= MIN_SOUND_DURATION) {
sendCurrentChunk();
}
}
};
/**
 * Merge all buffered audio chunks into one Int16 buffer and send it over the
 * socket, then reset the chunk buffer and the chunk/silence timers.
 *
 * Does nothing when no chunks are buffered or the socket is missing or not
 * open. Errors raised while merging or sending are logged, not rethrown.
 */
const sendCurrentChunk = () => {
  const hasAudio = audioChunks.value.length > 0;
  const socketReady =
    hasAudio && Boolean(socket.value) && socket.value.readyState === WebSocket.OPEN;
  if (!socketReady) return;

  try {
    // Size the merged buffer from the total byte length (2 bytes per sample).
    let totalBytes = 0;
    for (const chunk of audioChunks.value) {
      totalBytes += chunk.byteLength;
    }
    const merged = new Int16Array(totalBytes / 2);

    let writeIndex = 0;
    for (const chunk of audioChunks.value) {
      const samples = new Int16Array(chunk);
      merged.set(samples, writeIndex);
      writeIndex += samples.length;
    }

    socket.value.send(merged.buffer);
    audioChunks.value = [];

    // A new chunk starts now; clear any pending silence marker.
    currentChunkStartTime.value = Date.now();
    silenceStartTime.value = 0;
  } catch (error) {
    console.error('发送音频数据时出错:', error);
  }
};
// 开始录音
const startRecording = async () => {
if (isRecording.value) return;
try {
// 重置状态
recognizedText.value = '';
lastFinalText.value = '';
// 重置状态
recordingDuration.value = 0;
audioChunks.value = [];
lastSoundTime.value = 0;
currentChunkStartTime.value = Date.now();
silenceStartTime.value = 0;
// 初始化WebSocket
await initSocket(wsUrl);
// 获取音频流
mediaStream.value = await navigator.mediaDevices.getUserMedia({
audio: {
sampleRate: SAMPLE_RATE,
channelCount: 1,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: false
},
video: false
});
// 创建音频上下文
audioContext.value = new(window.AudioContext || window.webkitAudioContext)({
sampleRate: SAMPLE_RATE
});
// 注册AudioWorklet
const blob = new Blob([workletProcessorCode], {
type: 'application/javascript'
});
const workletUrl = URL.createObjectURL(blob);
await audioContext.value.audioWorklet.addModule(workletUrl);
URL.revokeObjectURL(workletUrl);
// 创建AudioWorkletNode
workletNode.value = new AudioWorkletNode(audioContext.value, 'audio-processor', {
processorOptions: {
silenceThreshold: SILENCE_THRESHOLD,
sampleRate: SAMPLE_RATE
if (websocket?.readyState === WebSocket.OPEN) {
websocket.send(JSON.stringify({
header: {
namespace: 'SpeechTranscriber',
name: 'StopTranscription',
message_id: generateUUID()
}
});
// 处理音频数据
workletNode.value.port.onmessage = (e) => {
if (e.data.type === 'audio') {
audioChunks.value.push(e.data.audioData);
} else if (e.data.type === 'analysis') {
audioDataForDisplay.value = e.data.frequencyData;
volumeLevel.value = e.data.volume;
processAudioChunk(e.data.isSilent);
}
};
// 连接音频节点
const source = audioContext.value.createMediaStreamSource(mediaStream.value);
source.connect(workletNode.value);
workletNode.value.connect(audioContext.value.destination);
isRecording.value = true;
} catch (error) {
console.error('启动录音失败:', error);
cleanup();
throw error;
}))
websocket.close()
}
};
// 停止录音
const stopRecording = async () => {
if (!isRecording.value || isStopping.value) return;
cleanup()
isStopping.value = false
}
isStopping.value = true;
/**
 * Abort the current recording: close the socket (if any) and release all
 * resources through `cleanup()`.
 *
 * Does nothing when no recording is active or a stop is already in progress.
 * `isStopping` is set for the duration of the call.
 */
const cancelRecording = () => {
  const canCancel = isRecording.value && !isStopping.value
  if (!canCancel) return

  isStopping.value = true
  if (websocket != null) {
    websocket.close()
  }
  cleanup()
  isStopping.value = false
}
try {
// 发送最后一个音频块(无论是否静音)
sendCurrentChunk();
// 发送结束标记
if (socket.value?.readyState === WebSocket.OPEN) {
socket.value.send(JSON.stringify({
action: 'end',
duration: recordingDuration.value
}));
await new Promise(resolve => {
if (socket.value.bufferedAmount === 0) {
resolve();
} else {
const timer = setInterval(() => {
if (socket.value.bufferedAmount === 0) {
clearInterval(timer);
resolve();
}
}, 50);
}
});
socket.value.close();
}
cleanup();
} catch (error) {
console.error('停止录音时出错:', error);
throw error;
} finally {
isStopping.value = false;
}
};
// 清理资源
const cleanup = () => {
if (mediaStream.value) {
mediaStream.value.getTracks().forEach(track => track.stop());
mediaStream.value = null;
}
clearInterval(durationTimer)
if (workletNode.value) {
workletNode.value.disconnect();
workletNode.value = null;
}
scriptProcessor?.disconnect()
audioInput?.disconnect()
audioStream?.getTracks().forEach(track => track.stop())
audioContext?.close()
if (audioContext.value && audioContext.value.state !== 'closed') {
audioContext.value.close();
audioContext.value = null;
}
audioStream = null
audioContext = null
audioInput = null
scriptProcessor = null
websocket = null
audioChunks.value = [];
isRecording.value = false;
isSocketConnected.value = false;
};
/**
 * Cancel the current recording.
 *
 * When the socket is open, sends a `{ action: 'cancel' }` message and closes
 * it; resources are then released through `cleanup()`. Does nothing when no
 * recording is active or a stop is already in progress.
 *
 * @returns {Promise<void>}
 * @throws Rethrows any error raised while cancelling, after logging it.
 *   `isStopping` is reset in every case.
 */
const cancelRecording = async () => {
  if (!isRecording.value || isStopping.value) return;

  isStopping.value = true;
  try {
    const ws = socket.value;
    if (ws?.readyState === WebSocket.OPEN) {
      console.log('发送结束标记...');
      const cancelMessage = JSON.stringify({
        action: 'cancel'
      });
      ws.send(cancelMessage);
      ws.close();
    }
    cleanup();
  } catch (error) {
    console.error('取消录音时出错:', error);
    throw error;
  } finally {
    isStopping.value = false;
  }
};
isRecording.value = false
isSocketConnected.value = false
}
onUnmounted(() => {
if (isRecording.value) {
stopRecording();
}
});
if (isRecording.value) stopRecording()
})
return {
isRecording,
@@ -390,10 +249,10 @@ export function useAudioRecorder(wsUrl) {
recordingDuration,
audioDataForDisplay,
volumeLevel,
startRecording,
stopRecording,
recognizedText,
lastFinalText,
startRecording,
stopRecording,
cancelRecording
};
}
}