feat: 语音合成、语音识别

This commit is contained in:
Apcallover
2025-12-07 17:06:20 +08:00
parent a2ca068669
commit e0c4f18da0
7 changed files with 324 additions and 221 deletions

View File

@@ -1,285 +1,168 @@
import {
ref,
onUnmounted,
onMounted
onMounted,
watch
} from 'vue'
// 如果是 uni-app 环境,保留这些导入;如果是纯 Web Vue3,可以移除
import {
onHide,
onUnload
} from '@dcloudio/uni-app'
import config from '@/config'
// 请确保 piper-sdk.js 已经正确 export class PiperTTS
import {
PiperTTS
} from './piper-sdk.js'
/**
* Piper TTS 播放钩子 (WebSocket MSE 流式版 - 含 cancelAudio)
* 依赖: 后端必须去除 MP3 ID3 标签 (-map_metadata -1)
*/
export function useTTSPlayer() {
// 状态管理
// UI 状态
const isSpeaking = ref(false)
const isPaused = ref(false)
const isLoading = ref(false)
// 核心对象
let audio = null
let mediaSource = null
let sourceBuffer = null
let ws = null
// SDK 实例
let piper = null
// 缓冲队列管理
let bufferQueue = []
let isAppending = false
let isStreamEnded = false
/**
* 初始化 SDK 实例
* 每次 stop 后 piper 会被置空,这里会重新创建
*/
const initPiper = () => {
if (piper) return
// 初始化 Audio 监听器 (只运行一次)
const initAudioElement = () => {
if (!audio && typeof window !== 'undefined') {
audio = new Audio()
let baseUrl = config.speechSynthesis2 || ''
baseUrl = baseUrl.replace(/\/$/, '')
// 错误监听
audio.addEventListener('error', (e) => {
// 如果是手动停止导致的 error (src 被置空),忽略
if (!audio.src) return
console.error('Audio Player Error:', e)
piper = new PiperTTS({
baseUrl: baseUrl,
onStatus: (msg, type) => {
if (type === 'error') {
console.error('[TTS Error]', msg)
// 出错时不重置状态,交给用户手动处理或结束事件处理
resetState()
}
},
onStart: () => {
isLoading.value = false
isSpeaking.value = true
isPaused.value = false
},
onEnd: () => {
resetState()
})
// 播放结束监听
audio.addEventListener('ended', () => {
resetState()
})
}
}
})
}
/**
* 核心朗读方法 (WebSocket)
* @param {string} text - 要朗读的文本
* 核心朗读方法
*/
const speak = async (text) => {
if (!text) return
// 1. 提取文本
const processedText = extractSpeechText(text)
if (!processedText) return
// 2. 彻底清理旧状态
cancelAudio()
initAudioElement()
// 1. 【关键修改】先彻底停止并销毁旧实例
// 这会断开 socket 并且 close AudioContext,确保上一个声音立即消失
await stop()
// 2. 初始化新实例 (因为 stop() 把 piper 设为了 null)
initPiper()
// 3. 更新 UI 为加载中
isLoading.value = true
isSpeaking.value = true
isPaused.value = false
isStreamEnded = false
isSpeaking.value = true // 预先设为 true防止按钮闪烁
// 3. 检查环境
if (!window.MediaSource || !window.WebSocket) {
console.error('当前环境不支持 MediaSource 或 WebSocket')
try {
// 4. 激活音频引擎 (移动端防静音关键)
await piper.init()
// 5. 发送请求
piper.speak(processedText, {
speakerId: 0,
noiseScale: 0.667,
lengthScale: 1.0
})
} catch (e) {
console.error('TTS Speak Error:', e)
resetState()
return
}
try {
// 4. 初始化 MSE
mediaSource = new MediaSource()
// 绑定 MSE 到 Audio
audio.src = URL.createObjectURL(mediaSource)
// 监听 MSE 打开事件
mediaSource.addEventListener('sourceopen', () => {
// 防止多次触发
if (mediaSource.sourceBuffers.length > 0) return
startWebSocketStream(processedText)
})
// 尝试播放 (处理浏览器自动播放策略)
const playPromise = audio.play()
if (playPromise !== undefined) {
playPromise.catch(e => {
console.warn('自动播放被拦截 (需用户交互):', e)
// 保持 isSpeaking 为 true,UI 显示播放按钮,用户点击后调用 resume() 即可
})
}
} catch (err) {
console.error('TTS Initialization Failed:', err)
cancelAudio()
}
}
/**
 * Start the WebSocket stream that feeds synthesized audio into the MSE pipeline.
 *
 * Creates an 'audio/mpeg' SourceBuffer on the shared `mediaSource`, derives the
 * socket URL from `config.speechSynthesis2` (leading "http" becomes "ws", so
 * "https" becomes "wss"), opens the socket and wires its handlers to the shared
 * buffer queue.
 *
 * @param {string} text - Text sent to the server as the synthesis request.
 */
const startWebSocketStream = (text) => {
  // Without a SourceBuffer nothing can be appended, so log and bail out early.
  try {
    sourceBuffer = mediaSource.addSourceBuffer('audio/mpeg')
    sourceBuffer.addEventListener('updateend', () => {
      // The previous append finished; let the queue push the next chunk.
      isAppending = false
      processQueue()
    })
  } catch (e) {
    console.error('SourceBuffer Create Failed:', e)
    return
  }

  // Build the endpoint: strip one trailing slash, then swap the scheme prefix.
  const httpBase = (config.speechSynthesis2 || '').replace(/\/$/, '')
  const wsUrl = `${httpBase.replace(/^http/, 'ws')}/ws/synthesize`

  ws = new WebSocket(wsUrl)
  // Required so that incoming frames arrive as ArrayBuffer instances.
  ws.binaryType = 'arraybuffer'

  ws.onopen = () => {
    const request = {
      text,
      speaker_id: 0,
      length_scale: 1.0,
      noise_scale: 0.667
    }
    ws.send(JSON.stringify(request))
    isLoading.value = false
  }

  ws.onmessage = (event) => {
    // Only binary frames are queued; anything else is ignored.
    if (!(event.data instanceof ArrayBuffer)) return
    bufferQueue.push(event.data)
    processQueue()
  }

  ws.onerror = (e) => {
    console.error('WS Error:', e)
    cancelAudio()
  }

  ws.onclose = () => {
    // No more data will arrive; close the MSE stream once the queue is drained.
    isStreamEnded = true
    checkEndOfStream()
  }
}
/**
 * Append the next queued audio chunk to the SourceBuffer (one chunk per call).
 *
 * When nothing can be appended right now (no SourceBuffer, an append is still in
 * progress, or the queue is empty) the function returns early. In that case, if
 * the queue is empty, the stream has ended and no append is running, it asks
 * `checkEndOfStream()` to close the MSE stream.
 *
 * The inner check reads `sourceBuffer?.updating` because this branch is also
 * entered when `sourceBuffer` is null; a plain property access would throw there.
 */
const processQueue = () => {
  const cannotAppend = !sourceBuffer || sourceBuffer.updating || bufferQueue.length === 0
  if (cannotAppend) {
    // Queue drained and stream finished: try to end the MSE stream.
    if (bufferQueue.length === 0 && isStreamEnded && !sourceBuffer?.updating) {
      checkEndOfStream()
    }
    return
  }

  isAppending = true
  const chunk = bufferQueue.shift()
  try {
    sourceBuffer.appendBuffer(chunk)
  } catch (e) {
    // Report the failure instead of hiding it; the chunk is dropped as before.
    console.error('AppendBuffer Error:', e)
    isAppending = false
  }
}
/**
 * End the MSE stream once every queued chunk has been appended.
 *
 * `mediaSource.endOfStream()` is only called when a MediaSource exists, its
 * readyState is 'open', the buffer queue is empty and the SourceBuffer (if any)
 * is not in the middle of an append. A failure of `endOfStream()` is reported
 * with a warning rather than thrown, so callers never see an exception.
 */
const checkEndOfStream = () => {
  if (!mediaSource || mediaSource.readyState !== 'open') return
  if (bufferQueue.length !== 0) return
  if (sourceBuffer?.updating) return

  try {
    mediaSource.endOfStream()
  } catch (e) {
    // Best effort: keep going, but leave a trace for debugging.
    console.warn('MediaSource endOfStream failed:', e)
  }
}
const pause = () => {
if (audio && !audio.paused) {
audio.pause()
/**
 * Pause playback by suspending the player's AudioContext.
 *
 * Does nothing unless a player instance exists, it exposes an `audioCtx`, and
 * that context is currently in the 'running' state. After the context has been
 * suspended the UI flags switch to "paused, not speaking".
 */
const pause = async () => {
  const ctx = piper?.audioCtx
  if (!ctx || ctx.state !== 'running') return

  await ctx.suspend()
  isPaused.value = true
  isSpeaking.value = false
}
const resume = () => {
if (audio && audio.paused) {
audio.play()
/**
 * Resume playback by resuming the player's AudioContext.
 *
 * Does nothing unless a player instance exists, it exposes an `audioCtx`, and
 * that context is currently in the 'suspended' state. After the context has
 * been resumed the UI flags switch back to "speaking, not paused".
 */
const resume = async () => {
  const ctx = piper?.audioCtx
  if (!ctx || ctx.state !== 'suspended') return

  await ctx.resume()
  isPaused.value = false
  isSpeaking.value = true
}
// === 新增/核心方法:取消并停止 ===
const cancelAudio = () => {
// 1. 断开 WebSocket (停止数据接收)
if (ws) {
// 移除监听器防止报错
ws.onclose = null
ws.onerror = null
ws.onmessage = null
ws.close()
ws = null
}
/**
* 停止并重置 (核打击模式)
*/
const stop = async () => {
if (piper) {
// 1. 断开 WebSocket
piper.stop()
// 2. 停止音频播放
if (audio) {
audio.pause()
// 释放 Blob URL 内存
if (audio.src) {
URL.revokeObjectURL(audio.src)
audio.removeAttribute('src')
}
audio.currentTime = 0
}
// 3. 清理 MSE 对象
if (mediaSource) {
try {
if (mediaSource.readyState === 'open') {
mediaSource.endOfStream()
// 2. 【关键】关闭 AudioContext
// Web Audio API 中,已经 schedule 的 buffer 很难单独取消
// 最直接的方法是关闭整个 Context
if (piper.audioCtx && piper.audioCtx.state !== 'closed') {
try {
await piper.audioCtx.close()
} catch (e) {
console.warn('AudioContext close failed', e)
}
} catch (e) {}
mediaSource = null
}
// 3. 销毁实例引用
piper = null
}
sourceBuffer = null
bufferQueue = []
isAppending = false
isStreamEnded = false
// 4. 重置 UI 状态
resetState()
}
/**
 * Reset the UI state flags: speaking, paused and loading all become false.
 * Only touches the reactive flags; it does not stop audio or close connections.
 */
const resetState = () => {
  for (const flag of [isSpeaking, isPaused, isLoading]) {
    flag.value = false
  }
}
// 别名 stop -> cancelAudio (保持兼容性)
const stop = cancelAudio
// === 生命周期 ===
onMounted(() => {
initAudioElement()
// 预初始化可以不做,等到点击时再做,避免空闲占用 AudioContext 资源
// initPiper()
})
onUnmounted(() => {
cancelAudio()
audio = null
stop()
})
if (typeof onHide === 'function') onHide(cancelAudio)
if (typeof onUnload === 'function') onUnload(cancelAudio)
// Uniapp 生命周期
if (typeof onHide === 'function') onHide(stop)
if (typeof onUnload === 'function') onUnload(stop)
return {
speak,
pause,
resume,
stop,
cancelAudio, // 新增导出
cancelAudio: stop,
isSpeaking,
isPaused,
isLoading
@@ -287,7 +170,7 @@ export function useTTSPlayer() {
}
/**
* 提取文本逻辑
* 提取文本逻辑 (保持不变)
*/
function extractSpeechText(markdown) {
if (!markdown || markdown.indexOf('job-json') === -1) {