flat: 语音合成、语音识别

2025-12-07 17:06:20 +08:00
parent a2ca068669
commit e0c4f18da0
7 changed files with 324 additions and 221 deletions
--- a/hook/useTTSPlayer2.js
+++ b/hook/useTTSPlayer2.js
@@ -1,285 +1,168 @@
 import {
    ref,
    onUnmounted,
-    onMounted
+    onMounted,
+    watch
 } from 'vue'
-// 如果是 uni-app 环境，保留这些导入；如果是纯 Web Vue3，可以移除
 import {
    onHide,
    onUnload
 } from '@dcloudio/uni-app'
 import config from '@/config'
+// 请确保 piper-sdk.js 已经正确 export class PiperTTS
+import {
+    PiperTTS
+} from './piper-sdk.js'

-/**
- * Piper TTS 播放钩子 (WebSocket MSE 流式版 - 含 cancelAudio)
- * 依赖: 后端必须去除 MP3 ID3 标签 (-map_metadata -1)
- */
 export function useTTSPlayer() {
-    // 状态管理
+    // UI 状态
    const isSpeaking = ref(false)
    const isPaused = ref(false)
    const isLoading = ref(false)

-    // 核心对象
-    let audio = null
-    let mediaSource = null
-    let sourceBuffer = null
-    let ws = null
+    // SDK 实例
+    let piper = null

-    // 缓冲队列管理
-    let bufferQueue = []
-    let isAppending = false
-    let isStreamEnded = false
+    /**
+     * 初始化 SDK 实例
+     * 每次 stop 后 piper 会被置空，这里会重新创建
+     */
+    const initPiper = () => {
+        if (piper) return

-    // 初始化 Audio 监听器 (只运行一次)
-    const initAudioElement = () => {
-        if (!audio && typeof window !== 'undefined') {
-            audio = new Audio()
+        let baseUrl = config.speechSynthesis2 || ''
+        baseUrl = baseUrl.replace(/\/$/, '')

-            // 错误监听
-            audio.addEventListener('error', (e) => {
-                // 如果是手动停止导致的 error (src 被置空)，忽略
-                if (!audio.src) return
-                console.error('Audio Player Error:', e)
+        piper = new PiperTTS({
+            baseUrl: baseUrl,
+            onStatus: (msg, type) => {
+                if (type === 'error') {
+                    console.error('[TTS Error]', msg)
+                    // Reset the UI state on error so the controls do not stay stuck in loading/speaking.
+                    resetState()
+                }
+            },
+            onStart: () => {
+                isLoading.value = false
+                isSpeaking.value = true
+                isPaused.value = false
+            },
+            onEnd: () => {
                resetState()
-            })
-
-            // 播放结束监听
-            audio.addEventListener('ended', () => {
-                resetState()
-            })
-        }
+            }
+        })
    }

    /**
-     * 核心朗读方法 (WebSocket)
-     * @param {string} text - 要朗读的文本
+     * 核心朗读方法
     */
    const speak = async (text) => {
        if (!text) return

-        // 1. 提取文本
        const processedText = extractSpeechText(text)
        if (!processedText) return

-        // 2. 彻底清理旧状态
-        cancelAudio()
-        initAudioElement()
+        // 1. 【关键修改】先彻底停止并销毁旧实例
+        // 这会断开 socket 并且 close AudioContext，确保上一个声音立即消失
+        await stop()

+        // 2. 初始化新实例 (因为 stop() 把 piper 设为了 null)
+        initPiper()
+
+        // 3. 更新 UI 为加载中
        isLoading.value = true
-        isSpeaking.value = true
        isPaused.value = false
-        isStreamEnded = false
+        isSpeaking.value = true // 预先设为 true，防止按钮闪烁

-        // 3. 检查环境
-        if (!window.MediaSource || !window.WebSocket) {
-            console.error('当前环境不支持 MediaSource 或 WebSocket')
+        try {
+            // 4. 激活音频引擎 (移动端防静音关键)
+            await piper.init()
+
+            // 5. 发送请求
+            piper.speak(processedText, {
+                speakerId: 0,
+                noiseScale: 0.667,
+                lengthScale: 1.0
+            })
+        } catch (e) {
+            console.error('TTS Speak Error:', e)
            resetState()
-            return
-        }
-
-        try {
-            // 4. 初始化 MSE
-            mediaSource = new MediaSource()
-            // 绑定 MSE 到 Audio
-            audio.src = URL.createObjectURL(mediaSource)
-
-            // 监听 MSE 打开事件
-            mediaSource.addEventListener('sourceopen', () => {
-                // 防止多次触发
-                if (mediaSource.sourceBuffers.length > 0) return
-                startWebSocketStream(processedText)
-            })
-
-            // 尝试播放 (处理浏览器自动播放策略)
-            const playPromise = audio.play()
-            if (playPromise !== undefined) {
-                playPromise.catch(e => {
-                    console.warn('自动播放被拦截 (需用户交互):', e)
-                    // 保持 isSpeaking 为 true，UI 显示播放按钮，用户点击后调用 resume() 即可
-                })
-            }
-
-        } catch (err) {
-            console.error('TTS Initialization Failed:', err)
-            cancelAudio()
        }
    }

-    // 启动 WebSocket 流程
-    const startWebSocketStream = (text) => {
-        const mime = 'audio/mpeg'
-
-        // 4.1 创建 SourceBuffer
-        try {
-            sourceBuffer = mediaSource.addSourceBuffer(mime)
-            sourceBuffer.addEventListener('updateend', () => {
-                isAppending = false
-                processQueue()
-            })
-        } catch (e) {
-            console.error('SourceBuffer Create Failed:', e)
-            return
-        }
-
-        // 4.2 计算 WebSocket 地址
-        let baseUrl = config.speechSynthesis2 || ''
-        baseUrl = baseUrl.replace(/\/$/, '')
-        const wsUrl = baseUrl.replace(/^http/, 'ws') + '/ws/synthesize'
-
-        // 4.3 建立连接
-        ws = new WebSocket(wsUrl)
-        ws.binaryType = 'arraybuffer' // 关键
-
-        ws.onopen = () => {
-            // console.log('WS Open')
-            ws.send(JSON.stringify({
-                text: text,
-                speaker_id: 0,
-                length_scale: 1.0,
-                noise_scale: 0.667
-            }))
-            isLoading.value = false
-        }
-
-        ws.onmessage = (event) => {
-            if (event.data instanceof ArrayBuffer) {
-                bufferQueue.push(event.data)
-                processQueue()
-            }
-        }
-
-        ws.onerror = (e) => {
-            console.error('WS Error:', e)
-            cancelAudio()
-        }
-
-        ws.onclose = () => {
-            // console.log('WS Closed')
-            isStreamEnded = true
-            // 检查是否需要结束 MSE 流
-            checkEndOfStream()
-        }
-    }
-
-    // 处理缓冲队列
-    const processQueue = () => {
-        if (!sourceBuffer || sourceBuffer.updating || bufferQueue.length === 0) {
-            // 如果队列空了，且流已结束，尝试结束 MSE
-            if (bufferQueue.length === 0 && isStreamEnded && !sourceBuffer.updating) {
-                checkEndOfStream()
-            }
-            return
-        }
-
-        isAppending = true
-        const chunk = bufferQueue.shift()
-
-        try {
-            sourceBuffer.appendBuffer(chunk)
-        } catch (e) {
-            // console.error('AppendBuffer Error:', e)
-            isAppending = false
-        }
-    }
-
-    // 结束 MSE 流
-    const checkEndOfStream = () => {
-        if (mediaSource && mediaSource.readyState === 'open' && bufferQueue.length === 0 && !sourceBuffer
-            ?.updating) {
-            try {
-                mediaSource.endOfStream()
-            } catch (e) {}
-        }
-    }
-
-    const pause = () => {
-        if (audio && !audio.paused) {
-            audio.pause()
+    /**
+     * 暂停
+     */
+    const pause = async () => {
+        if (piper && piper.audioCtx && piper.audioCtx.state === 'running') {
+            await piper.audioCtx.suspend()
            isPaused.value = true
-            isSpeaking.value = false
        }
    }

-    const resume = () => {
-        if (audio && audio.paused) {
-            audio.play()
+    /**
+     * 恢复
+     */
+    const resume = async () => {
+        if (piper && piper.audioCtx && piper.audioCtx.state === 'suspended') {
+            await piper.audioCtx.resume()
            isPaused.value = false
            isSpeaking.value = true
        }
    }

-    // === 新增/核心方法：取消并停止 ===
-    const cancelAudio = () => {
-        // 1. 断开 WebSocket (停止数据接收)
-        if (ws) {
-            // 移除监听器防止报错
-            ws.onclose = null
-            ws.onerror = null
-            ws.onmessage = null
-            ws.close()
-            ws = null
-        }
+    /**
+     * Stop playback, close the AudioContext, drop the SDK instance and reset the UI state.
+     */
+    const stop = async () => {
+        if (piper) {
+            // 1. 断开 WebSocket
+            piper.stop()

-        // 2. 停止音频播放
-        if (audio) {
-            audio.pause()
-            // 释放 Blob URL 内存
-            if (audio.src) {
-                URL.revokeObjectURL(audio.src)
-                audio.removeAttribute('src')
-            }
-            audio.currentTime = 0
-        }
-
-        // 3. 清理 MSE 对象
-        if (mediaSource) {
-            try {
-                if (mediaSource.readyState === 'open') {
-                    mediaSource.endOfStream()
+            // 2. 【关键】关闭 AudioContext
+            // Web Audio API 中，已经 schedule 的 buffer 很难单独取消
+            // 最直接的方法是关闭整个 Context
+            if (piper.audioCtx && piper.audioCtx.state !== 'closed') {
+                try {
+                    await piper.audioCtx.close()
+                } catch (e) {
+                    console.warn('AudioContext close failed', e)
                }
-            } catch (e) {}
-            mediaSource = null
+            }
+
+            // 3. 销毁实例引用
+            piper = null
        }
-
-        sourceBuffer = null
-        bufferQueue = []
-        isAppending = false
-        isStreamEnded = false
-
-        // 4. 重置 UI 状态
        resetState()
    }

-    // 只是重置 UI 变量的辅助函数
+    // UI 状态重置
    const resetState = () => {
        isSpeaking.value = false
        isPaused.value = false
        isLoading.value = false
    }

-    // 别名 stop -> cancelAudio (保持兼容性)
-    const stop = cancelAudio
-
    // === 生命周期 ===
    onMounted(() => {
-        initAudioElement()
+        // 预初始化可以不做，等到点击时再做，避免空闲占用 AudioContext 资源
+        // initPiper() 
    })

    onUnmounted(() => {
-        cancelAudio()
-        audio = null
+        stop()
    })

-    if (typeof onHide === 'function') onHide(cancelAudio)
-    if (typeof onUnload === 'function') onUnload(cancelAudio)
+    // Uniapp 生命周期
+    if (typeof onHide === 'function') onHide(stop)
+    if (typeof onUnload === 'function') onUnload(stop)

    return {
        speak,
        pause,
        resume,
        stop,
-        cancelAudio, // 新增导出
+        cancelAudio: stop,
        isSpeaking,
        isPaused,
        isLoading
@@ -287,7 +170,7 @@ export function useTTSPlayer() {
 }

 /**
- * 提取文本逻辑
+ * 提取文本逻辑 (保持不变)
 */
 function extractSpeechText(markdown) {
    if (!markdown || markdown.indexOf('job-json') === -1) {