// qingdao-employment-service/hook/piper-sdk.js

/**
 * PiperTTS SDK - streaming text-to-speech client that also works on mobile browsers.
 *
 * Features:
 * 1. Playback is scheduled in real time through the Web Audio API, so it does not
 *    rely on MSE (which mobile browsers may not support).
 * 2. Header injection: the leading bytes of the stream are remembered and prepended
 *    to later fragments so that each fragment can be decoded on its own.
 * 3. Automatic state management with status / start / end callbacks.
 */
export class PiperTTS {
    /**
     * @param {object} [config]
     * @param {string} [config.baseUrl='http://localhost:5001'] - HTTP(S) base URL of the
     *     synthesis server; the WebSocket URL is derived from it.
     * @param {AudioContext} [config.audioCtx] - Audio context to use; one is created when omitted.
     * @param {function(string, string): void} [config.onStatus] - Receives (message, type) updates.
     * @param {function(): void} [config.onStart] - Called when the socket opens, before the request is sent.
     * @param {function(): void} [config.onEnd] - Called when a session ends (socket closed or stop()).
     */
    constructor(config = {}) {
        this.baseUrl = config.baseUrl || 'http://localhost:5001';
        this.audioCtx = config.audioCtx || new (window.AudioContext || window.webkitAudioContext)();
        this.onStatus = config.onStatus || ((msg, type) => console.log(`[Piper] ${msg}`));
        this.onStart = config.onStart || (() => {});
        this.onEnd = config.onEnd || (() => {});

        // Internal state
        this.ws = null;
        this.nextTime = 0; // context time at which the next buffer should start
        this.audioHeader = null; // leading bytes of the stream, reused as an injected header
        this.chunkQueue = []; // received-but-not-yet-decoded data
        this.queueSize = 0; // total bytes currently in chunkQueue
        this.analyser = null; // optional visualizer node

        // Source nodes that have been scheduled and have not ended yet, so stop() can silence them.
        this._activeSources = new Set();
        // Bumped by stop(); lets in-flight decodes detect that their session is over.
        this._generation = 0;
        // Tail of the decode pipeline; flushes are chained so buffers are scheduled in arrival order.
        this._decodeChain = Promise.resolve();

        // Tunables
        this.flushThreshold = 8 * 1024; // decode once at least 8 KB are buffered
    }

    /**
     * [Important] Activates the audio engine.
     * Must be called once from a user gesture handler (click/touch); otherwise
     * mobile browsers keep the context suspended and nothing is audible.
     */
    async init() {
        if (this.audioCtx.state === 'suspended') {
            await this.audioCtx.resume();
            this.onStatus('音频引擎已激活', 'success');
        }
    }

    /**
     * Routes playback through an analyser node for visualisation.
     * @param {AnalyserNode} analyserNode - Web Audio analyser node.
     */
    attachVisualizer(analyserNode) {
        this.analyser = analyserNode;
    }

    /**
     * Starts synthesis of `text` and plays the audio as it streams in.
     * Any session that is still running is stopped first.
     *
     * @param {string} text - Text to synthesize; a falsy value is ignored.
     * @param {object} [options]
     * @param {?number} [options.speakerId] - Sent as `speaker_id`; `0` is honoured.
     * @param {number} [options.lengthScale=1.0] - Sent as `length_scale`.
     * @param {number} [options.noiseScale=0.667] - Sent as `noise_scale`; `0` is honoured.
     */
    speak(text, options = {}) {
        if (!text) return;

        this.stop(); // tear down the previous session
        this.onStatus('正在建立连接...', 'processing');

        try {
            const wsUrl = this.baseUrl.replace(/^http/, 'ws') + '/ws/synthesize';
            const socket = new WebSocket(wsUrl);
            socket.binaryType = 'arraybuffer';
            this.ws = socket;

            // Events of a socket that was stopped or superseded must not touch the
            // state of whichever session is current by the time they are delivered.
            const isCurrent = () => this.ws === socket;

            socket.onopen = () => {
                if (!isCurrent()) return;
                this.onStatus('连接成功，请求生成...', 'processing');
                // Start of the timeline: now plus a small buffering delay.
                this.nextTime = this.audioCtx.currentTime + 0.1;
                this.onStart();

                socket.send(
                    JSON.stringify({
                        text: text,
                        // `!= null` rather than `||` so that 0 is passed through unchanged.
                        speaker_id: options.speakerId != null ? options.speakerId : null,
                        // Falsy values (including 0) fall back to the default here;
                        // presumably a zero length scale is not meaningful - TODO confirm with the server.
                        length_scale: options.lengthScale || 1.0,
                        noise_scale: options.noiseScale != null ? options.noiseScale : 0.667,
                    })
                );
            };

            socket.onmessage = (event) => {
                if (isCurrent()) this._handleMessage(event);
            };

            socket.onclose = async () => {
                // A stopped/superseded session was already reported by stop().
                if (!isCurrent()) return;
                // Wait for pending decodes and flush whatever is left in the queue.
                await this._processQueue(true);
                // stop() may have run while we were waiting.
                if (!isCurrent()) return;
                this.ws = null;
                // Note: this fires once all data is scheduled; audio may still be playing.
                this.onStatus('播放结束', 'success');
                this.onEnd();
            };

            socket.onerror = (err) => {
                if (!isCurrent()) return;
                console.error(err);
                this.onStatus('连接发生错误', 'error');
            };
        } catch (e) {
            this.onStatus(`启动失败: ${e.message}`, 'error');
        }
    }

    /**
     * Stops the current session: closes the socket, drops buffered data,
     * invalidates in-flight decodes and silences every buffer that is
     * playing or scheduled. If a socket was open, the end of the session is
     * reported (status + onEnd) asynchronously, as a close event would do.
     */
    stop() {
        const socket = this.ws;
        this.ws = null;

        // Invalidate decodes that are still running and start a fresh pipeline.
        this._generation += 1;
        this._decodeChain = Promise.resolve();

        // Reset buffering state.
        this.chunkQueue = [];
        this.queueSize = 0;
        this.audioHeader = null;

        // Silence everything that was already handed to the audio graph.
        for (const source of this._activeSources) {
            source.onended = null;
            try {
                source.stop();
                source.disconnect();
            } catch (err) {
                // Best effort: a node that cannot be stopped must not abort the reset.
            }
        }
        this._activeSources.clear();

        if (socket) {
            socket.close();
            // The socket's own close handler is ignored from now on, so report the end
            // here. Deferred to a microtask so callbacks never run re-entrantly inside
            // stop()/speak(), yet still precede the next session's onStart.
            Promise.resolve().then(() => {
                this.onStatus('播放结束', 'success');
                this.onEnd();
            });
        }
    }

    // --- Internal helpers ---

    /**
     * Buffers one binary WebSocket message and triggers a decode once the
     * buffered size reaches `flushThreshold`. Non-ArrayBuffer messages are ignored.
     * @param {MessageEvent} event
     */
    async _handleMessage(event) {
        if (!(event.data instanceof ArrayBuffer)) return;

        const chunk = event.data;

        // 1. Capture the header (core of the header-injection technique).
        if (!this.audioHeader) {
            // The first 100 bytes of the first chunk are kept as a generic header.
            // NOTE(review): if the stream is canonical WAV, this prefix is longer than
            // the header itself and would include a few samples - TODO confirm format.
            this.audioHeader = chunk.slice(0, 100);
        }

        // 2. Enqueue.
        this.chunkQueue.push(chunk);
        this.queueSize += chunk.byteLength;

        // 3. Decode and play once enough data is buffered.
        if (this.queueSize >= this.flushThreshold) {
            await this._processQueue();
        }
    }

    /**
     * Queues a flush of the buffered data behind any flush already in progress,
     * so decoded buffers are always scheduled in the order the data arrived.
     * @param {boolean} [isLast=false] - True for the final flush: everything left is
     *     decoded regardless of size and dropped (with a warning) if decoding fails.
     * @returns {Promise<void>} Settles when this flush and all earlier ones are done.
     */
    _processQueue(isLast = false) {
        const generation = this._generation;
        const run = () => this._flushQueue(isLast, generation);
        this._decodeChain = this._decodeChain.then(run, run);
        return this._decodeChain;
    }

    /**
     * Merges the queue into one buffer, prepends the saved header when the data
     * does not start with one, decodes it and schedules the result.
     * @param {boolean} isLast - See _processQueue.
     * @param {number} generation - Session the flush was requested for.
     */
    async _flushQueue(isLast, generation) {
        if (generation !== this._generation) return; // session was stopped meanwhile
        if (this.chunkQueue.length === 0) return;
        // A flush that waited behind another one may find most data already consumed.
        if (!isLast && this.queueSize < this.flushThreshold) return;

        // 1. Merge the queued chunks.
        const rawData = new Uint8Array(this.queueSize);
        let offset = 0;
        for (const chunk of this.chunkQueue) {
            rawData.set(new Uint8Array(chunk), offset);
            offset += chunk.byteLength;
        }

        // Empty the queue.
        this.chunkQueue = [];
        this.queueSize = 0;

        // 2. Build the buffer to decode. It is always a fresh copy: decodeAudioData
        //    detaches the ArrayBuffer it is given, and rawData must stay intact so it
        //    can be put back on failure.
        const needsHeader = this.audioHeader && !this._hasHeader(rawData);
        const header = needsHeader ? new Uint8Array(this.audioHeader) : new Uint8Array(0);
        const payload = new Uint8Array(header.byteLength + rawData.byteLength);
        payload.set(header, 0);
        payload.set(rawData, header.byteLength);

        try {
            // 3. Decode.
            const decodedBuffer = await this._decode(payload.buffer);
            if (generation !== this._generation) return; // stopped while decoding

            // 4. Schedule playback.
            this._scheduleBuffer(decodedBuffer);
        } catch (err) {
            if (generation !== this._generation) return;
            if (!isLast) {
                // Expected for fragments cut mid-frame: put the data back at the head
                // of the queue so it is retried together with the next chunks.
                this.chunkQueue.unshift(rawData.buffer);
                this.queueSize += rawData.byteLength;
            } else {
                console.warn('最后一段数据解码失败，丢弃', err);
            }
        }
    }

    /**
     * Decodes audio data, supporting both the promise-based and the legacy
     * callback-only form of `decodeAudioData`.
     * @param {ArrayBuffer} arrayBuffer - Encoded audio; detached by the call.
     * @returns {Promise<AudioBuffer>}
     */
    _decode(arrayBuffer) {
        return new Promise((resolve, reject) => {
            const result = this.audioCtx.decodeAudioData(arrayBuffer, resolve, reject);
            if (result && typeof result.then === 'function') {
                result.then(resolve, reject);
            }
        });
    }

    /**
     * Plays a decoded buffer right after the previously scheduled one, or
     * immediately when playback has fallen behind.
     * @param {AudioBuffer} decodedBuffer
     */
    _scheduleBuffer(decodedBuffer) {
        const source = this.audioCtx.createBufferSource();
        source.buffer = decodedBuffer;

        // Route through the visualizer when one is attached.
        if (this.analyser) {
            source.connect(this.analyser);
            this.analyser.connect(this.audioCtx.destination);
        } else {
            source.connect(this.audioCtx.destination);
        }

        // Track the node until it finishes so stop() can cut it off.
        this._activeSources.add(source);
        source.onended = () => {
            this._activeSources.delete(source);
        };

        // If we are late (underrun) play now; otherwise continue seamlessly.
        const scheduleTime = Math.max(this.audioCtx.currentTime, this.nextTime);
        source.start(scheduleTime);

        // Start time for the next buffer.
        this.nextTime = scheduleTime + decodedBuffer.duration;
    }

    /**
     * Tells whether the data starts with a recognised container/frame header.
     * @param {Uint8Array} uint8Arr
     * @returns {boolean}
     */
    _hasHeader(uint8Arr) {
        if (uint8Arr.byteLength < 4) return false;
        // "RIFF" (WAV)
        if (uint8Arr[0] === 82 && uint8Arr[1] === 73 && uint8Arr[2] === 70 && uint8Arr[3] === 70) return true;
        // "ID3" (MP3 tag)
        if (uint8Arr[0] === 73 && uint8Arr[1] === 68 && uint8Arr[2] === 51) return true;
        // MP3 frame sync word (simplified check)
        if (uint8Arr[0] === 0xff && (uint8Arr[1] & 0xe0) === 0xe0) return true;
        return false;
    }
}