/**
 * PiperTTS Bundle (SDK + Worker + PCMPlayer)
 * Fix: Smart End Detection that supports Pause/Resume
 */

/**
 * Streaming PCM player backed by the Web Audio API.
 * Raw PCM chunks are fed via `feed()`, accumulated in a Float32 buffer,
 * and periodically flushed into scheduled AudioBufferSourceNodes.
 */
class PCMPlayer {
  constructor(options) {
    this.init(options);
  }

  init(options) {
    this.option = Object.assign(
      {},
      {
        inputCodec: 'Int16',
        channels: 1,
        sampleRate: 16000,
        flushTime: 50,
        fftSize: 2048,
      },
      options
    );
    this.samples = new Float32Array();
    // Periodically drain the sample buffer into the audio graph.
    this.interval = setInterval(this.flush.bind(this), this.option.flushTime);
    this.convertValue = this.getConvertValue();
    this.typedArray = this.getTypedArray();
    this.initAudioContext();
    this.bindAudioContextEvent();
  }

  /** Divisor mapping integer PCM samples into the [-1, 1] float range. */
  getConvertValue() {
    const map = { Int8: 128, Int16: 32768, Int32: 2147483648, Float32: 1 };
    if (!map[this.option.inputCodec]) throw new Error('Codec Error');
    return map[this.option.inputCodec];
  }

  /** TypedArray constructor matching the configured input codec. */
  getTypedArray() {
    const map = { Int8: Int8Array, Int16: Int16Array, Int32: Int32Array, Float32: Float32Array };
    if (!map[this.option.inputCodec]) throw new Error('Codec Error');
    return map[this.option.inputCodec];
  }

  initAudioContext() {
    this.audioCtx = new (window.AudioContext || window.webkitAudioContext)();
    this.gainNode = this.audioCtx.createGain();
    this.gainNode.gain.value = 1.0;
    this.gainNode.connect(this.audioCtx.destination);
    // Absolute AudioContext time at which the next flushed buffer starts.
    this.startTime = this.audioCtx.currentTime;
    this.analyserNode = this.audioCtx.createAnalyser();
    this.analyserNode.fftSize = this.option.fftSize;
  }

  static isTypedArray(data) {
    return (
      (data.byteLength && data.buffer && data.buffer.constructor == ArrayBuffer) ||
      data.constructor == ArrayBuffer
    );
  }

  isSupported(data) {
    if (!PCMPlayer.isTypedArray(data)) throw new Error('Data must be ArrayBuffer or TypedArray');
    return true;
  }

  /** Append a chunk of raw PCM (ArrayBuffer or TypedArray) to the play queue. */
  feed(data) {
    this.isSupported(data);
    data = this.getFormattedValue(data);
    const tmp = new Float32Array(this.samples.length + data.length);
    tmp.set(this.samples, 0);
    tmp.set(data, this.samples.length);
    this.samples = tmp;
  }

  /** Convert an incoming chunk into normalized Float32 samples. */
  getFormattedValue(data) {
    let typed;
    if (data.constructor == ArrayBuffer) {
      typed = new this.typedArray(data);
    } else {
      // BUGFIX: honor the view's byteOffset/length. The old code did
      // `new this.typedArray(data.buffer)`, which reinterprets the WHOLE
      // backing buffer and plays garbage for subarray/offset views.
      typed = new this.typedArray(
        data.buffer,
        data.byteOffset,
        data.byteLength / this.typedArray.BYTES_PER_ELEMENT
      );
    }
    const float32 = new Float32Array(typed.length);
    for (let i = 0; i < typed.length; i++) {
      float32[i] = typed[i] / this.convertValue;
    }
    return float32;
  }

  volume(val) {
    this.gainNode.gain.value = val;
  }

  destroy() {
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null; // BUGFIX: drop the stale handle after clearing.
    }
    this.samples = null;
    if (this.audioCtx) {
      this.audioCtx.close();
      this.audioCtx = null;
    }
  }

  /** Schedule all buffered samples for playback and reset the buffer. */
  flush() {
    if (!this.samples.length) return;
    const bufferSource = this.audioCtx.createBufferSource();
    if (typeof this.option.onended === 'function') {
      bufferSource.onended = (e) => this.option.onended(this, e);
    }
    const length = this.samples.length / this.option.channels;
    const audioBuffer = this.audioCtx.createBuffer(
      this.option.channels,
      length,
      this.option.sampleRate
    );
    for (let channel = 0; channel < this.option.channels; channel++) {
      const audioData = audioBuffer.getChannelData(channel);
      let offset = channel;
      for (let i = 0; i < length; i++) {
        audioData[i] = this.samples[offset];
        // Short linear fade-in/out (~50 frames) to avoid clicks at chunk edges.
        if (i < 50) audioData[i] = (audioData[i] * i) / 50;
        if (i >= length - 51) {
          // BUGFIX: the old decrement counter never reached 0 for chunks
          // shorter than 51 frames, so the fade-out never hit silence.
          audioData[i] = (audioData[i] * Math.max(0, length - 1 - i)) / 50;
        }
        offset += this.option.channels;
      }
    }
    // Never schedule in the past; re-anchor if playback has caught up.
    if (this.startTime < this.audioCtx.currentTime) {
      this.startTime = this.audioCtx.currentTime;
    }
    bufferSource.buffer = audioBuffer;
    bufferSource.connect(this.gainNode);
    bufferSource.connect(this.analyserNode);
    bufferSource.start(this.startTime);
    this.startTime += audioBuffer.duration;
    this.samples = new Float32Array();
  }

  async pause() {
    await this.audioCtx.suspend();
  }

  async continue() {
    await this.audioCtx.resume();
  }

  bindAudioContextEvent() {
    if (typeof this.option.onstatechange === 'function') {
      this.audioCtx.onstatechange = (e) => {
        this.option.onstatechange(this, e, this.audioCtx.state);
      };
    }
  }
}

// ==========================================
// Inline Worker source (runs off the main thread)
// ==========================================
const WORKER_SOURCE = `
let globalWs = null;

self.onmessage = function (e) {
  const { type, data } = e.data;
  switch (type) {
    case 'connect':
      connectWebSocket(data);
      break;
    case 'stop':
      closeWs();
      break;
  }
};

function closeWs() {
  if (globalWs) {
    globalWs.onerror = null;
    globalWs.onclose = null;
    globalWs.onmessage = null;
    try { globalWs.close(1000, 'User stopped'); } catch (e) {}
    globalWs = null;
  }
}

function connectWebSocket(config) {
  closeWs();
  const { url, text, options } = config;
  self.postMessage({ type: 'status', data: 'ws_connecting' });
  try {
    const currentWs = new WebSocket(url);
    currentWs.binaryType = 'arraybuffer';
    globalWs = currentWs;
    currentWs.onopen = () => {
      if (globalWs !== currentWs) return;
      self.postMessage({ type: 'status', data: 'ws_connected' });
      currentWs.send(JSON.stringify({
        text: text,
        speaker_id: options.speakerId || 0,
        length_scale: options.lengthScale || 1.0,
        noise_scale: options.noiseScale || 0.667,
      }));
      self.postMessage({ type: 'status', data: 'generating' });
    };
    currentWs.onmessage = (event) => {
      if (globalWs !== currentWs) return;
      if (typeof event.data === 'string' && event.data === 'END') {
        const wsToClose = currentWs;
        globalWs = null;
        wsToClose.onmessage = null;
        wsToClose.onerror = null;
        wsToClose.onclose = null;
        try { wsToClose.close(1000, 'Done'); } catch (e) {}
        self.postMessage({ type: 'end' });
      } else {
        self.postMessage({ type: 'audio-data', buffer: event.data }, [event.data]);
      }
    };
    currentWs.onclose = (e) => {
      if (globalWs === currentWs) {
        self.postMessage({ type: 'end' });
        globalWs = null;
      }
    };
    currentWs.onerror = () => {
      if (globalWs === currentWs) {
        self.postMessage({ type: 'error', data: 'WebSocket error' });
      }
    };
  } catch (e) {
    self.postMessage({ type: 'error', data: e.message });
  }
}
`;

// ==========================================
// PiperTTS SDK
// ==========================================
class PiperTTS {
  constructor(config = {}) {
    this.baseUrl = config.baseUrl || 'http://localhost:5001';
    this.onStatus = config.onStatus || console.log;
    this.onStart = config.onStart || (() => {});
    this.onEnd = config.onEnd || (() => {});
    this.sampleRate = config.sampleRate || 16000;
    this.player = null;
    this.worker = null;
    this.recordedChunks = [];
    this.isRecording = false;
    // Timer id for the audio-end polling check.
    this.endCheckInterval = null;
    this._initWorker();
  }

  _initWorker() {
    const blob = new Blob([WORKER_SOURCE], { type: 'application/javascript' });
    const workerUrl = URL.createObjectURL(blob);
    this.worker = new Worker(workerUrl);
    // BUGFIX: release the blob URL once the worker has been created.
    URL.revokeObjectURL(workerUrl);
    this.worker.onmessage = (e) => {
      const { type, data, buffer } = e.data;
      switch (type) {
        case 'status': {
          const map = {
            ws_connecting: '正在连接...',
            ws_connected: '已连接',
            generating: '流式接收中...',
          };
          this.onStatus(map[data] || data, 'processing');
          break;
        }
        case 'error':
          // If audio already arrived, treat the error as end-of-stream.
          if (this.recordedChunks.length > 0) {
            this.onStatus('数据接收完毕', 'success');
            this._triggerEndWithDelay();
          } else {
            this.onStatus(`错误: ${data}`, 'error');
            this.stop();
          }
          break;
        case 'audio-data':
          this._handleAudio(buffer);
          break;
        case 'end':
          this.onStatus('数据接收完毕', 'success');
          this._triggerEndWithDelay();
          break;
      }
    };
  }

  /**
   * Smart polling end-detection.
   * Fires onEnd only when the AudioContext is running AND the scheduled
   * playback time has elapsed — a suspended (paused) context keeps waiting.
   */
  _triggerEndWithDelay() {
    // Clear any previous poller first.
    if (this.endCheckInterval) clearInterval(this.endCheckInterval);
    // Poll every 200 ms.
    this.endCheckInterval = setInterval(() => {
      // 1. Player gone — finish immediately.
      if (!this.player || !this.player.audioCtx) {
        this._finishEndCheck();
        return;
      }
      // 2. Paused (suspended) — do nothing, keep waiting.
      if (this.player.audioCtx.state === 'suspended') {
        return;
      }
      // 3. Remaining time: startTime is the absolute end of the last
      //    scheduled buffer, currentTime is "now".
      const remainingTime = this.player.startTime - this.player.audioCtx.currentTime;
      // 4. Under 0.1 s left (small safety margin) — playback is done.
      if (remainingTime <= 0.1) {
        this._finishEndCheck();
      }
    }, 200);
  }

  _finishEndCheck() {
    if (this.endCheckInterval) {
      clearInterval(this.endCheckInterval);
      this.endCheckInterval = null;
    }
    this.onEnd();
  }

  _initPlayer() {
    if (this.player) {
      this.player.destroy();
    }
    this.player = new PCMPlayer({
      inputCodec: 'Int16',
      channels: 1,
      sampleRate: this.sampleRate,
      flushTime: 50,
    });
  }

  /**
   * Synthesize and stream-play `text`.
   * @param {string} text - Text to speak.
   * @param {{speakerId?: number, lengthScale?: number, noiseScale?: number}} options
   */
  async speak(text, options = {}) {
    if (!text) return;
    this.stop();
    this._initPlayer();
    if (this.player) {
      // Resume the context (autoplay policies start it suspended).
      await this.player.continue();
    }
    this.recordedChunks = [];
    this.isRecording = true;
    this.onStart();
    const wsUrl = this.baseUrl.replace(/^http/, 'ws') + '/ws/synthesize';
    this.worker.postMessage({
      type: 'connect',
      data: { url: wsUrl, text, options },
    });
  }

  stop() {
    // Polling end-check must die with the playback it watches.
    if (this.endCheckInterval) {
      clearInterval(this.endCheckInterval);
      this.endCheckInterval = null;
    }
    this.worker.postMessage({ type: 'stop' });
    if (this.player) {
      this.player.destroy();
      this.player = null;
    }
    this.onStatus('已停止', 'default');
  }

  _handleAudio(arrayBuffer) {
    if (this.isRecording) {
      this.recordedChunks.push(arrayBuffer);
    }
    if (this.player) {
      this.player.feed(arrayBuffer);
    }
  }

  /** AnalyserNode for visualizations, or null when not playing. */
  getAnalyserNode() {
    return this.player ? this.player.analyserNode : null;
  }

  /** Concatenate recorded chunks, wrap in a WAV header, and download. */
  downloadAudio(filename = 'tts_output.wav') {
    if (this.recordedChunks.length === 0) return;
    let totalLen = 0;
    for (let chunk of this.recordedChunks) totalLen += chunk.byteLength;
    const tmp = new Uint8Array(totalLen);
    let offset = 0;
    for (let chunk of this.recordedChunks) {
      tmp.set(new Uint8Array(chunk), offset);
      offset += chunk.byteLength;
    }
    // BUGFIX: truncate to whole 16-bit samples — an odd byte count would
    // make the Int16Array constructor throw.
    const sampleCount = Math.floor(totalLen / 2);
    const wavBuffer = this._encodeWAV(
      new Int16Array(tmp.buffer, 0, sampleCount),
      this.sampleRate
    );
    const blob = new Blob([wavBuffer], { type: 'audio/wav' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.style = 'display: none';
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    // BUGFIX: remove the temporary anchor instead of leaking it in the DOM.
    document.body.removeChild(a);
    window.URL.revokeObjectURL(url);
  }

  /**
   * Build a mono 16-bit PCM WAV file around `samples`.
   * @param {Int16Array} samples
   * @param {number} sampleRate
   * @returns {DataView} view over the complete WAV byte layout
   */
  _encodeWAV(samples, sampleRate) {
    const buffer = new ArrayBuffer(44 + samples.length * 2);
    const view = new DataView(buffer);
    const writeString = (view, offset, string) => {
      for (let i = 0; i < string.length; i++) view.setUint8(offset + i, string.charCodeAt(i));
    };
    writeString(view, 0, 'RIFF');
    view.setUint32(4, 36 + samples.length * 2, true); // RIFF chunk size
    writeString(view, 8, 'WAVE');
    writeString(view, 12, 'fmt ');
    view.setUint32(16, 16, true); // fmt chunk size
    view.setUint16(20, 1, true); // PCM format
    view.setUint16(22, 1, true); // mono
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, sampleRate * 2, true); // byte rate (mono, 16-bit)
    view.setUint16(32, 2, true); // block align
    view.setUint16(34, 16, true); // bits per sample
    writeString(view, 36, 'data');
    view.setUint32(40, samples.length * 2, true);
    let offset = 44;
    for (let i = 0; i < samples.length; i++) {
      view.setInt16(offset, samples[i], true);
      offset += 2;
    }
    return view;
  }
}

export default PiperTTS;