214 lines
7.5 KiB
JavaScript
214 lines
7.5 KiB
JavaScript
|
|
/**
 * PiperTTS SDK - a mobile-friendly streaming text-to-speech client.
 *
 * Features:
 * 1. Decoded audio is scheduled on the Web Audio API clock, so playback does
 *    not depend on Media Source Extensions (unavailable on some mobile browsers).
 * 2. "Header injection": the container header captured from the first chunk is
 *    prepended to later, header-less batches so each batch can be decoded alone.
 * 3. Built-in state handling with status / start / end callbacks.
 */
export class PiperTTS {
  /**
   * @param {object} [config]
   * @param {string} [config.baseUrl='http://localhost:5001'] - HTTP(S) base URL;
   *   the leading `http` is rewritten to `ws` for the synthesis socket.
   * @param {AudioContext} [config.audioCtx] - Context to reuse; one is created
   *   from `window.AudioContext` / `window.webkitAudioContext` when omitted.
   * @param {(msg: string, type: string) => void} [config.onStatus] - Status sink
   *   (defaults to `console.log`).
   * @param {() => void} [config.onStart] - Called when the socket opens.
   * @param {() => void} [config.onEnd] - Called when the socket closes.
   */
  constructor(config = {}) {
    this.baseUrl = config.baseUrl || 'http://localhost:5001';
    this.audioCtx =
      config.audioCtx || new (window.AudioContext || window.webkitAudioContext)();
    this.onStatus = config.onStatus || ((msg, type) => console.log(`[Piper] ${msg}`));
    this.onStart = config.onStart || (() => {});
    this.onEnd = config.onEnd || (() => {});

    // Internal state
    this.ws = null;
    this.nextTime = 0; // context time at which the next buffer should start
    this.audioHeader = null; // first bytes of the stream, reused as container header
    this.chunkQueue = []; // received chunks not yet decoded
    this.queueSize = 0; // total bytes currently held in chunkQueue
    this.analyser = null; // optional visualizer node

    // Playback bookkeeping
    this._activeSources = new Set(); // started source nodes that have not ended yet
    this._session = 0; // bumped by stop(); lets in-flight work detect it is stale
    this._decodeChain = Promise.resolve(); // serializes flushes so audio stays in order

    // Tunables
    this.flushThreshold = 8 * 1024; // decode once at least 8 KiB are buffered
  }

  /**
   * [Important] Activate the audio engine.
   * Must be called once from a user gesture handler (click/touch); otherwise
   * mobile browsers keep the context suspended and nothing is audible.
   *
   * @returns {Promise<void>}
   */
  async init() {
    if (this.audioCtx.state === 'suspended') {
      await this.audioCtx.resume();
      this.onStatus('音频引擎已激活', 'success');
    }
  }

  /**
   * Attach a visualizer.
   *
   * @param {AnalyserNode} analyserNode - Web Audio analyser node; when set,
   *   every source is routed through it on the way to the destination.
   */
  attachVisualizer(analyserNode) {
    this.analyser = analyserNode;
  }

  /**
   * Start synthesizing `text` and play the result as it streams in.
   * Any previous session is stopped first.
   *
   * @param {string} text - Text to synthesize; empty/falsy text is ignored.
   * @param {object} [options]
   * @param {number|null} [options.speakerId=null] - Sent as `speaker_id`; `0` is preserved.
   * @param {number} [options.lengthScale=1.0] - Sent as `length_scale`.
   * @param {number} [options.noiseScale=0.667] - Sent as `noise_scale`; `0` is preserved.
   */
  speak(text, options = {}) {
    if (!text) return;

    this.stop(); // tear down the previous session
    this.onStatus('正在建立连接...', 'processing');

    try {
      const wsUrl = this.baseUrl.replace(/^http/, 'ws') + '/ws/synthesize';
      const ws = new WebSocket(wsUrl);
      ws.binaryType = 'arraybuffer';
      this.ws = ws;

      ws.onopen = () => {
        this.onStatus('连接成功,请求生成...', 'processing');
        // Start the timeline slightly in the future to leave room for buffering.
        this.nextTime = this.audioCtx.currentTime + 0.1;
        this.onStart();

        ws.send(
          JSON.stringify({
            text: text,
            // Explicit null checks: 0 is a legitimate speaker id / noise scale
            // and must not be replaced by the default.
            speaker_id: options.speakerId != null ? options.speakerId : null,
            // A zero length scale is not meaningful, so falsy still falls back.
            length_scale: options.lengthScale || 1.0,
            noise_scale: options.noiseScale != null ? options.noiseScale : 0.667,
          })
        );
      };

      ws.onmessage = (event) => {
        // Ignore anything delivered by a socket that has been superseded.
        if (this.ws !== ws) return undefined;
        return this._handleMessage(event);
      };

      ws.onclose = async () => {
        // Only the current socket may flush: a socket closed by stop() must not
        // drain (or discard) data that belongs to a newer session.
        if (this.ws === ws) {
          await this._processQueue(true);
        }
        this.onStatus('播放结束', 'success');
        this.onEnd();
      };

      ws.onerror = (err) => {
        console.error(err);
        this.onStatus('连接发生错误', 'error');
      };
    } catch (e) {
      this.onStatus(`启动失败: ${e.message}`, 'error');
    }
  }

  /**
   * Stop playback and reset state: closes the socket, drops buffered data and
   * silences every source node that is playing or scheduled.
   */
  stop() {
    if (this.ws) {
      this.ws.close();
      this.ws = null;
    }

    // Invalidate decodes/flushes that are still in flight.
    this._session += 1;

    // Reset buffers
    this.chunkQueue = [];
    this.queueSize = 0;
    this.audioHeader = null;

    for (const source of this._activeSources) {
      source.onended = null;
      try {
        source.stop();
      } catch (err) {
        // Some engines throw InvalidStateError for a node that already
        // finished; the node is silent either way, so there is nothing to do.
      }
      source.disconnect();
    }
    this._activeSources.clear();
  }

  // --- Internal methods ---

  /**
   * Buffer one binary WebSocket message and flush once the threshold is reached.
   * Non-ArrayBuffer messages are ignored.
   *
   * @param {MessageEvent} event
   * @returns {Promise<void>}
   */
  async _handleMessage(event) {
    if (!(event.data instanceof ArrayBuffer)) return;

    const chunk = event.data;

    // 1. Capture the header (core of header injection): keep the first
    //    100 bytes of the stream as a generic container header.
    if (!this.audioHeader) {
      this.audioHeader = chunk.slice(0, 100);
    }

    // 2. Enqueue
    this.chunkQueue.push(chunk);
    this.queueSize += chunk.byteLength;

    // 3. Decode and play once enough data is buffered
    if (this.queueSize >= this.flushThreshold) {
      await this._processQueue();
    }
  }

  /**
   * Request a flush of the buffered chunks. Flushes run strictly one after
   * another so that decoded audio is scheduled in arrival order, and a flush
   * requested before stop() never touches a later session.
   *
   * @param {boolean} [isLast=false] - True for the final flush of a stream:
   *   flushes regardless of the threshold and drops data that fails to decode.
   * @returns {Promise<void>} Settles when this flush (and all earlier ones) finished.
   */
  _processQueue(isLast = false) {
    const session = this._session;
    const run = () => {
      if (session !== this._session) return undefined;
      return this._flushQueue(isLast, session);
    };
    this._decodeChain = this._decodeChain.then(run, run);
    return this._decodeChain;
  }

  /**
   * Merge the queued chunks, decode them (injecting the header when needed)
   * and schedule the result.
   *
   * @param {boolean} isLast - See _processQueue.
   * @param {number} session - Session id captured when the flush was requested.
   * @returns {Promise<void>}
   */
  async _flushQueue(isLast, session) {
    if (this.chunkQueue.length === 0) return;
    // Several flushes may have been requested while an earlier decode was
    // running; the first one drains everything, the rest must not flush scraps.
    if (!isLast && this.queueSize < this.flushThreshold) return;

    // 1. Merge the queued chunks into one contiguous array
    const rawData = new Uint8Array(this.queueSize);
    let offset = 0;
    for (const chunk of this.chunkQueue) {
      rawData.set(new Uint8Array(chunk), offset);
      offset += chunk.byteLength;
    }

    // Empty the queue
    this.chunkQueue = [];
    this.queueSize = 0;

    try {
      // 2. Build the decode input. It is always a fresh copy because
      //    decodeAudioData detaches the buffer it is given, and rawData must
      //    stay intact so it can be requeued on failure.
      const header = this.audioHeader ? new Uint8Array(this.audioHeader) : null;
      const prefix =
        header !== null && this._needsHeader(rawData, header) ? header : new Uint8Array(0);
      const decodeInput = new Uint8Array(prefix.byteLength + rawData.byteLength);
      decodeInput.set(prefix, 0);
      decodeInput.set(rawData, prefix.byteLength);

      // 3. Decode
      const decodedBuffer = await this._decode(decodeInput.buffer);

      // stop() was called while decoding: this audio belongs to a dead session.
      if (session !== this._session) return;

      // 4. Schedule playback
      this._scheduleBuffer(decodedBuffer);
    } catch (err) {
      if (session !== this._session) return;
      // Mid-stream failure: put the data back at the head of the queue so it
      // is retried together with the bytes that arrive next.
      if (!isLast) {
        this.chunkQueue.unshift(rawData);
        this.queueSize += rawData.byteLength;
      } else {
        console.warn('最后一段数据解码失败,丢弃', err);
      }
    }
  }

  /**
   * Promise wrapper around decodeAudioData that works with both the
   * promise-returning form and the callback-only form.
   *
   * @param {ArrayBuffer} arrayBuffer - Encoded audio; detached by the call.
   * @returns {Promise<AudioBuffer>}
   */
  _decode(arrayBuffer) {
    return new Promise((resolve, reject) => {
      const pending = this.audioCtx.decodeAudioData(arrayBuffer, resolve, reject);
      if (pending && typeof pending.then === 'function') {
        pending.then(resolve, reject);
      }
    });
  }

  /**
   * Play a decoded buffer right after the previously scheduled one.
   *
   * @param {AudioBuffer} decodedBuffer
   */
  _scheduleBuffer(decodedBuffer) {
    const source = this.audioCtx.createBufferSource();
    source.buffer = decodedBuffer;

    // Route through the visualizer when one is attached
    if (this.analyser) {
      source.connect(this.analyser);
      this.analyser.connect(this.audioCtx.destination);
    } else {
      source.connect(this.audioCtx.destination);
    }

    // Forget the node once it has finished playing.
    source.onended = () => {
      this._activeSources.delete(source);
    };

    // If playback fell behind, start immediately; otherwise continue seamlessly.
    const scheduleTime = Math.max(this.audioCtx.currentTime, this.nextTime);
    source.start(scheduleTime);
    // Track the node so stop() can silence it.
    this._activeSources.add(source);

    // Next buffer starts where this one ends
    this.nextTime = scheduleTime + decodedBuffer.duration;
  }

  /**
   * Decide whether the captured header must be prepended to `bytes`.
   * A batch is left alone only when it starts with a signature of its own that
   * is compatible with the captured header; in a WAV stream a PCM batch that
   * happens to start with 0xFF 0xEx is therefore not mistaken for an MP3 frame.
   *
   * @param {Uint8Array} bytes - Merged batch about to be decoded.
   * @param {Uint8Array} header - Header captured from the first chunk.
   * @returns {boolean}
   */
  _needsHeader(bytes, header) {
    const batchFormat = this._sniffFormat(bytes);
    if (batchFormat === null) return true;
    const headerFormat = this._sniffFormat(header);
    return headerFormat !== null && headerFormat !== batchFormat;
  }

  /**
   * Identify the container signature at the start of `bytes`.
   *
   * @param {Uint8Array} bytes
   * @returns {'wav'|'mp3'|null} null when fewer than 4 bytes or no known signature.
   */
  _sniffFormat(bytes) {
    if (bytes.byteLength < 4) return null;
    // "RIFF" (WAV)
    if (bytes[0] === 0x52 && bytes[1] === 0x49 && bytes[2] === 0x46 && bytes[3] === 0x46) {
      return 'wav';
    }
    // "ID3" tag (MP3)
    if (bytes[0] === 0x49 && bytes[1] === 0x44 && bytes[2] === 0x33) return 'mp3';
    // MP3 frame sync word (simplified: 11 set bits)
    if (bytes[0] === 0xff && (bytes[1] & 0xe0) === 0xe0) return 'mp3';
    return null;
  }

  /**
   * Whether `uint8Arr` starts with a recognized audio signature.
   *
   * @param {Uint8Array} uint8Arr
   * @returns {boolean}
   */
  _hasHeader(uint8Arr) {
    return this._sniffFormat(uint8Arr) !== null;
  }
}
|