/**
 * PiperTTS Bundle (SDK + Worker + PCMPlayer)
 * Fix: Smart End Detection that supports Pause/Resume
 */
|
|||
|
|
class PCMPlayer {
  /**
   * Streaming PCM player built on the Web Audio API.
   * Feed raw PCM chunks via feed(); a timer periodically flushes the
   * accumulated samples into gapless, scheduled AudioBufferSource nodes.
   *
   * @param {Object} options - see init() for the supported keys.
   */
  constructor(options) {
    this.init(options);
  }

  /**
   * Initialise player state, the flush timer and the AudioContext graph.
   *
   * @param {Object}   options
   * @param {string}   [options.inputCodec='Int16'] - PCM format: Int8 | Int16 | Int32 | Float32.
   * @param {number}   [options.channels=1] - interleaved channel count.
   * @param {number}   [options.sampleRate=16000] - input sample rate in Hz.
   * @param {number}   [options.flushTime=50] - flush interval in milliseconds.
   * @param {number}   [options.fftSize=2048] - analyser FFT size.
   * @param {Function} [options.onended] - invoked when a scheduled buffer finishes.
   * @param {Function} [options.onstatechange] - invoked on AudioContext state changes.
   */
  init(options) {
    this.option = Object.assign({}, {
      inputCodec: 'Int16',
      channels: 1,
      sampleRate: 16000,
      flushTime: 50,
      fftSize: 2048,
    }, options);

    this.samples = new Float32Array();
    this.interval = setInterval(this.flush.bind(this), this.option.flushTime);
    this.convertValue = this.getConvertValue();
    this.typedArray = this.getTypedArray();

    this.initAudioContext();
    this.bindAudioContextEvent();
  }

  /**
   * Divisor used to normalise integer PCM samples into [-1, 1].
   * @returns {number}
   * @throws {Error} if inputCodec is not a supported format.
   */
  getConvertValue() {
    const map = {
      Int8: 128,
      Int16: 32768,
      Int32: 2147483648,
      Float32: 1,
    };
    if (!map[this.option.inputCodec]) throw new Error('Codec Error');
    return map[this.option.inputCodec];
  }

  /**
   * TypedArray constructor matching the configured inputCodec.
   * @returns {Function}
   * @throws {Error} if inputCodec is not a supported format.
   */
  getTypedArray() {
    const map = {
      Int8: Int8Array,
      Int16: Int16Array,
      Int32: Int32Array,
      Float32: Float32Array,
    };
    if (!map[this.option.inputCodec]) throw new Error('Codec Error');
    return map[this.option.inputCodec];
  }

  // Create the AudioContext and the gain -> destination / analyser graph.
  initAudioContext() {
    this.audioCtx = new (window.AudioContext || window.webkitAudioContext)();
    this.gainNode = this.audioCtx.createGain();
    this.gainNode.gain.value = 1.0;
    this.gainNode.connect(this.audioCtx.destination);
    // Absolute time at which the next flushed buffer will be scheduled.
    this.startTime = this.audioCtx.currentTime;
    this.analyserNode = this.audioCtx.createAnalyser();
    this.analyserNode.fftSize = this.option.fftSize;
  }

  /**
   * True when data is an ArrayBuffer or any TypedArray/DataView view.
   * Fix: the previous byteLength-truthiness check wrongly rejected
   * zero-length TypedArrays.
   */
  static isTypedArray(data) {
    return ArrayBuffer.isView(data) || data instanceof ArrayBuffer;
  }

  /**
   * Throws unless data is a supported binary container.
   * @returns {boolean} always true when it does not throw.
   * @throws {Error} for any non-binary input.
   */
  isSupported(data) {
    if (!PCMPlayer.isTypedArray(data)) throw new Error('Data must be ArrayBuffer or TypedArray');
    return true;
  }

  /**
   * Append a PCM chunk to the pending sample queue.
   * @param {ArrayBuffer|TypedArray} data - raw PCM in the configured inputCodec.
   */
  feed(data) {
    this.isSupported(data);
    data = this.getFormattedValue(data);
    const tmp = new Float32Array(this.samples.length + data.length);
    tmp.set(this.samples, 0);
    tmp.set(data, this.samples.length);
    this.samples = tmp;
  }

  /**
   * Convert raw PCM into normalised Float32 samples in [-1, 1].
   * @param {ArrayBuffer|TypedArray} data
   * @returns {Float32Array}
   */
  getFormattedValue(data) {
    data = data instanceof ArrayBuffer ? new this.typedArray(data) : new this.typedArray(data.buffer);
    const float32 = new Float32Array(data.length);
    for (let i = 0; i < data.length; i++) {
      float32[i] = data[i] / this.convertValue;
    }
    return float32;
  }

  /** Set output gain (1.0 = unity). */
  volume(val) {
    this.gainNode.gain.value = val;
  }

  /** Stop the flush timer and tear down the AudioContext. Idempotent. */
  destroy() {
    if (this.interval) {
      clearInterval(this.interval);
      // Fix: drop the stale timer handle so repeated destroy() is safe.
      this.interval = null;
    }
    this.samples = null;
    if (this.audioCtx) {
      this.audioCtx.close();
      this.audioCtx = null;
    }
  }

  /**
   * Move queued samples into an AudioBuffer and schedule it seamlessly
   * after the previously scheduled buffer. A 50-sample linear fade is
   * applied to each chunk edge to avoid audible clicks.
   */
  flush() {
    // Fix: guard against a destroyed player (samples/audioCtx are null).
    if (!this.samples || !this.samples.length || !this.audioCtx) return;

    const bufferSource = this.audioCtx.createBufferSource();
    if (typeof this.option.onended === 'function') {
      bufferSource.onended = (e) => this.option.onended(this, e);
    }

    const length = this.samples.length / this.option.channels;
    const audioBuffer = this.audioCtx.createBuffer(this.option.channels, length, this.option.sampleRate);

    for (let channel = 0; channel < this.option.channels; channel++) {
      const audioData = audioBuffer.getChannelData(channel);
      let offset = channel; // de-interleave: step by channel count
      let decrement = 50;
      for (let i = 0; i < length; i++) {
        audioData[i] = this.samples[offset];
        // Fade edges to suppress chunk-boundary clicks.
        if (i < 50) audioData[i] = (audioData[i] * i) / 50;
        if (i >= length - 51) audioData[i] = (audioData[i] * decrement--) / 50;
        offset += this.option.channels;
      }
    }

    // Never schedule in the past; re-anchor to "now" after an underrun.
    if (this.startTime < this.audioCtx.currentTime) {
      this.startTime = this.audioCtx.currentTime;
    }

    bufferSource.buffer = audioBuffer;
    bufferSource.connect(this.gainNode);
    bufferSource.connect(this.analyserNode);
    bufferSource.start(this.startTime);

    this.startTime += audioBuffer.duration;
    this.samples = new Float32Array();
  }

  /** Suspend the audio clock (resume via continue()). */
  async pause() {
    await this.audioCtx.suspend();
  }

  /** Resume a suspended context. */
  async continue () {
    await this.audioCtx.resume();
  }

  // Forward AudioContext state changes to the optional callback.
  bindAudioContextEvent() {
    if (typeof this.option.onstatechange === 'function') {
      this.audioCtx.onstatechange = (e) => {
        this.option.onstatechange(this, e, this.audioCtx.state);
      };
    }
  }
}
|
|||
|
|
|
|||
|
|
// ==========================================
// Worker source
// ==========================================
// Inline source for the Web Worker that owns the synthesis WebSocket.
// It is turned into a Blob URL by PiperTTS._initWorker(). Protocol:
//   main -> worker: { type: 'connect', data: { url, text, options } } | { type: 'stop' }
//   worker -> main: { type: 'status' | 'error' | 'end' }
//                   { type: 'audio-data', buffer } (buffer is transferred, not copied)
// NOTE: this is a runtime string — edits here change worker behavior.
const WORKER_SOURCE = `
let globalWs = null;

self.onmessage = function (e) {
  const { type, data } = e.data;
  switch (type) {
    case 'connect': connectWebSocket(data); break;
    case 'stop': closeWs(); break;
  }
};

function closeWs() {
  if (globalWs) {
    globalWs.onerror = null;
    globalWs.onclose = null;
    globalWs.onmessage = null;
    try { globalWs.close(1000, 'User stopped'); } catch (e) {}
    globalWs = null;
  }
}

function connectWebSocket(config) {
  closeWs();

  const { url, text, options } = config;
  self.postMessage({ type: 'status', data: 'ws_connecting' });

  try {
    const currentWs = new WebSocket(url);
    currentWs.binaryType = 'arraybuffer';
    globalWs = currentWs;

    currentWs.onopen = () => {
      if (globalWs !== currentWs) return;
      self.postMessage({ type: 'status', data: 'ws_connected' });
      currentWs.send(JSON.stringify({
        text: text,
        speaker_id: options.speakerId || 0,
        length_scale: options.lengthScale || 1.0,
        noise_scale: options.noiseScale || 0.667,
      }));
      self.postMessage({ type: 'status', data: 'generating' });
    };

    currentWs.onmessage = (event) => {
      if (globalWs !== currentWs) return;

      if (typeof event.data === 'string' && event.data === 'END') {
        const wsToClose = currentWs;
        globalWs = null;

        wsToClose.onmessage = null;
        wsToClose.onerror = null;
        wsToClose.onclose = null;

        try { wsToClose.close(1000, 'Done'); } catch(e) {}

        self.postMessage({ type: 'end' });
      } else {
        self.postMessage({ type: 'audio-data', buffer: event.data }, [event.data]);
      }
    };

    currentWs.onclose = (e) => {
      if (globalWs === currentWs) {
        self.postMessage({ type: 'end' });
        globalWs = null;
      }
    };

    currentWs.onerror = () => {
      if (globalWs === currentWs) {
        self.postMessage({ type: 'error', data: 'WebSocket error' });
      }
    };

  } catch (e) {
    self.postMessage({ type: 'error', data: e.message });
  }
}
`;
|
|||
|
|
|
|||
|
|
// ==========================================
|
|||
|
|
// PiperTTS SDK
|
|||
|
|
// ==========================================
|
|||
|
|
class PiperTTS {
  /**
   * Streaming TTS client. A Web Worker (built from WORKER_SOURCE) owns the
   * WebSocket to the Piper server and posts PCM chunks back; a PCMPlayer
   * plays them as they arrive and records them for optional WAV export.
   *
   * @param {Object}   [config]
   * @param {string}   [config.baseUrl='http://localhost:5001'] - HTTP base URL of the TTS server.
   * @param {Function} [config.onStatus] - (message, level) progress callback.
   * @param {Function} [config.onStart] - called when synthesis begins.
   * @param {Function} [config.onEnd] - called once playback has fully drained.
   * @param {number}   [config.sampleRate=16000] - PCM sample rate of the server output.
   */
  constructor(config = {}) {
    this.baseUrl = config.baseUrl || 'http://localhost:5001';
    this.onStatus = config.onStatus || console.log;
    this.onStart = config.onStart || (() => {});
    this.onEnd = config.onEnd || (() => {});
    this.sampleRate = config.sampleRate || 16000;

    this.player = null;
    this.worker = null;
    this.recordedChunks = [];
    this.isRecording = false;

    // Timer id of the pause/resume-aware end-of-playback poll.
    this.endCheckInterval = null;

    this._initWorker();
  }

  // Build the worker from the inline source blob and route its messages.
  _initWorker() {
    const blob = new Blob([WORKER_SOURCE], {
      type: 'application/javascript',
    });
    this.worker = new Worker(URL.createObjectURL(blob));

    this.worker.onmessage = (e) => {
      const { type, data, buffer } = e.data;
      switch (type) {
        // Fix: braces around the case so the `const` declaration is
        // properly block-scoped (no-case-declarations).
        case 'status': {
          const statusText = {
            ws_connecting: '正在连接...',
            ws_connected: '已连接',
            generating: '流式接收中...',
          };
          this.onStatus(statusText[data] || data, 'processing');
          break;
        }
        case 'error':
          // If audio already arrived, treat the error as end-of-stream so
          // the received audio still plays out; otherwise surface it.
          if (this.recordedChunks.length > 0) {
            this.onStatus('数据接收完毕', 'success');
            this._triggerEndWithDelay();
          } else {
            this.onStatus(`错误: ${data}`, 'error');
            this.stop();
          }
          break;
        case 'audio-data':
          this._handleAudio(buffer);
          break;
        case 'end':
          this.onStatus('数据接收完毕', 'success');
          this._triggerEndWithDelay();
          break;
      }
    };
  }

  /**
   * Smart end detection: poll every 200ms and fire onEnd only when the
   * AudioContext is running and all scheduled audio has drained. A
   * suspended (paused) context keeps the poll waiting, so pause()/resume()
   * cannot cut the callback short.
   */
  _triggerEndWithDelay() {
    // Clear any poll left over from a previous utterance.
    if (this.endCheckInterval) clearInterval(this.endCheckInterval);

    this.endCheckInterval = setInterval(() => {
      // 1. Player already gone: finish immediately.
      if (!this.player || !this.player.audioCtx) {
        this._finishEndCheck();
        return;
      }

      // 2. Paused: do nothing, keep waiting.
      if (this.player.audioCtx.state === 'suspended') {
        return;
      }

      // 3. startTime is the absolute end time of the scheduled audio.
      const remainingTime = this.player.startTime - this.player.audioCtx.currentTime;

      // 4. Within 100ms of the end: consider playback complete.
      if (remainingTime <= 0.1) {
        this._finishEndCheck();
      }
    }, 200);
  }

  // Stop the end-detection poll and notify the consumer.
  _finishEndCheck() {
    if (this.endCheckInterval) {
      clearInterval(this.endCheckInterval);
      this.endCheckInterval = null;
    }
    this.onEnd();
  }

  // (Re)create the PCM player for a fresh utterance.
  _initPlayer() {
    if (this.player) {
      this.player.destroy();
    }

    this.player = new PCMPlayer({
      inputCodec: 'Int16',
      channels: 1,
      sampleRate: this.sampleRate,
      flushTime: 50,
    });
  }

  /**
   * Synthesize and stream-play the given text.
   * @param {string} text - text to speak; no-op when empty.
   * @param {Object} [options] - forwarded to the server (speakerId, lengthScale, noiseScale).
   */
  async speak(text, options = {}) {
    if (!text) return;
    this.stop();

    this._initPlayer();
    if (this.player) {
      // Resume the context in case the browser auto-suspended it.
      await this.player.continue();
    }

    this.recordedChunks = [];
    this.isRecording = true;
    this.onStart();

    const wsUrl = this.baseUrl.replace(/^http/, 'ws') + '/ws/synthesize';
    this.worker.postMessage({
      type: 'connect',
      data: {
        url: wsUrl,
        text,
        options,
      },
    });
  }

  /** Abort synthesis and playback, tearing down the socket and player. */
  stop() {
    // The end-detection poll must not outlive the player it inspects.
    if (this.endCheckInterval) {
      clearInterval(this.endCheckInterval);
      this.endCheckInterval = null;
    }

    this.worker.postMessage({
      type: 'stop',
    });
    if (this.player) {
      this.player.destroy();
      this.player = null;
    }
    this.onStatus('已停止', 'default');
  }

  // Record the chunk (for WAV export) and feed it to the live player.
  _handleAudio(arrayBuffer) {
    if (this.isRecording) {
      this.recordedChunks.push(arrayBuffer);
    }
    if (this.player) {
      this.player.feed(arrayBuffer);
    }
  }

  /** @returns {AnalyserNode|null} analyser of the active player, for visualisation. */
  getAnalyserNode() {
    return this.player ? this.player.analyserNode : null;
  }

  /**
   * Export everything recorded since the last speak() as a WAV download.
   * @param {string} [filename='tts_output.wav']
   */
  downloadAudio(filename = 'tts_output.wav') {
    if (this.recordedChunks.length === 0) return;
    let totalLen = 0;
    for (const chunk of this.recordedChunks) totalLen += chunk.byteLength;
    const tmp = new Uint8Array(totalLen);
    let offset = 0;
    for (const chunk of this.recordedChunks) {
      tmp.set(new Uint8Array(chunk), offset);
      offset += chunk.byteLength;
    }
    // Fix: tolerate an odd trailing byte instead of letting the
    // Int16Array constructor throw on a non-multiple-of-2 buffer.
    const samples = new Int16Array(tmp.buffer, 0, Math.floor(totalLen / 2));
    const wavBuffer = this._encodeWAV(samples, this.sampleRate);
    const blob = new Blob([wavBuffer], {
      type: 'audio/wav',
    });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.style = 'display: none';
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    // Fix: remove the temporary anchor; it used to leak into the DOM.
    document.body.removeChild(a);
    window.URL.revokeObjectURL(url);
  }

  /**
   * Wrap 16-bit mono PCM in a 44-byte RIFF/WAVE header.
   * @param {Int16Array} samples
   * @param {number} sampleRate
   * @returns {DataView} view over the complete WAV file bytes.
   */
  _encodeWAV(samples, sampleRate) {
    const buffer = new ArrayBuffer(44 + samples.length * 2);
    const view = new DataView(buffer);
    const writeString = (view, offset, string) => {
      for (let i = 0; i < string.length; i++) view.setUint8(offset + i, string.charCodeAt(i));
    };
    writeString(view, 0, 'RIFF');
    view.setUint32(4, 36 + samples.length * 2, true); // RIFF chunk size
    writeString(view, 8, 'WAVE');
    writeString(view, 12, 'fmt ');
    view.setUint32(16, 16, true);             // fmt chunk size
    view.setUint16(20, 1, true);              // audio format: PCM
    view.setUint16(22, 1, true);              // mono
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, sampleRate * 2, true); // byte rate = rate * block align
    view.setUint16(32, 2, true);              // block align (16-bit mono)
    view.setUint16(34, 16, true);             // bits per sample
    writeString(view, 36, 'data');
    view.setUint32(40, samples.length * 2, true);
    let offset = 44;
    for (let i = 0; i < samples.length; i++) {
      view.setInt16(offset, samples[i], true);
      offset += 2;
    }
    return view;
  }
}
|
|||
|
|
export default PiperTTS;
|