修改TTS断句逻辑

This commit is contained in:
2025-12-29 10:28:49 +08:00
parent 9769f486f0
commit 14af7b8b20

View File

@@ -112,47 +112,111 @@ export const useAudioSpeak = (config = {}) => {
if (!text || typeof text !== 'string') return [];
const cleanText = text.replace(/\s+/g, ' ').trim();
if (!cleanText) return [];
// 先按标点粗分
const rawChunks = cleanText.split(/([。?!;\n\r]|……)/).filter((t) => t.trim());
const mergedChunks = [];
let temp = '';
for (let i = 0; i < rawChunks.length; i++) {
const part = rawChunks[i];
// 如果是标点,追加到上一句
if (/^[。?!;\n\r……]$/.test(part)) {
temp += part;
const segments = [];
// 1. 按完整标点分割成独立的句子(包括中英文标点)
// 正则解释:匹配非标点字符 + 标点符号(或者匹配到结尾)
const sentenceRegex = /([^。?!;,、\n\r\.\?!;,]+[。?!;,、\n\r\.\?!;,]+|.+$)/g;
let currentIndex = 0;
let match;
const rawSentences = [];
while ((match = sentenceRegex.exec(cleanText)) !== null) {
const sentence = match[0].trim();
if (sentence) {
rawSentences.push(sentence);
}
currentIndex = match.index + match[0].length;
}
// 处理最后剩余的部分
if (currentIndex < cleanText.length) {
const remaining = cleanText.substring(currentIndex).trim();
if (remaining) {
rawSentences.push(remaining);
}
}
// 如果正则没有匹配到,整个文本作为一句话
if (rawSentences.length === 0) {
rawSentences.push(cleanText);
}
// 2. 处理每个句子
for (const sentence of rawSentences) {
if (sentence.length <= maxSegmentLength) {
// 句子长度正常,直接作为一个片段
segments.push(sentence);
} else {
// 如果当前积累的句子太长超过50字先推入队列
if (temp.length > 50) {
mergedChunks.push(temp);
temp = part;
} else if (temp.length + part.length < 15) {
// 如果当前积累的太短少于15字则合并下一句
temp += part;
} else {
// 正常长度,推入
if (temp) mergedChunks.push(temp);
temp = part;
// 句子超长,需要分割
console.log('检测到超长句子,需要分割:', sentence);
let currentPos = 0;
const sentenceLength = sentence.length;
while (currentPos < sentenceLength) {
// 优先在标点处分割
let splitPos = -1;
const searchStart = currentPos;
const searchEnd = Math.min(currentPos + maxSegmentLength, sentenceLength);
// 在搜索范围内找标点
for (let i = searchEnd - 1; i > searchStart; i--) {
if (/[。?!;,、\n\r\.\?!;,]/u.test(sentence[i])) {
splitPos = i + 1; // 包含标点
break;
}
}
// 如果没找到标点,在最大限制处分割
if (splitPos === -1) {
splitPos = searchEnd;
}
// 确保至少分割出一个字符
if (splitPos <= currentPos) {
splitPos = currentPos + 1;
}
const segment = sentence.substring(currentPos, splitPos).trim();
if (segment) {
segments.push(segment);
}
currentPos = splitPos;
}
}
}
if (temp) mergedChunks.push(temp);
// 如果还有超过 maxSegmentLength 的,强制分割
// 3. 特殊情况:合并以冒号开头的短片段到上一句
const finalSegments = [];
mergedChunks.forEach(segment => {
if (segment.length <= maxSegmentLength) {
finalSegments.push(segment);
} else {
// 按字数强制分割
for (let i = 0; i < segment.length; i += maxSegmentLength) {
finalSegments.push(segment.substring(i, Math.min(i + maxSegmentLength, segment.length)));
for (let i = 0; i < segments.length; i++) {
const currentSegment = segments[i];
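// Check whether the segment starts with a colon and is short (it may be a fragment that was split off by mistake).
// NOTE(review): if the first literal below really is the empty string, `startsWith('')` is always true and the colon check is a no-op — presumably a full-width colon was intended; confirm against the original file.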
if (i > 0 &&
(currentSegment.startsWith('') || currentSegment.startsWith(':')) &&
currentSegment.length < 15 &&
!currentSegment.endsWith('。') &&
!currentSegment.endsWith('!') &&
!currentSegment.endsWith('?')) {
// 尝试合并到上一句
const previousSegment = finalSegments[finalSegments.length - 1];
if (previousSegment && (previousSegment.length + currentSegment.length) <= maxSegmentLength) {
finalSegments[finalSegments.length - 1] = previousSegment + currentSegment;
} else {
finalSegments.push(currentSegment);
}
} else {
finalSegments.push(currentSegment);
}
});
}
// 清理:移除空白和空字符串
return finalSegments.filter(seg => seg && seg.trim());
}