修改TTS断句逻辑
This commit is contained in:
@@ -112,47 +112,111 @@ export const useAudioSpeak = (config = {}) => {
|
||||
if (!text || typeof text !== 'string') return [];
|
||||
|
||||
const cleanText = text.replace(/\s+/g, ' ').trim();
|
||||
if (!cleanText) return [];
|
||||
|
||||
// 先按标点粗分
|
||||
const rawChunks = cleanText.split(/([。?!;\n\r]|……)/).filter((t) => t.trim());
|
||||
const mergedChunks = [];
|
||||
let temp = '';
|
||||
const segments = [];
|
||||
|
||||
for (let i = 0; i < rawChunks.length; i++) {
|
||||
const part = rawChunks[i];
|
||||
// 如果是标点,追加到上一句
|
||||
if (/^[。?!;\n\r……]$/.test(part)) {
|
||||
temp += part;
|
||||
// 1. 按完整标点分割成独立的句子(包括中英文标点)
|
||||
// 正则解释:匹配非标点字符 + 标点符号(或者匹配到结尾)
|
||||
const sentenceRegex = /([^。?!;,、\n\r\.\?!;,]+[。?!;,、\n\r\.\?!;,]+|.+$)/g;
|
||||
|
||||
let currentIndex = 0;
|
||||
let match;
|
||||
const rawSentences = [];
|
||||
|
||||
while ((match = sentenceRegex.exec(cleanText)) !== null) {
|
||||
const sentence = match[0].trim();
|
||||
if (sentence) {
|
||||
rawSentences.push(sentence);
|
||||
}
|
||||
currentIndex = match.index + match[0].length;
|
||||
}
|
||||
|
||||
// 处理最后剩余的部分
|
||||
if (currentIndex < cleanText.length) {
|
||||
const remaining = cleanText.substring(currentIndex).trim();
|
||||
if (remaining) {
|
||||
rawSentences.push(remaining);
|
||||
}
|
||||
}
|
||||
|
||||
// 如果正则没有匹配到,整个文本作为一句话
|
||||
if (rawSentences.length === 0) {
|
||||
rawSentences.push(cleanText);
|
||||
}
|
||||
|
||||
// 2. 处理每个句子
|
||||
for (const sentence of rawSentences) {
|
||||
if (sentence.length <= maxSegmentLength) {
|
||||
// 句子长度正常,直接作为一个片段
|
||||
segments.push(sentence);
|
||||
} else {
|
||||
// 如果当前积累的句子太长(超过50字),先推入队列
|
||||
if (temp.length > 50) {
|
||||
mergedChunks.push(temp);
|
||||
temp = part;
|
||||
} else if (temp.length + part.length < 15) {
|
||||
// 如果当前积累的太短(少于15字),则合并下一句
|
||||
temp += part;
|
||||
} else {
|
||||
// 正常长度,推入
|
||||
if (temp) mergedChunks.push(temp);
|
||||
temp = part;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (temp) mergedChunks.push(temp);
|
||||
// 句子超长,需要分割
|
||||
console.log('检测到超长句子,需要分割:', sentence);
|
||||
|
||||
// 如果还有超过 maxSegmentLength 的,强制分割
|
||||
let currentPos = 0;
|
||||
const sentenceLength = sentence.length;
|
||||
|
||||
while (currentPos < sentenceLength) {
|
||||
// 优先在标点处分割
|
||||
let splitPos = -1;
|
||||
const searchStart = currentPos;
|
||||
const searchEnd = Math.min(currentPos + maxSegmentLength, sentenceLength);
|
||||
|
||||
// 在搜索范围内找标点
|
||||
for (let i = searchEnd - 1; i > searchStart; i--) {
|
||||
if (/[。?!;,、\n\r\.\?!;,]/u.test(sentence[i])) {
|
||||
splitPos = i + 1; // 包含标点
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// 如果没找到标点,在最大限制处分割
|
||||
if (splitPos === -1) {
|
||||
splitPos = searchEnd;
|
||||
}
|
||||
|
||||
// 确保至少分割出一个字符
|
||||
if (splitPos <= currentPos) {
|
||||
splitPos = currentPos + 1;
|
||||
}
|
||||
|
||||
const segment = sentence.substring(currentPos, splitPos).trim();
|
||||
if (segment) {
|
||||
segments.push(segment);
|
||||
}
|
||||
|
||||
currentPos = splitPos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 特殊情况:合并以冒号开头的短片段到上一句
|
||||
const finalSegments = [];
|
||||
mergedChunks.forEach(segment => {
|
||||
if (segment.length <= maxSegmentLength) {
|
||||
finalSegments.push(segment);
|
||||
} else {
|
||||
// 按字数强制分割
|
||||
for (let i = 0; i < segment.length; i += maxSegmentLength) {
|
||||
finalSegments.push(segment.substring(i, Math.min(i + maxSegmentLength, segment.length)));
|
||||
}
|
||||
}
|
||||
});
|
||||
for (let i = 0; i < segments.length; i++) {
|
||||
const currentSegment = segments[i];
|
||||
|
||||
// 检查是否以冒号开头且很短(可能是被错误分割的部分)
|
||||
if (i > 0 &&
|
||||
(currentSegment.startsWith(':') || currentSegment.startsWith(':')) &&
|
||||
currentSegment.length < 15 &&
|
||||
!currentSegment.endsWith('。') &&
|
||||
!currentSegment.endsWith('!') &&
|
||||
!currentSegment.endsWith('?')) {
|
||||
|
||||
// 尝试合并到上一句
|
||||
const previousSegment = finalSegments[finalSegments.length - 1];
|
||||
if (previousSegment && (previousSegment.length + currentSegment.length) <= maxSegmentLength) {
|
||||
finalSegments[finalSegments.length - 1] = previousSegment + currentSegment;
|
||||
} else {
|
||||
finalSegments.push(currentSegment);
|
||||
}
|
||||
} else {
|
||||
finalSegments.push(currentSegment);
|
||||
}
|
||||
}
|
||||
|
||||
// 清理:移除空白和空字符串
|
||||
return finalSegments.filter(seg => seg && seg.trim());
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user