修改TTS断句逻辑
This commit is contained in:
@@ -112,47 +112,111 @@ export const useAudioSpeak = (config = {}) => {
|
|||||||
if (!text || typeof text !== 'string') return [];
|
if (!text || typeof text !== 'string') return [];
|
||||||
|
|
||||||
const cleanText = text.replace(/\s+/g, ' ').trim();
|
const cleanText = text.replace(/\s+/g, ' ').trim();
|
||||||
|
if (!cleanText) return [];
|
||||||
|
|
||||||
// 先按标点粗分
|
const segments = [];
|
||||||
const rawChunks = cleanText.split(/([。?!;\n\r]|……)/).filter((t) => t.trim());
|
|
||||||
const mergedChunks = [];
|
// 1. 按完整标点分割成独立的句子(包括中英文标点)
|
||||||
let temp = '';
|
// 正则解释:匹配非标点字符 + 标点符号(或者匹配到结尾)
|
||||||
|
const sentenceRegex = /([^。?!;,、\n\r\.\?!;,]+[。?!;,、\n\r\.\?!;,]+|.+$)/g;
|
||||||
for (let i = 0; i < rawChunks.length; i++) {
|
|
||||||
const part = rawChunks[i];
|
let currentIndex = 0;
|
||||||
// 如果是标点,追加到上一句
|
let match;
|
||||||
if (/^[。?!;\n\r……]$/.test(part)) {
|
const rawSentences = [];
|
||||||
temp += part;
|
|
||||||
|
while ((match = sentenceRegex.exec(cleanText)) !== null) {
|
||||||
|
const sentence = match[0].trim();
|
||||||
|
if (sentence) {
|
||||||
|
rawSentences.push(sentence);
|
||||||
|
}
|
||||||
|
currentIndex = match.index + match[0].length;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 处理最后剩余的部分
|
||||||
|
if (currentIndex < cleanText.length) {
|
||||||
|
const remaining = cleanText.substring(currentIndex).trim();
|
||||||
|
if (remaining) {
|
||||||
|
rawSentences.push(remaining);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 如果正则没有匹配到,整个文本作为一句话
|
||||||
|
if (rawSentences.length === 0) {
|
||||||
|
rawSentences.push(cleanText);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. 处理每个句子
|
||||||
|
for (const sentence of rawSentences) {
|
||||||
|
if (sentence.length <= maxSegmentLength) {
|
||||||
|
// 句子长度正常,直接作为一个片段
|
||||||
|
segments.push(sentence);
|
||||||
} else {
|
} else {
|
||||||
// 如果当前积累的句子太长(超过50字),先推入队列
|
// 句子超长,需要分割
|
||||||
if (temp.length > 50) {
|
console.log('检测到超长句子,需要分割:', sentence);
|
||||||
mergedChunks.push(temp);
|
|
||||||
temp = part;
|
let currentPos = 0;
|
||||||
} else if (temp.length + part.length < 15) {
|
const sentenceLength = sentence.length;
|
||||||
// 如果当前积累的太短(少于15字),则合并下一句
|
|
||||||
temp += part;
|
while (currentPos < sentenceLength) {
|
||||||
} else {
|
// 优先在标点处分割
|
||||||
// 正常长度,推入
|
let splitPos = -1;
|
||||||
if (temp) mergedChunks.push(temp);
|
const searchStart = currentPos;
|
||||||
temp = part;
|
const searchEnd = Math.min(currentPos + maxSegmentLength, sentenceLength);
|
||||||
|
|
||||||
|
// 在搜索范围内找标点
|
||||||
|
for (let i = searchEnd - 1; i > searchStart; i--) {
|
||||||
|
if (/[。?!;,、\n\r\.\?!;,]/u.test(sentence[i])) {
|
||||||
|
splitPos = i + 1; // 包含标点
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 如果没找到标点,在最大限制处分割
|
||||||
|
if (splitPos === -1) {
|
||||||
|
splitPos = searchEnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 确保至少分割出一个字符
|
||||||
|
if (splitPos <= currentPos) {
|
||||||
|
splitPos = currentPos + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const segment = sentence.substring(currentPos, splitPos).trim();
|
||||||
|
if (segment) {
|
||||||
|
segments.push(segment);
|
||||||
|
}
|
||||||
|
|
||||||
|
currentPos = splitPos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (temp) mergedChunks.push(temp);
|
|
||||||
|
|
||||||
// 如果还有超过 maxSegmentLength 的,强制分割
|
// 3. 特殊情况:合并以冒号开头的短片段到上一句
|
||||||
const finalSegments = [];
|
const finalSegments = [];
|
||||||
mergedChunks.forEach(segment => {
|
for (let i = 0; i < segments.length; i++) {
|
||||||
if (segment.length <= maxSegmentLength) {
|
const currentSegment = segments[i];
|
||||||
finalSegments.push(segment);
|
|
||||||
} else {
|
// 检查是否以冒号开头且很短(可能是被错误分割的部分)
|
||||||
// 按字数强制分割
|
if (i > 0 &&
|
||||||
for (let i = 0; i < segment.length; i += maxSegmentLength) {
|
(currentSegment.startsWith(':') || currentSegment.startsWith(':')) &&
|
||||||
finalSegments.push(segment.substring(i, Math.min(i + maxSegmentLength, segment.length)));
|
currentSegment.length < 15 &&
|
||||||
|
!currentSegment.endsWith('。') &&
|
||||||
|
!currentSegment.endsWith('!') &&
|
||||||
|
!currentSegment.endsWith('?')) {
|
||||||
|
|
||||||
|
// 尝试合并到上一句
|
||||||
|
const previousSegment = finalSegments[finalSegments.length - 1];
|
||||||
|
if (previousSegment && (previousSegment.length + currentSegment.length) <= maxSegmentLength) {
|
||||||
|
finalSegments[finalSegments.length - 1] = previousSegment + currentSegment;
|
||||||
|
} else {
|
||||||
|
finalSegments.push(currentSegment);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
finalSegments.push(currentSegment);
|
||||||
}
|
}
|
||||||
});
|
}
|
||||||
|
|
||||||
|
// 清理:移除空白和空字符串
|
||||||
return finalSegments.filter(seg => seg && seg.trim());
|
return finalSegments.filter(seg => seg && seg.trim());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user