修改TTS断句逻辑

2025-12-29 10:28:49 +08:00
parent 9769f486f0
commit 14af7b8b20
1 changed files with 95 additions and 31 deletions
--- a/hook/useAudioSpeak.js
+++ b/hook/useAudioSpeak.js
@@ -112,47 +112,111 @@ export const useAudioSpeak = (config = {}) => {
    if (!text || typeof text !== 'string') return [];
    const cleanText = text.replace(/\s+/g, ' ').trim();
    if (!cleanText) return [];
-    // 先按标点粗分
+    const segments = [];
-    const rawChunks = cleanText.split(/([。？！；\n\r]|……)/).filter((t) => t.trim());
+    
-    const mergedChunks = [];
+    // 1. 按完整标点分割成独立的句子（包括中英文标点）
-    let temp = '';
+    // 正则解释：匹配非标点字符 + 标点符号（或者匹配到结尾）
-
+    const sentenceRegex = /([^。？！；，、\n\r\.\?!;,]+[。？！；，、\n\r\.\?!;,]+|.+$)/g;
-    for (let i = 0; i < rawChunks.length; i++) {
+    
-      const part = rawChunks[i];
+    let currentIndex = 0;
-      // 如果是标点，追加到上一句
+    let match;
-      if (/^[。？！；\n\r……]$/.test(part)) {
+    const rawSentences = [];
-        temp += part;
+    
    while ((match = sentenceRegex.exec(cleanText)) !== null) {
      const sentence = match[0].trim();
      if (sentence) {
        rawSentences.push(sentence);
      }
      currentIndex = match.index + match[0].length;
    }
    // 处理最后剩余的部分
    if (currentIndex < cleanText.length) {
      const remaining = cleanText.substring(currentIndex).trim();
      if (remaining) {
        rawSentences.push(remaining);
      }
    }
    // 如果正则没有匹配到，整个文本作为一句话
    if (rawSentences.length === 0) {
      rawSentences.push(cleanText);
    }
    // 2. 处理每个句子
    for (const sentence of rawSentences) {
      if (sentence.length <= maxSegmentLength) {
        // 句子长度正常，直接作为一个片段
        segments.push(sentence);
      } else {
-        // 如果当前积累的句子太长（超过50字），先推入队列
+        // 句子超长，需要分割
-        if (temp.length > 50) {
+        console.log('检测到超长句子，需要分割:', sentence);
-          mergedChunks.push(temp);
+        
-          temp = part;
+        let currentPos = 0;
-        } else if (temp.length + part.length < 15) {
+        const sentenceLength = sentence.length;
-          // 如果当前积累的太短（少于15字），则合并下一句
+        
-          temp += part;
+        while (currentPos < sentenceLength) {
-        } else {
+          // 优先在标点处分割
-          // 正常长度，推入
+          let splitPos = -1;
-          if (temp) mergedChunks.push(temp);
+          const searchStart = currentPos;
-          temp = part;
+          const searchEnd = Math.min(currentPos + maxSegmentLength, sentenceLength);
          // 在搜索范围内找标点
          for (let i = searchEnd - 1; i > searchStart; i--) {
            if (/[。？！；，、\n\r\.\?!;,]/u.test(sentence[i])) {
              splitPos = i + 1; // 包含标点
              break;
            }
          }
          // 如果没找到标点，在最大限制处分割
          if (splitPos === -1) {
            splitPos = searchEnd;
          }
          // 确保至少分割出一个字符
          if (splitPos <= currentPos) {
            splitPos = currentPos + 1;
          }
          const segment = sentence.substring(currentPos, splitPos).trim();
          if (segment) {
            segments.push(segment);
          }
          currentPos = splitPos;
        }
      }
    }
    if (temp) mergedChunks.push(temp);
-    // 如果还有超过 maxSegmentLength 的，强制分割
+    // 3. 特殊情况：合并以冒号开头的短片段到上一句
    const finalSegments = [];
-    mergedChunks.forEach(segment => {
+    for (let i = 0; i < segments.length; i++) {
-      if (segment.length <= maxSegmentLength) {
+      const currentSegment = segments[i];
-        finalSegments.push(segment);
+      
-      } else {
+      // 检查是否以冒号开头且很短（可能是被错误分割的部分）
-        // 按字数强制分割
+      if (i > 0 && 
-        for (let i = 0; i < segment.length; i += maxSegmentLength) {
+          (currentSegment.startsWith('：') || currentSegment.startsWith(':')) && 
-          finalSegments.push(segment.substring(i, Math.min(i + maxSegmentLength, segment.length)));
+          currentSegment.length < 15 &&
          !currentSegment.endsWith('。') && 
          !currentSegment.endsWith('!') && 
          !currentSegment.endsWith('?')) {
        // 尝试合并到上一句
        const previousSegment = finalSegments[finalSegments.length - 1];
        if (previousSegment && (previousSegment.length + currentSegment.length) <= maxSegmentLength) {
          finalSegments[finalSegments.length - 1] = previousSegment + currentSegment;
        } else {
          finalSegments.push(currentSegment);
        }
      } else {
        finalSegments.push(currentSegment);
      }
-    });
+    }
    // 清理：移除空白和空字符串
    return finalSegments.filter(seg => seg && seg.trim());
  }