diff --git a/hook/useAudioSpeak.js b/hook/useAudioSpeak.js index 46beabb..cbe566f 100644 --- a/hook/useAudioSpeak.js +++ b/hook/useAudioSpeak.js @@ -112,47 +112,111 @@ export const useAudioSpeak = (config = {}) => { if (!text || typeof text !== 'string') return []; const cleanText = text.replace(/\s+/g, ' ').trim(); + if (!cleanText) return []; - // 先按标点粗分 - const rawChunks = cleanText.split(/([。?!;\n\r]|……)/).filter((t) => t.trim()); - const mergedChunks = []; - let temp = ''; - - for (let i = 0; i < rawChunks.length; i++) { - const part = rawChunks[i]; - // 如果是标点,追加到上一句 - if (/^[。?!;\n\r……]$/.test(part)) { - temp += part; + const segments = []; + + // 1. 按完整标点分割成独立的句子(包括中英文标点) + // 正则解释:匹配非标点字符 + 标点符号(或者匹配到结尾) + const sentenceRegex = /([^。?!;,、\n\r\.\?!;,]+[。?!;,、\n\r\.\?!;,]+|.+$)/g; + + let currentIndex = 0; + let match; + const rawSentences = []; + + while ((match = sentenceRegex.exec(cleanText)) !== null) { + const sentence = match[0].trim(); + if (sentence) { + rawSentences.push(sentence); + } + currentIndex = match.index + match[0].length; + } + + // 处理最后剩余的部分 + if (currentIndex < cleanText.length) { + const remaining = cleanText.substring(currentIndex).trim(); + if (remaining) { + rawSentences.push(remaining); + } + } + + // 如果正则没有匹配到,整个文本作为一句话 + if (rawSentences.length === 0) { + rawSentences.push(cleanText); + } + + // 2. 处理每个句子 + for (const sentence of rawSentences) { + if (sentence.length <= maxSegmentLength) { + // 句子长度正常,直接作为一个片段 + segments.push(sentence); } else { - // 如果当前积累的句子太长(超过50字),先推入队列 - if (temp.length > 50) { - mergedChunks.push(temp); - temp = part; - } else if (temp.length + part.length < 15) { - // 如果当前积累的太短(少于15字),则合并下一句 - temp += part; - } else { - // 正常长度,推入 - if (temp) mergedChunks.push(temp); - temp = part; + // 句子超长,需要分割 + console.log('检测到超长句子,需要分割:', sentence); + + let currentPos = 0; + const sentenceLength = sentence.length; + + while (currentPos < sentenceLength) { + // 优先在标点处分割 + let splitPos = -1; + const searchStart = currentPos; + const searchEnd = Math.min(currentPos + maxSegmentLength, sentenceLength); + + // 在搜索范围内找标点 + for (let i = searchEnd - 1; i > searchStart; i--) { + if (/[。?!;,、\n\r\.\?!;,]/u.test(sentence[i])) { + splitPos = i + 1; // 包含标点 + break; + } + } + + // 如果没找到标点,在最大限制处分割 + if (splitPos === -1) { + splitPos = searchEnd; + } + + // 确保至少分割出一个字符 + if (splitPos <= currentPos) { + splitPos = currentPos + 1; + } + + const segment = sentence.substring(currentPos, splitPos).trim(); + if (segment) { + segments.push(segment); + } + + currentPos = splitPos; } } } - if (temp) mergedChunks.push(temp); - // 如果还有超过 maxSegmentLength 的,强制分割 + // 3. 特殊情况:合并以冒号开头的短片段到上一句 const finalSegments = []; - mergedChunks.forEach(segment => { - if (segment.length <= maxSegmentLength) { - finalSegments.push(segment); - } else { - // 按字数强制分割 - for (let i = 0; i < segment.length; i += maxSegmentLength) { - finalSegments.push(segment.substring(i, Math.min(i + maxSegmentLength, segment.length))); + for (let i = 0; i < segments.length; i++) { + const currentSegment = segments[i]; + + // 检查是否以冒号开头且很短(可能是被错误分割的部分) + if (i > 0 && + (currentSegment.startsWith(':') || currentSegment.startsWith(':')) && + currentSegment.length < 15 && + !currentSegment.endsWith('。') && + !currentSegment.endsWith('!') && + !currentSegment.endsWith('?')) { + + // 尝试合并到上一句 + const previousSegment = finalSegments[finalSegments.length - 1]; + if (previousSegment && (previousSegment.length + currentSegment.length) <= maxSegmentLength) { + finalSegments[finalSegments.length - 1] = previousSegment + currentSegment; + } else { + finalSegments.push(currentSegment); } + } else { + finalSegments.push(currentSegment); } - }); + } + // 清理:移除空白和空字符串 return finalSegments.filter(seg => seg && seg.trim()); }