feat: tts 和 asr 重构暂存

This commit is contained in:
2025-12-26 16:44:11 +08:00
parent 29615c394a
commit 2c5ff4220a
5 changed files with 1677 additions and 20 deletions

View File

@@ -1,5 +1,7 @@
// useAudioSpeak.js // useAudioSpeak.js
import { ref } from 'vue' import { ref } from 'vue'
import globalConfig from '@/config.js';
import useUserStore from '@/stores/useUserStore';
/** /**
* TTS语音合成Hook * TTS语音合成Hook
@@ -10,7 +12,8 @@ import { ref } from 'vue'
*/ */
export const useAudioSpeak = (config = {}) => { export const useAudioSpeak = (config = {}) => {
const { const {
apiUrl = 'http://39.98.44.136:19527/synthesize', // apiUrl = 'http://39.98.44.136:19527/synthesize',
apiUrl = `${globalConfig.baseUrl}/app/synthesize`,
maxSegmentLength = 30 maxSegmentLength = 30
} = config } = config
@@ -113,10 +116,15 @@ export const useAudioSpeak = (config = {}) => {
try { try {
console.log(`📶正在请求第${index + 1}段音频: "${text}"`) console.log(`📶正在请求第${index + 1}段音频: "${text}"`)
let Authorization = ''
if (useUserStore().token) {
Authorization = `${useUserStore().token}`
}
const response = await fetch(apiUrl, { const response = await fetch(apiUrl, {
method: 'POST', method: 'POST',
headers: { headers: {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'Authorization':encodeURIComponent(Authorization)
}, },
body: JSON.stringify({ body: JSON.stringify({
text: text, text: text,

View File

@@ -0,0 +1,433 @@
import {
ref,
onUnmounted
} from 'vue'
import {
$api
} from '../common/globalFunction';
import config from '@/config'
export function useRealtimeRecorderOnce() {
  // Voice-recording hook for uni-app: captures microphone audio on H5 (Web Audio)
  // or APP/mini-program (uni.getRecorderManager), merges the raw PCM chunks when
  // recording stops, and posts the result to the ASR backend for recognition.
  //
  // NOTE: the "// #ifdef H5" / "// #ifndef H5" / "// #endif" comments below are
  // uni-app conditional-compilation DIRECTIVES — they are functional, do not edit.

  // --- Reactive state ---
  const isRecording = ref(false)
  const isProcessing = ref(false) // true while merged audio is being sent to ASR
  const recordingDuration = ref(0) // elapsed recording time, whole seconds
  const volumeLevel = ref(0) // 0-100
  const recognizedText = ref('') // ASR result text (or a user-facing error hint)
  const audioData = ref(null) // final recording as a Blob of raw PCM
  const audioDataForDisplay = ref([]) // 31 values in [0,1] consumed by the WaveDisplay bars
  // --- Internals ---
  let durationTimer = null // setInterval handle driving recordingDuration
  // --- APP / mini-program recorder ---
  let recorderManager = null;
  let appAudioChunks = []; // ArrayBuffer frames collected from onFrameRecorded
  // --- H5 (browser) recorder ---
  let audioContext = null;
  let scriptProcessor = null;
  let mediaStreamSource = null;
  let h5Stream = null;
  let h5AudioChunks = []; // ArrayBuffers of 16-bit PCM converted from Float32 frames
  // --- Recorder configuration (passed to recorderManager.start on APP) ---
  const RECORD_CONFIG = {
    duration: 600000, // max recording length: 10 minutes (ms)
    sampleRate: 16000,
    numberOfChannels: 1,
    format: 'pcm',
    frameSize: 4096 // NOTE(review): uni docs define frameSize in KB — confirm 4096 is intended
  }
  /**
   * Start recording (platform-dispatching entry point).
   * Resets all result state, starts the platform recorder, the 1 s duration
   * timer and the waveform-display updater. On failure shows a toast and
   * cleans up. No-op if a recording is already in progress.
   */
  const startRecording = async () => {
    if (isRecording.value) return
    try {
      // Reset results left over from any previous take
      recognizedText.value = ''
      volumeLevel.value = 0
      audioData.value = null
      audioDataForDisplay.value = []
      appAudioChunks = []
      h5AudioChunks = []
      // #ifdef H5
      // getUserMedia requires a secure context (HTTPS or localhost)
      if (location.protocol !== 'https:' && location.hostname !== 'localhost') {
        uni.showToast({
          title: 'H5录音需要HTTPS环境',
          icon: 'none'
        });
        return;
      }
      // #endif
      // #ifdef H5
      await startH5Recording();
      // #endif
      // #ifndef H5
      startAppRecording();
      // #endif
      isRecording.value = true;
      recordingDuration.value = 0;
      durationTimer = setInterval(() => recordingDuration.value++, 1000);
      // Start pushing synthetic waveform frames into audioDataForDisplay
      updateAudioDataForDisplay();
    } catch (err) {
      console.error('启动失败:', err);
      uni.showToast({
        title: '启动失败: ' + (err.message || ''),
        icon: 'none'
      });
      cleanup();
    }
  }
  /**
   * H5 implementation: capture the microphone via getUserMedia and a
   * ScriptProcessorNode, converting each Float32 frame to signed 16-bit
   * little-endian PCM appended to h5AudioChunks.
   * @throws re-throws any getUserMedia / AudioContext failure to the caller
   */
  const startH5Recording = async () => {
    try {
      // 1. Acquire the microphone stream
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: true
      });
      h5Stream = stream;
      // 2. Create the AudioContext at the ASR sample rate
      const AudioContext = window.AudioContext || window.webkitAudioContext;
      audioContext = new AudioContext({
        sampleRate: 16000
      });
      mediaStreamSource = audioContext.createMediaStreamSource(stream);
      scriptProcessor = audioContext.createScriptProcessor(4096, 1, 1);
      scriptProcessor.onaudioprocess = (event) => {
        if (!isRecording.value) return;
        const inputData = event.inputBuffer.getChannelData(0);
        calculateVolume(inputData, true);
        // Convert Float32 [-1,1] samples to signed 16-bit PCM (little-endian)
        const buffer = new ArrayBuffer(inputData.length * 2);
        const view = new DataView(buffer);
        for (let i = 0; i < inputData.length; i++) {
          let s = Math.max(-1, Math.min(1, inputData[i]));
          view.setInt16(i * 2, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        }
        // Accumulate for merging when recording stops
        h5AudioChunks.push(buffer);
      };
      mediaStreamSource.connect(scriptProcessor);
      scriptProcessor.connect(audioContext.destination);
      console.log('H5 录音已启动');
    } catch (err) {
      console.error('H5 录音启动失败:', err);
      throw err;
    }
  }
  /**
   * Release all H5 audio resources (processor, source node, context, stream)
   * and null out the module-level handles.
   */
  const stopH5Resources = () => {
    if (scriptProcessor) scriptProcessor.disconnect();
    if (mediaStreamSource) mediaStreamSource.disconnect();
    if (audioContext) audioContext.close();
    if (h5Stream) h5Stream.getTracks().forEach(track => track.stop());
    scriptProcessor = null;
    mediaStreamSource = null;
    audioContext = null;
    h5Stream = null;
  }
  /**
   * APP / mini-program implementation: start uni's recorder manager and
   * collect raw PCM frames from onFrameRecorded into appAudioChunks.
   */
  const startAppRecording = () => {
    recorderManager = uni.getRecorderManager();
    recorderManager.onFrameRecorded((res) => {
      const {
        frameBuffer
      } = res;
      calculateVolume(frameBuffer, false);
      // Keep the frame for merging when recording stops
      if (frameBuffer && frameBuffer.byteLength > 0) {
        appAudioChunks.push(frameBuffer);
      }
    });
    recorderManager.onStart(() => {
      console.log('APP 录音已开始');
    });
    recorderManager.onError((err) => {
      console.error('APP 录音报错:', err);
      cleanup();
    });
    recorderManager.start(RECORD_CONFIG);
  }
  /**
   * Stop recording (both platforms): halts the hardware, merges and sends the
   * captured audio for recognition, then discards the raw chunks.
   * Await this to know when ASR processing has finished.
   */
  const stopRecording = async () => {
    if (!isRecording.value) return;
    isRecording.value = false;
    clearInterval(durationTimer);
    audioDataForDisplay.value = []; // clear the waveform display
    // Stop the platform recorder
    stopHardwareResource();
    // Merge chunks and run ASR
    await processAudioData();
    // Discard raw capture buffers
    appAudioChunks = [];
    h5AudioChunks = [];
  }
  /**
   * Cancel recording: stop the hardware and throw away everything captured
   * so far without sending anything to ASR.
   */
  const cancelRecording = () => {
    if (!isRecording.value) return;
    console.log('取消录音 - 丢弃结果');
    // 1. Stop the platform recorder
    stopHardwareResource();
    // 2. Discard results and buffers
    recognizedText.value = '';
    audioData.value = null;
    audioDataForDisplay.value = [];
    appAudioChunks = [];
    h5AudioChunks = [];
    // 3. Reset state/timers
    cleanup();
  }
  /**
   * Stop whichever platform recorder is active.
   */
  const stopHardwareResource = () => {
    // APP / mini-program
    if (recorderManager) {
      recorderManager.stop();
    }
    // H5
    // #ifdef H5
    stopH5Resources();
    // #endif
  }
  /**
   * Drive audioDataForDisplay while recording: every 100 ms synthesizes 31
   * pseudo-waveform values (center-weighted sine modulated by the current
   * volume level). The interval self-clears once isRecording goes false.
   */
  const updateAudioDataForDisplay = () => {
    const updateInterval = setInterval(() => {
      if (!isRecording.value) {
        clearInterval(updateInterval);
        return;
      }
      // Synthesize display values in [0,1] based on the measured volume
      const baseValue = volumeLevel.value / 100; // volume-derived base amplitude
      const data = [];
      // 31 data points, one per WaveDisplay bar
      for (let i = 0; i < 31; i++) {
        // Shape: taller in the middle, lower at the edges
        const position = i / 30; // 0..1 across the bars
        const centerDistance = Math.abs(position - 0.5);
        const waveValue = Math.sin(Date.now() / 100 + i * 0.5) * 0.3 + 0.5;
        const volumeFactor = baseValue * 0.8 + 0.2; // keep a 0.2 floor so bars never vanish
        const finalValue = waveValue * (1 - centerDistance) * volumeFactor;
        data.push(Math.max(0.1, Math.min(1, finalValue)));
      }
      audioDataForDisplay.value = data;
    }, 100); // refresh every 100 ms
  }
  /**
   * Merge the platform-specific PCM chunks into a single Blob, store it in
   * audioData and forward it to sendToASR. Guarded so concurrent calls are
   * ignored while one is in flight; errors surface as a hint in recognizedText.
   */
  const processAudioData = async () => {
    if (!isProcessing.value) {
      isProcessing.value = true;
      try {
        let audioBlob = null;
        // #ifdef H5
        // Merge H5 PCM chunks into one contiguous buffer
        if (h5AudioChunks.length > 0) {
          const totalLength = h5AudioChunks.reduce((acc, chunk) => acc + chunk.byteLength, 0);
          const combinedBuffer = new ArrayBuffer(totalLength);
          const combinedView = new Uint8Array(combinedBuffer);
          let offset = 0;
          h5AudioChunks.forEach(chunk => {
            const chunkView = new Uint8Array(chunk);
            combinedView.set(chunkView, offset);
            offset += chunk.byteLength;
          });
          audioBlob = new Blob([combinedBuffer], { type: 'audio/pcm' });
        }
        // #endif
        // #ifndef H5
        // Merge APP PCM frames into one contiguous buffer
        if (appAudioChunks.length > 0) {
          const totalLength = appAudioChunks.reduce((acc, chunk) => acc + chunk.byteLength, 0);
          const combinedBuffer = new ArrayBuffer(totalLength);
          const combinedView = new Uint8Array(combinedBuffer);
          let offset = 0;
          appAudioChunks.forEach(chunk => {
            const chunkView = new Uint8Array(chunk);
            combinedView.set(chunkView, offset);
            offset += chunk.byteLength;
          });
          audioBlob = new Blob([combinedBuffer], { type: 'audio/pcm' });
        }
        // #endif
        if (audioBlob) {
          audioData.value = audioBlob;
          // Ship the merged audio off for recognition
          await sendToASR(audioBlob);
        }
      } catch (error) {
        console.error('处理音频数据失败:', error);
        recognizedText.value = '音频处理失败,请重试';
      } finally {
        isProcessing.value = false;
      }
    }
  }
  /**
   * Upload the merged PCM Blob to the ASR endpoint and store the recognized
   * text in recognizedText; on failure writes a user-facing hint instead.
   * @param {Blob} audioBlob raw PCM audio produced by processAudioData
   */
  const sendToASR = async (audioBlob) => {
    try {
      // Build the multipart body
      const formData = new FormData();
      formData.append('audio', audioBlob, 'recording.pcm');
      // Attach the auth token — sent both as a form field and as a Bearer
      // header. NOTE(review): confirm which one the backend actually reads.
      const token = uni.getStorageSync('token') || '';
      if (token) {
        formData.append('token', token);
      }
      const asrUrl = `${config.baseUrl}/app/asr/connect`
      const response = await fetch(asrUrl, {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${token}`
        },
        body: formData
      });
      if (response.ok) {
        const result = await response.json();
        recognizedText.value = result.text || result.data || '';
      } else {
        throw new Error(`ASR请求失败: ${response.status}`);
      }
    } catch (error) {
      console.error('ASR识别失败:', error);
      recognizedText.value = '语音识别失败,请重试';
    }
  }
  /**
   * Estimate a 0-100 volume level from an audio frame by sampling every 10th
   * value. Accepts either H5 Float32 samples or an ArrayBuffer of Int16 PCM
   * (APP), selected via the isFloat32 flag. The *3 and /10000 scale factors
   * appear to be empirically tuned gains — confirm against the UI.
   * @param {Float32Array|ArrayBuffer} data audio frame
   * @param {boolean} isFloat32 true when data is Float32 samples in [-1,1]
   */
  const calculateVolume = (data, isFloat32) => {
    let sum = 0;
    let length = 0;
    if (isFloat32) {
      length = data.length;
      for (let i = 0; i < length; i += 10) {
        sum += Math.abs(data[i]);
      }
      const calculatedVolume = Math.min(100, Math.floor((sum / (length / 10)) * 100 * 3));
      volumeLevel.value = calculatedVolume;
    } else {
      const int16Data = new Int16Array(data);
      length = int16Data.length;
      for (let i = 0; i < length; i += 10) {
        sum += Math.abs(int16Data[i]);
      }
      const avg = sum / (length / 10);
      const calculatedVolume = Math.min(100, Math.floor((avg / 10000) * 100));
      volumeLevel.value = calculatedVolume;
    }
  }
  /**
   * Reset timers, flags and display state to idle. Does NOT stop hardware —
   * callers stop the recorder first (see stopRecording / cancelRecording).
   */
  const cleanup = () => {
    clearInterval(durationTimer);
    isRecording.value = false;
    isProcessing.value = false;
    recordingDuration.value = 0;
    volumeLevel.value = 0;
    audioDataForDisplay.value = [];
    recorderManager = null;
  }
  // Stop any in-flight recording and release resources when the host
  // component unmounts. stopRecording is async (it may still run ASR);
  // it is intentionally not awaited here.
  onUnmounted(() => {
    if (isRecording.value) {
      stopRecording();
    }
    cleanup();
  })
  return {
    isRecording,
    isProcessing,
    recordingDuration,
    volumeLevel,
    recognizedText,
    audioData,
    audioDataForDisplay, // consumed by the WaveDisplay component
    startRecording,
    stopRecording,
    cancelRecording
  }
}

View File

@@ -50,7 +50,7 @@
"quickapp" : {}, "quickapp" : {},
/* */ /* */
"mp-weixin" : { "mp-weixin" : {
"appid" : "wxdbdcc6a10153c99b", "appid" : "",
"setting" : { "setting" : {
"urlCheck" : false, "urlCheck" : false,
"es6" : true, "es6" : true,

File diff suppressed because it is too large Load Diff

View File

@@ -20,8 +20,8 @@
> >
{{ item }} {{ item }}
</view> </view>
<view class="chat-item self" v-if="isRecording"> <view class="chat-item self" v-if="isRecording || isProcessing">
<view class="message">{{ recognizedText }} {{ lastFinalText }}</view> <view class="message">{{ recognizedText || (isProcessing ? '正在识别语音...' : '正在录音 '+recordingDuration+'s') }}</view>
</view> </view>
</view> </view>
<scroll-view class="chat-list scrollView" :scroll-top="scrollTop" :scroll-y="true" scroll-with-animation> <scroll-view class="chat-list scrollView" :scroll-top="scrollTop" :scroll-y="true" scroll-with-animation>
@@ -118,9 +118,8 @@
</view> </view>
</view> </view>
</view> </view>
<view class="chat-item self" v-if="isRecording"> <view class="chat-item self" v-if="isRecording || isProcessing">
<!-- <view class="message">{{ recognizedText }} {{ lastFinalText }}</view> --> <view class="message">{{ recognizedText || (isProcessing ? '正在识别语音...' : '正在录音 '+recordingDuration+'s') }}</view>
<view class="message">{{ recognizedText }}</view>
</view> </view>
<view v-if="isTyping" class="self"> <view v-if="isTyping" class="self">
<text class="message msg-loading"> <text class="message msg-loading">
@@ -275,7 +274,7 @@ import AudioWave from './AudioWave.vue';
import WaveDisplay from './WaveDisplay.vue'; import WaveDisplay from './WaveDisplay.vue';
import useScreenStore from '@/stores/useScreenStore' import useScreenStore from '@/stores/useScreenStore'
const screenStore = useScreenStore(); const screenStore = useScreenStore();
import { useAudioRecorder } from '@/hook/useRealtimeRecorder.js'; import { useRealtimeRecorderOnce } from '@/hook/useRealtimeRecorderOnce.js';
import { useAudioSpeak } from '@/hook/useAudioSpeak.js'; import { useAudioSpeak } from '@/hook/useAudioSpeak.js';
// 全局 // 全局
const { $api, navTo, throttle } = inject('globalFunction'); const { $api, navTo, throttle } = inject('globalFunction');
@@ -290,14 +289,24 @@ import { FileValidator } from '@/utils/fileValidator.js'; //文件校验
// 语音识别 // 语音识别
const { const {
isRecording, isRecording,
isProcessing,
startRecording, startRecording,
stopRecording, stopRecording,
cancelRecording, cancelRecording,
audioDataForDisplay, audioDataForDisplay,
volumeLevel, volumeLevel,
recognizedText, recognizedText,
lastFinalText, recordingDuration
} = useAudioRecorder(); } = useRealtimeRecorderOnce();
watch(recognizedText, (newText) => {
if (newText && newText.trim() && !isProcessing.value) {
setTimeout(() => {
sendMessage(newText);
}, 300);
}
});
// 语音合成 // 语音合成
const { speak, pause, resume, isSpeaking, isPaused, isLoading, cancelAudio,cleanup } = useAudioSpeak(); const { speak, pause, resume, isSpeaking, isPaused, isLoading, cancelAudio,cleanup } = useAudioSpeak();
@@ -382,6 +391,8 @@ function showControll(index) {
return true; return true;
} }
const sendMessage = (text) => { const sendMessage = (text) => {
const values = textInput.value || text; const values = textInput.value || text;
showfile.value = false; showfile.value = false;
@@ -660,19 +671,12 @@ const handleTouchMove = (e) => {
} }
}; };
const handleTouchEnd = () => { const handleTouchEnd = async () => {
if (status.value === 'cancel') { if (status.value === 'cancel') {
console.log('取消发送'); console.log('取消发送');
cancelRecording(); cancelRecording();
} else { } else {
stopRecording(); await stopRecording();
if (isAudioPermission.value) {
if (recognizedText.value) {
sendMessage(recognizedText.value);
} else {
$api.msg('说话时长太短');
}
}
} }
status.value = 'idle'; status.value = 'idle';
}; };