271 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			271 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| import {
 | ||
|     ref,
 | ||
|     onUnmounted
 | ||
| } from 'vue'
 | ||
| import {
 | ||
|     $api,
 | ||
| 
 | ||
| } from '../common/globalFunction';
 | ||
| 
 | ||
| import config from '@/config'
 | ||
| 
 | ||
/**
 * Audio-recording + real-time speech-recognition composable (H5 only).
 *
 * Captures microphone audio, down-converts it to 16 kHz 16-bit PCM and
 * streams it over a WebSocket to an Alibaba-Cloud-style SpeechTranscriber
 * endpoint, accumulating recognized sentences into `recognizedText`.
 *
 * @returns {{
 *   isRecording: import('vue').Ref<boolean>,
 *   isStopping: import('vue').Ref<boolean>,
 *   isSocketConnected: import('vue').Ref<boolean>,
 *   recordingDuration: import('vue').Ref<number>,
 *   audioDataForDisplay: import('vue').Ref<number[]>,
 *   volumeLevel: import('vue').Ref<number>,
 *   recognizedText: import('vue').Ref<string>,
 *   lastFinalText: import('vue').Ref<string>,
 *   startRecording: () => Promise<void>,
 *   stopRecording: () => void,
 *   cancelRecording: () => void
 * }}
 */
export function useAudioRecorder() {
    // --- reactive state exposed to the component ---
    const isRecording = ref(false)
    const isStopping = ref(false)
    const isSocketConnected = ref(false)
    const recordingDuration = ref(0) // elapsed seconds since start

    const audioDataForDisplay = ref(new Array(16).fill(0)) // simple 16-bar visualization
    const volumeLevel = ref(0) // RMS of the latest audio frame, 0..1

    const recognizedText = ref('') // concatenation of confirmed sentences
    const lastFinalText = ref('')  // current intermediate (not yet confirmed) text

    // --- non-reactive recording resources ---
    let audioStream = null
    let audioContext = null
    let audioInput = null
    let scriptProcessor = null
    let websocket = null
    let durationTimer = null

    /**
     * Generate a 32-char lowercase-hex id (UUID v4 with dashes stripped)
     * using crypto-strength randomness, as required by the ASR protocol
     * for task_id / message_id.
     */
    const generateUUID = () => {
        return ([1e7] + -1e3 + -4e3 + -8e3 + -1e11)
            .replace(/[018]/g, c =>
                (c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16)
            ).replace(/-/g, '')
    }

    /**
     * Fetch the pre-signed WebSocket URL for the speech service.
     * @returns {Promise<string>} ws(s):// URL carrying appkey/token params
     * @throws {Error} when the backend does not answer with code 200
     */
    const fetchWsUrl = async () => {
        const res = await $api.createRequest('/app/speech/getToken')
        if (res.code !== 200) throw new Error('无法获取语音识别 wsUrl')
        return res.msg
    }

    /** Extract the `appkey` / `token` query parameters from the ws URL. */
    function extractWsParams(wsUrl) {
        const url = new URL(wsUrl)
        return {
            appkey: url.searchParams.get('appkey'),
            token: url.searchParams.get('token')
        }
    }

    /**
     * Handle one parsed transcription message from the server.
     * Kept as a named helper so `onmessage` stays a thin, guarded shell.
     */
    const handleTranscriptionMessage = (msg) => {
        const name = msg?.header?.name
        const payload = msg?.payload

        switch (name) {
            case 'TranscriptionResultChanged': {
                // Intermediate (still mutable) text for live display.
                const text = payload?.unfixed_result || payload?.result || ''
                lastFinalText.value = text
                break
            }
            case 'SentenceBegin': {
                // A new sentence started; nothing to reset here.
                break
            }
            case 'SentenceEnd': {
                const text = payload?.result || ''
                const confidence = payload?.confidence || 0
                // Only accept reasonably confident final results.
                // NOTE(review): if the service omits `confidence`, this
                // drops every sentence — confirm the field is always sent.
                if (text && confidence > 0.5) {
                    recognizedText.value += text
                    lastFinalText.value = ''
                }
                break
            }
            case 'TranscriptionStarted': {
                // Server acknowledged StartTranscription; audio may flow.
                break
            }
            case 'TranscriptionCompleted': {
                lastFinalText.value = ''
                break
            }
            case 'TaskFailed': {
                console.error('识别失败:', msg?.header?.status_text)
                break
            }
            default:
                console.log('未知消息类型:', name, msg)
                break
        }
    }

    /**
     * Open the recognition WebSocket and send StartTranscription.
     * Resolves once the socket is open; rejects on connection error.
     */
    const connectWebSocket = async () => {
        const wsUrl = await fetchWsUrl()
        const { appkey } = extractWsParams(wsUrl)
        return new Promise((resolve, reject) => {
            websocket = new WebSocket(wsUrl)
            websocket.binaryType = 'arraybuffer'

            websocket.onopen = () => {
                isSocketConnected.value = true

                // StartTranscription handshake (see service demo).
                const startTranscriptionMessage = {
                    header: {
                        appkey: appkey, // may be empty; the wsUrl already carries it
                        namespace: 'SpeechTranscriber',
                        name: 'StartTranscription',
                        task_id: generateUUID(),
                        message_id: generateUUID()
                    },
                    payload: {
                        format: 'pcm',
                        sample_rate: 16000,
                        enable_intermediate_result: true,
                        enable_punctuation_prediction: true,
                        enable_inverse_text_normalization: true
                    }
                }
                websocket.send(JSON.stringify(startTranscriptionMessage))
                resolve()
            }

            websocket.onerror = (e) => {
                isSocketConnected.value = false
                reject(e)
            }

            websocket.onclose = () => {
                isSocketConnected.value = false
            }

            websocket.onmessage = (e) => {
                // binaryType is 'arraybuffer': guard against binary or
                // malformed frames so one bad message can't kill the handler.
                if (typeof e.data !== 'string') return
                let msg
                try {
                    msg = JSON.parse(e.data)
                } catch (err) {
                    console.error('未知消息类型:', e.data, err)
                    return
                }
                handleTranscriptionMessage(msg)
            }
        })
    }

    /**
     * Start capturing and streaming audio. No-op while already recording.
     * On any failure all partially-acquired resources are released.
     */
    const startRecording = async () => {
        if (isRecording.value) return

        // #ifdef MP-WEIXIN
        $api.msg('小程序暂不支持语音识别功能');
        return;
        // #endif

        // #ifdef H5
        try {
            if (typeof navigator === 'undefined' || !navigator.mediaDevices) {
                $api.msg('当前环境不支持录音功能');
                return;
            }

            recognizedText.value = ''
            lastFinalText.value = ''
            await connectWebSocket()

            audioStream = await navigator.mediaDevices.getUserMedia({
                audio: true
            })
            // 16 kHz mono context to match the PCM format announced above.
            audioContext = new(window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            })
            audioInput = audioContext.createMediaStreamSource(audioStream)
            // ScriptProcessorNode is deprecated but kept for broad
            // compatibility; consider AudioWorklet when targets allow.
            scriptProcessor = audioContext.createScriptProcessor(2048, 1, 1)

            scriptProcessor.onaudioprocess = (event) => {
                const input = event.inputBuffer.getChannelData(0)
                const pcm = new Int16Array(input.length)
                let sum = 0
                for (let i = 0; i < input.length; ++i) {
                    // Clamp float sample to [-1, 1], scale to int16.
                    const s = Math.max(-1, Math.min(1, input[i]))
                    pcm[i] = s * 0x7FFF
                    sum += s * s
                }

                volumeLevel.value = Math.sqrt(sum / input.length)
                audioDataForDisplay.value = Array(16).fill(volumeLevel.value)

                if (websocket?.readyState === WebSocket.OPEN) {
                    websocket.send(pcm.buffer)
                }
            }

            audioInput.connect(scriptProcessor)
            scriptProcessor.connect(audioContext.destination)

            isRecording.value = true
            recordingDuration.value = 0
            durationTimer = setInterval(() => recordingDuration.value++, 1000)
        } catch (err) {
            console.error('启动失败:', err)
            // cleanup() also closes the websocket — without this the socket
            // leaked when getUserMedia failed after the connection opened.
            cleanup()
        }
        // #endif
    }

    /**
     * Stop recording gracefully: tell the server to finalize, then
     * release local resources (cleanup closes the socket).
     */
    const stopRecording = () => {
        if (!isRecording.value || isStopping.value) return
        isStopping.value = true

        if (websocket?.readyState === WebSocket.OPEN) {
            websocket.send(JSON.stringify({
                header: {
                    namespace: 'SpeechTranscriber',
                    name: 'StopTranscription',
                    message_id: generateUUID()
                }
            }))
        }

        cleanup()
        isStopping.value = false
    }

    /** Abort recording without asking the server to finalize. */
    const cancelRecording = () => {
        if (!isRecording.value || isStopping.value) return
        isStopping.value = true
        cleanup()
        isStopping.value = false
    }

    /**
     * Release every acquired resource; safe to call in any state and
     * after partial initialization (all teardown is null-guarded).
     */
    const cleanup = () => {
        clearInterval(durationTimer)
        durationTimer = null

        scriptProcessor?.disconnect()
        audioInput?.disconnect()
        audioStream?.getTracks().forEach(track => track.stop())
        // close() rejects if the context is already closed — swallow that.
        audioContext?.close().catch(() => {})
        // Always close the socket here so every failure path releases it;
        // close() on an already-closed socket is a no-op.
        websocket?.close()

        audioStream = null
        audioContext = null
        audioInput = null
        scriptProcessor = null
        websocket = null

        isRecording.value = false
        isSocketConnected.value = false
    }

    // Stop cleanly if the host component is torn down mid-recording.
    onUnmounted(() => {
        if (isRecording.value) stopRecording()
    })

    return {
        isRecording,
        isStopping,
        isSocketConnected,
        recordingDuration,
        audioDataForDisplay,
        volumeLevel,
        recognizedText,
        lastFinalText,
        startRecording,
        stopRecording,
        cancelRecording
    }
}
