Files
ks-app-employment-service/hook/useRealtimeRecorder.js
2025-07-22 15:20:21 +08:00

258 lines
8.4 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import {
ref,
onUnmounted
} from 'vue'
import {
$api,
} from '../common/globalFunction';
import config from '@/config'
export function useAudioRecorder() {
// ---- Reactive state (all of these refs are returned to the caller) ----
// True from the end of a successful startRecording() until cleanup() runs.
const isRecording = ref(false)
// Set to true for the duration of a stopRecording()/cancelRecording() call.
const isStopping = ref(false)
// Set to true when the WebSocket opens; set to false on error, close and cleanup().
const isSocketConnected = ref(false)
// Incremented once every 1000 ms by the interval stored in durationTimer.
const recordingDuration = ref(0)
// 16 values for display; each audio callback fills all of them with the current volume.
const audioDataForDisplay = ref(new Array(16).fill(0))
// Root-mean-square of the most recently processed audio buffer.
const volumeLevel = ref(0)
// Text accumulated from SentenceEnd messages whose confidence is above 0.5.
const recognizedText = ref('')
// Latest intermediate text from TranscriptionResultChanged; emptied when a
// sentence is accepted or when transcription completes.
const lastFinalText = ref('')
// ---- Non-reactive handles to the live capture/streaming resources ----
// The five handles below are reset to null by cleanup().
let audioStream = null
let audioContext = null
let audioInput = null
let scriptProcessor = null
let websocket = null
// Handle of the setInterval that drives recordingDuration; cleanup() clears it.
let durationTimer = null
/**
 * Builds a random identifier shaped like a version-4 UUID with the hyphens
 * removed: 32 lowercase hex characters.
 *
 * @returns {string} 32 hex characters; index 12 is always '4' and index 16 is
 *   one of '8', '9', 'a' or 'b'.
 */
const generateUUID = () => {
  const randomBytes = crypto.getRandomValues(new Uint8Array(16))
  // Force the version nibble to 4 and the variant bits to binary 10.
  randomBytes[6] = (randomBytes[6] & 0x0f) | 0x40
  randomBytes[8] = (randomBytes[8] & 0x3f) | 0x80
  return Array.from(randomBytes, (byte) => byte.toString(16).padStart(2, '0')).join('')
}
/**
 * Requests the speech-recognition WebSocket URL from the backend.
 *
 * @returns {Promise<string>} The `msg` field of the response.
 * @throws {Error} When the response `code` is not 200.
 */
const fetchWsUrl = async () => {
  const response = await $api.createRequest('/app/speech/getToken')
  if (response.code === 200) {
    return response.msg
  }
  throw new Error('无法获取语音识别 wsUrl')
}
/**
 * Reads the `appkey` and `token` query parameters from a WebSocket URL.
 *
 * @param {string} wsUrl - Absolute URL; parsed with the URL constructor, which
 *   throws a TypeError for input it cannot parse.
 * @returns {{appkey: (string|null), token: (string|null)}} Each value is null
 *   when the corresponding parameter is absent.
 */
function extractWsParams(wsUrl) {
  const { searchParams } = new URL(wsUrl)
  return {
    appkey: searchParams.get('appkey'),
    token: searchParams.get('token')
  }
}
/**
 * Opens the speech-recognition WebSocket and sends the StartTranscription
 * request as soon as the connection is open.
 *
 * The new socket is stored in the shared `websocket` variable. Incoming
 * messages update `lastFinalText` (intermediate text) and `recognizedText`
 * (text of finished sentences with confidence above 0.5).
 *
 * @returns {Promise<void>} Resolves once StartTranscription has been sent.
 *   Rejects if the URL cannot be fetched, or if the socket reports an error or
 *   closes before it has opened.
 */
const connectWebSocket = async () => {
  const wsUrl = await fetchWsUrl()
  const { appkey } = extractWsParams(wsUrl)
  return new Promise((resolve, reject) => {
    // Handlers use this local handle rather than the shared `websocket`
    // variable, which cleanup() or a later connection may null or replace.
    const socket = new WebSocket(wsUrl)
    websocket = socket
    socket.binaryType = 'arraybuffer'
    let hasOpened = false
    // Only the socket currently stored in `websocket` may drive the shared
    // connection flag; a stale socket closing late must not clobber it.
    const isCurrentSocket = () => websocket === socket

    socket.onopen = () => {
      hasOpened = true
      if (isCurrentSocket()) {
        isSocketConnected.value = true
      }
      // Send the StartTranscription message (see demo.html).
      const startTranscriptionMessage = {
        header: {
          // Not required for operation; may be left empty or carried in by wsUrl.
          appkey,
          namespace: 'SpeechTranscriber',
          name: 'StartTranscription',
          task_id: generateUUID(),
          message_id: generateUUID()
        },
        payload: {
          format: 'pcm',
          sample_rate: 16000,
          enable_intermediate_result: true,
          enable_punctuation_prediction: true,
          enable_inverse_text_normalization: true
        }
      }
      socket.send(JSON.stringify(startTranscriptionMessage))
      resolve()
    }

    socket.onerror = (e) => {
      if (isCurrentSocket()) {
        isSocketConnected.value = false
      }
      // No effect if the promise has already been settled.
      reject(e)
    }

    socket.onclose = () => {
      if (isCurrentSocket()) {
        isSocketConnected.value = false
      }
      if (!hasOpened) {
        // Never leave the caller awaiting a connection that will not open.
        reject(new Error('语音识别连接在建立前已关闭'))
      }
    }

    socket.onmessage = (e) => {
      let msg
      try {
        msg = JSON.parse(e.data)
      } catch (err) {
        // A frame that is not JSON text must not throw out of the handler.
        console.error('无法解析识别消息:', err)
        return
      }
      const name = msg?.header?.name
      const payload = msg?.payload
      switch (name) {
        case 'TranscriptionResultChanged': {
          // Intermediate text (optional: stash_result.unfixedText is more precise).
          lastFinalText.value = payload?.unfixed_result || payload?.result || ''
          break
        }
        case 'SentenceBegin': {
          // Optional: a new sentence begins; nothing to reset at the moment.
          break
        }
        case 'SentenceEnd': {
          const text = payload?.result || ''
          const confidence = payload?.confidence || 0
          // Only sufficiently confident, non-empty sentences are kept.
          if (text && confidence > 0.5) {
            recognizedText.value += text
            lastFinalText.value = ''
          }
          break
        }
        case 'TranscriptionStarted': {
          // The recognition task has started; nothing to do.
          break
        }
        case 'TranscriptionCompleted': {
          lastFinalText.value = ''
          break
        }
        case 'TaskFailed': {
          console.error('识别失败:', msg?.header?.status_text)
          break
        }
        default:
          console.log('未知消息类型:', name, msg)
          break
      }
    }
  })
}
// Guards against overlapping startRecording() calls: isRecording only becomes
// true after every resource has been acquired, so it cannot act as the lock.
let isStarting = false

/**
 * Starts a recording session: connects the recognition WebSocket, opens the
 * microphone, and streams 16-bit PCM buffers to the socket while updating the
 * volume indicators and the duration counter.
 *
 * Does nothing if a recording is active or another start is still in progress.
 * Failures are logged and swallowed; in that case every resource acquired so
 * far, including an already opened WebSocket, is released.
 *
 * @returns {Promise<void>}
 */
const startRecording = async () => {
  if (isRecording.value || isStarting) return
  isStarting = true
  try {
    recognizedText.value = ''
    lastFinalText.value = ''
    await connectWebSocket()
    audioStream = await navigator.mediaDevices.getUserMedia({
      audio: true
    })
    audioContext = new (window.AudioContext || window.webkitAudioContext)({
      sampleRate: 16000
    })
    audioInput = audioContext.createMediaStreamSource(audioStream)
    scriptProcessor = audioContext.createScriptProcessor(2048, 1, 1)
    scriptProcessor.onaudioprocess = (event) => {
      const input = event.inputBuffer.getChannelData(0)
      const pcm = new Int16Array(input.length)
      let sum = 0
      for (let i = 0; i < input.length; ++i) {
        // Clamp to [-1, 1] before scaling to the signed 16-bit range.
        const s = Math.max(-1, Math.min(1, input[i]))
        pcm[i] = s * 0x7FFF
        sum += s * s
      }
      // Root-mean-square of the buffer, mirrored into all 16 display slots.
      volumeLevel.value = Math.sqrt(sum / input.length)
      audioDataForDisplay.value = Array(16).fill(volumeLevel.value)
      if (websocket?.readyState === WebSocket.OPEN) {
        websocket.send(pcm.buffer)
      }
    }
    audioInput.connect(scriptProcessor)
    scriptProcessor.connect(audioContext.destination)
    isRecording.value = true
    recordingDuration.value = 0
    durationTimer = setInterval(() => recordingDuration.value++, 1000)
  } catch (err) {
    console.error('启动失败:', err)
    // cleanup() only drops the socket reference; close it first so a socket
    // that connected before the failure is not left open.
    websocket?.close()
    cleanup()
  } finally {
    isStarting = false
  }
}
/**
 * Ends the current recording: if the socket is open, sends a StopTranscription
 * message and closes it, then releases all resources through cleanup().
 *
 * Does nothing when no recording is active or a stop/cancel is already running.
 *
 * NOTE(review): the stop message carries no task_id and the socket is closed
 * right after sending, so nothing the service emits in response is received
 * here - confirm this is intended.
 */
const stopRecording = () => {
  const canStop = isRecording.value && !isStopping.value
  if (!canStop) return

  isStopping.value = true
  const socketIsOpen = websocket?.readyState === WebSocket.OPEN
  if (socketIsOpen) {
    const stopMessage = {
      header: {
        namespace: 'SpeechTranscriber',
        name: 'StopTranscription',
        message_id: generateUUID()
      }
    }
    websocket.send(JSON.stringify(stopMessage))
    websocket.close()
  }
  cleanup()
  isStopping.value = false
}
/**
 * Aborts the current recording without sending a stop message: closes the
 * socket if there is one and releases all resources through cleanup().
 *
 * Does nothing when no recording is active or a stop/cancel is already running.
 */
const cancelRecording = () => {
  if (!isRecording.value) return
  if (isStopping.value) return

  isStopping.value = true
  if (websocket != null) {
    websocket.close()
  }
  cleanup()
  isStopping.value = false
}
/**
 * Releases everything a recording session holds: stops the duration timer,
 * disconnects the audio nodes, stops the microphone tracks, closes the audio
 * context, drops all resource references and resets the status flags.
 *
 * The WebSocket is not closed here; only its reference is dropped.
 * Safe to call when some or all resources were never created.
 */
const cleanup = () => {
  clearInterval(durationTimer)

  // Tear down the audio graph, processor first.
  for (const node of [scriptProcessor, audioInput]) {
    node?.disconnect()
  }
  if (audioStream != null) {
    for (const track of audioStream.getTracks()) {
      track.stop()
    }
  }
  if (audioContext != null) {
    audioContext.close()
  }

  // Forget every handle so a later session starts from a clean slate.
  websocket = null
  scriptProcessor = null
  audioInput = null
  audioContext = null
  audioStream = null

  isSocketConnected.value = false
  isRecording.value = false
}
// When the owning component unmounts, end any recording that is still running
// so stopRecording() can release the resources it holds.
onUnmounted(() => {
  if (!isRecording.value) return
  stopRecording()
})
// Public API of the composable.
return {
// Reactive status flags.
isRecording,
isStopping,
isSocketConnected,
// Reactive values for display: elapsed seconds and volume indicators.
recordingDuration,
audioDataForDisplay,
volumeLevel,
// Reactive recognition output: accepted sentences and latest intermediate text.
recognizedText,
lastFinalText,
// Control functions.
startRecording,
stopRecording,
cancelRecording
}
}