package com.ruoyi.cms.handler;

import com.alibaba.nls.client.AccessToken;
import com.alibaba.nls.client.protocol.InputFormatEnum;
import com.alibaba.nls.client.protocol.NlsClient;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.asr.SpeechRecognizer;
import com.alibaba.nls.client.protocol.asr.SpeechRecognizerListener;
import com.alibaba.nls.client.protocol.asr.SpeechRecognizerResponse;
import lombok.Data;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.websocket.Session;
import java.io.IOException;
import java.io.InputStream;

/**
 * Streams PCM audio to the Alibaba NLS speech recognizer and forwards the
 * intermediate and final recognition results to a WebSocket session as small
 * JSON messages of the form {@code {"text": "...","asrEnd":"true|false"}}.
 *
 * <p>The constructor never throws when the token request fails; in that case
 * {@code client} stays {@code null} and {@link #processStream} logs an error
 * and returns without doing any work.
 */
@Data
public class SpeechRecognizerAI {

    private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizerAI.class);

    /**
     * Number of bytes read from the input stream and sent per call.
     * NOTE(review): 3200 bytes would be 100 ms of 16 kHz 16-bit mono PCM - confirm the intent.
     */
    private static final int AUDIO_CHUNK_BYTES = 3200;

    private String appKey;
    private NlsClient client;
    private AccessToken accessToken;

    /**
     * Requests an access token and creates the NLS client.
     *
     * @param appKey application key set on every recognizer created by this instance
     * @param id     Alibaba Cloud AccessKey ID
     * @param secret Alibaba Cloud AccessKey Secret
     * @param url    custom service address; {@code null} or empty selects the default address
     */
    public SpeechRecognizerAI(String appKey, String id, String secret, String url) {
        this.appKey = appKey;
        accessToken = new AccessToken(id, secret);
        try {
            accessToken.apply();
            // The token is a credential, so only its expiry time is logged.
            logger.info("Token acquired, Expire Time: {}", accessToken.getExpireTime());

            if (url == null || url.isEmpty()) {
                // Default service address.
                this.client = new NlsClient(accessToken.getToken());
            } else {
                // Custom service address.
                this.client = new NlsClient(url, accessToken.getToken());
            }
        } catch (IOException e) {
            // Deliberately not rethrown: existing callers expect construction to succeed.
            logger.error("Failed to initialize NlsClient: {}", e.getMessage(), e);
        }
    }

    /**
     * Reads the audio stream to its end, sends it to the recognizer in chunks and
     * pushes recognition results to the given session. Errors are logged, not thrown.
     * The recognizer is always closed; the input stream is not closed here.
     *
     * @param session     WebSocket session that receives the recognition results
     * @param inputStream PCM audio to recognize
     * @param sampleRate  sample rate in Hz; 16000 selects 16K, any other value selects 8K
     */
    public void processStream(Session session, InputStream inputStream, int sampleRate) {
        if (client == null) {
            // The constructor failed to obtain a token; nothing can be recognized.
            logger.error("处理音频流时出错: {}", "NlsClient is not initialized");
            return;
        }

        SpeechRecognizer recognizer = null;
        try {
            recognizer = new SpeechRecognizer(client, createListener(session));

            // Recognition parameters.
            recognizer.setAppKey(appKey);
            recognizer.setFormat(InputFormatEnum.PCM);
            recognizer.setSampleRate(sampleRate == 16000
                    ? SampleRateEnum.SAMPLE_RATE_16K
                    : SampleRateEnum.SAMPLE_RATE_8K);
            recognizer.setEnableIntermediateResult(true);
            recognizer.addCustomedParam("enable_voice_detection", true);

            recognizer.start();

            // Forward the audio chunk by chunk until the stream is exhausted.
            byte[] buffer = new byte[AUDIO_CHUNK_BYTES];
            int len;
            while ((len = inputStream.read(buffer)) > 0) {
                recognizer.send(buffer, len);
            }

            recognizer.stop();
        } catch (Exception e) {
            logger.error("处理音频流时出错: {}", e.getMessage(), e);
        } finally {
            if (recognizer != null) {
                recognizer.close();
            }
        }
    }

    /**
     * Builds the listener that logs recognizer events and relays recognized text
     * to the session.
     */
    private SpeechRecognizerListener createListener(final Session session) {
        return new SpeechRecognizerListener() {
            @Override
            public void onRecognitionResultChanged(SpeechRecognizerResponse response) {
                // Intermediate result: asrEnd is false.
                String text = response.getRecognizedText();
                logger.info("中间识别结果: {}", text);
                sendResult(session, text, false);
            }

            @Override
            public void onRecognitionCompleted(SpeechRecognizerResponse response) {
                // Final result: asrEnd is true.
                String text = response.getRecognizedText();
                logger.info("最终识别结果: {}", text);
                sendResult(session, text, true);
            }

            @Override
            public void onStarted(SpeechRecognizerResponse response) {
                logger.info("识别开始, TaskId: {}", response.getTaskId());
            }

            @Override
            public void onFail(SpeechRecognizerResponse response) {
                logger.error("识别失败: {}", response.getStatusText());
            }
        };
    }

    /**
     * Sends one recognition result to the session as JSON. The text is JSON-escaped
     * so quotes, backslashes and control characters cannot break the message; a
     * {@code null} text is sent as an empty string. {@code asrEnd} is emitted as a
     * quoted string, which is the format existing clients receive.
     */
    private void sendResult(Session session, String text, Boolean asrEnd) {
        if (session == null || !session.isOpen()) {
            // The peer is gone; there is nobody to deliver the result to.
            return;
        }
        try {
            session.getBasicRemote().sendText(
                    "{\"text\": \"" + escapeJson(text) + "\",\"asrEnd\":\"" + asrEnd + "\"}");
        } catch (IOException e) {
            logger.error("发送识别结果失败: {}", e.getMessage(), e);
        }
    }

    /**
     * Escapes a value for use inside a double-quoted JSON string.
     *
     * @param value raw text, may be {@code null}
     * @return the escaped text, or an empty string when {@code value} is {@code null}
     */
    private static String escapeJson(String value) {
        if (value == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '\b':
                    escaped.append("\\b");
                    break;
                case '\f':
                    escaped.append("\\f");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                case '\t':
                    escaped.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters need a four-digit hexadecimal escape.
                        escaped.append(String.format("\\u%04x", (int) c));
                    } else {
                        escaped.append(c);
                    }
            }
        }
        return escaped.toString();
    }

    /**
     * Shuts down the underlying NLS client, if one was created.
     */
    public void shutdown() {
        if (client != null) {
            client.shutdown();
        }
    }

    /**
     * Requests a fresh access token.
     *
     * @param id     Alibaba Cloud AccessKey ID
     * @param secret Alibaba Cloud AccessKey Secret
     * @return the token string, or {@code null} when the request fails
     */
    public static String getAccessToken(String id, String secret) {
        try {
            AccessToken accessToken = new AccessToken(id, secret);
            accessToken.apply();
            String token = accessToken.getToken();
            if (token != null) {
                // The token is a credential, so only its expiry time is logged.
                logger.info("成功获取 Token, 过期时间: {}", accessToken.getExpireTime());
                return token;
            }
            logger.error("get token fail:{}", token);
            return null;
        } catch (IOException e) {
            logger.error("申请 Token 时发生网络错误: {}", e.getMessage(), e);
            return null;
        }
    }
}