package com.ruoyi.cms.handler;

import com.alibaba.nls.client.AccessToken;
import com.alibaba.nls.client.protocol.NlsClient;
import com.alibaba.nls.client.protocol.OutputFormatEnum;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizer;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizerListener;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizerResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

import javax.websocket.*;
import javax.websocket.server.ServerEndpoint;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/**
 * WebSocket endpoint at {@code /speech-synthesis} that forwards each received text message to
 * the Alibaba NLS speech synthesizer and streams the resulting audio back to the WebSocket peer
 * as binary messages, followed by a {@code {"status":"complete"}} text message.
 *
 * <p>Configuration is read from JVM system properties first and environment variables second:
 * <ul>
 *   <li>{@code nls.appKey} / {@code NLS_APP_KEY}</li>
 *   <li>{@code nls.accessKeyId} / {@code NLS_ACCESS_KEY_ID}</li>
 *   <li>{@code nls.accessKeySecret} / {@code NLS_ACCESS_KEY_SECRET}</li>
 *   <li>{@code nls.gatewayUrl} / {@code NLS_GATEWAY_URL} (optional, defaults to the Shanghai gateway)</li>
 * </ul>
 * Credentials must never be committed to source control. Any key that was previously embedded
 * in this file should be treated as leaked and rotated.
 */
@Component
@ServerEndpoint("/speech-synthesis")
public class SpeechSynthesisWebSocketHandler {

    private static final Logger logger = LoggerFactory.getLogger(SpeechSynthesisWebSocketHandler.class);

    private static final String DEFAULT_GATEWAY_URL = "wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1";

    /** Upper bound, in UTF-8 bytes, that {@link CloseReason} accepts for its reason phrase. */
    private static final int MAX_CLOSE_REASON_BYTES = 123;

    private final String appKey;
    private final String accessKeyId;
    private final String accessKeySecret;
    private final String url;

    /** NLS client owned by this endpoint instance; created in {@link #onOpen} and shut down in {@link #onClose}. */
    private volatile NlsClient client;

    /**
     * Synthesizers started through this endpoint instance that have not been closed yet.
     * A concurrent set is used because entries are added from {@link #onMessage} and may be
     * removed from a synthesizer callback (a failed synthesis closes the session from there).
     */
    private final Set<SpeechSynthesizer> openSynthesizers = ConcurrentHashMap.newKeySet();

    /**
     * Reads the NLS configuration. No network I/O happens here: the NLS client is created when a
     * connection opens, so merely instantiating this class allocates nothing that needs shutting down.
     */
    public SpeechSynthesisWebSocketHandler() {
        this.appKey = readSetting("nls.appKey", "NLS_APP_KEY", "");
        this.accessKeyId = readSetting("nls.accessKeyId", "NLS_ACCESS_KEY_ID", "");
        this.accessKeySecret = readSetting("nls.accessKeySecret", "NLS_ACCESS_KEY_SECRET", "");
        this.url = readSetting("nls.gatewayUrl", "NLS_GATEWAY_URL", DEFAULT_GATEWAY_URL);
    }

    /**
     * Creates the NLS client for the new connection.
     *
     * @param session the WebSocket session that was just opened
     */
    @OnOpen
    public void onOpen(Session session) {
        logger.info("WebSocket connected for speech synthesis, sessionId: {}", session.getId());
        this.client = createClient();
    }

    /**
     * Starts a speech synthesis task for the received text. Audio is delivered to the peer by the
     * listener returned from {@link #createSynthesizerListener(Session)}. If the task cannot be
     * started, the session is closed with {@code UNEXPECTED_CONDITION}.
     *
     * @param text    the text to synthesize
     * @param session the WebSocket session the text arrived on
     */
    @OnMessage(maxMessageSize = 5242880)
    public void onMessage(String text, Session session) {
        // Log only the size: the payload may be several megabytes of user-supplied content.
        logger.info("Received text for synthesis, sessionId: {}, length: {}", session.getId(), text.length());

        NlsClient nlsClient = this.client;
        if (nlsClient == null) {
            logger.error("NLS client is not available for session {}", session.getId());
            closeSession(session, "Synthesis error", "Error closing session");
            return;
        }

        SpeechSynthesizer synthesizer = null;
        try {
            // Create synthesizer with a session-specific listener
            synthesizer = new SpeechSynthesizer(nlsClient, createSynthesizerListener(session));
            openSynthesizers.add(synthesizer);

            // Configure synthesizer
            synthesizer.setAppKey(appKey);
            synthesizer.setFormat(OutputFormatEnum.WAV);
            synthesizer.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K);
            synthesizer.setVoice("aiqi");
            synthesizer.setPitchRate(0);
            synthesizer.setSpeechRate(0);

            // Use long text synthesis
            synthesizer.setLongText(text);

            // Start synthesis
            synthesizer.start();
        } catch (Exception e) {
            logger.error("Error during speech synthesis", e);
            releaseSynthesizer(synthesizer);
            closeSession(session, "Synthesis error", "Error closing session");
        }
        // The synthesizer is intentionally left open on success: results arrive through the
        // listener after this method has issued start(). It is closed in onClose.
        // NOTE(review): it is not closed in the listener's onComplete because a long-text task
        // may report completion more than once - confirm against the NLS SDK before tightening.
    }

    /**
     * Releases every synthesizer started on this connection and shuts down the NLS client.
     *
     * @param session the WebSocket session that was closed
     */
    @OnClose
    public void onClose(Session session) {
        logger.info("WebSocket closed for speech synthesis, sessionId: {}", session.getId());

        for (SpeechSynthesizer synthesizer : openSynthesizers) {
            releaseSynthesizer(synthesizer);
        }

        NlsClient nlsClient = this.client;
        this.client = null;
        if (nlsClient != null) {
            try {
                nlsClient.shutdown();
            } catch (Exception e) {
                logger.error("Error shutting down NLS client for session {}", session.getId(), e);
            }
        }
    }

    /**
     * Logs a WebSocket error reported for the session.
     *
     * @param session the affected session
     * @param error   the reported error
     */
    @OnError
    public void onError(Session session, Throwable error) {
        logger.error("WebSocket error for session {}: {}", session.getId(), error.getMessage(), error);
    }

    /**
     * Builds the listener that relays synthesizer events to the given WebSocket session:
     * audio packets are forwarded as binary messages, completion is signalled with a JSON text
     * message, and a failure closes the session.
     */
    private SpeechSynthesizerListener createSynthesizerListener(final Session session) {
        return new SpeechSynthesizerListener() {
            private boolean firstRecvBinary = true;

            @Override
            public void onComplete(SpeechSynthesizerResponse response) {
                logger.info("Synthesis completed for session {}, status: {}", session.getId(), response.getStatus());
                if (!session.isOpen()) {
                    return;
                }
                try {
                    // Send a marker to indicate completion
                    session.getBasicRemote().sendText("{\"status\":\"complete\"}");
                } catch (IOException | IllegalStateException e) {
                    // IllegalStateException covers a session that closed after the isOpen() check.
                    logger.error("Error sending completion message", e);
                }
            }

            @Override
            public void onMessage(ByteBuffer message) {
                if (firstRecvBinary) {
                    firstRecvBinary = false;
                    logger.info("First audio packet received for session {}", session.getId());
                }
                if (!session.isOpen()) {
                    // The peer has gone away; drop the audio instead of failing on every packet.
                    return;
                }
                try {
                    // Copy the packet before handing it to the WebSocket layer
                    byte[] bytesArray = new byte[message.remaining()];
                    message.get(bytesArray, 0, bytesArray.length);
                    session.getBasicRemote().sendBinary(ByteBuffer.wrap(bytesArray));
                } catch (IOException | IllegalStateException e) {
                    logger.error("Error sending audio data to client", e);
                }
            }

            @Override
            public void onFail(SpeechSynthesizerResponse response) {
                logger.error("Synthesis failed for session {}: task_id: {}, status: {}, status_text: {}",
                        session.getId(), response.getTaskId(), response.getStatus(), response.getStatusText());
                closeSession(session, "Synthesis failed: " + response.getStatusText(), "Error closing failed session");
            }
        };
    }

    /**
     * Obtains an access token and creates an NLS client for the configured gateway.
     *
     * @return the client, or {@code null} when configuration is missing or the token request fails
     */
    private NlsClient createClient() {
        if (appKey.isEmpty() || accessKeyId.isEmpty() || accessKeySecret.isEmpty()) {
            logger.error("NLS configuration is incomplete; set nls.appKey, nls.accessKeyId and "
                    + "nls.accessKeySecret (or NLS_APP_KEY, NLS_ACCESS_KEY_ID and NLS_ACCESS_KEY_SECRET)");
            return null;
        }
        try {
            AccessToken accessToken = new AccessToken(accessKeyId, accessKeySecret);
            accessToken.apply();
            return new NlsClient(url, accessToken.getToken());
        } catch (Exception e) {
            logger.error("Failed to initialize NLS client", e);
            return null;
        }
    }

    /** Closes the synthesizer if this endpoint still tracks it, so each one is closed at most once. */
    private void releaseSynthesizer(SpeechSynthesizer synthesizer) {
        if (synthesizer == null || !openSynthesizers.remove(synthesizer)) {
            return;
        }
        try {
            synthesizer.close();
        } catch (Exception e) {
            logger.error("Error closing synthesizer", e);
        }
    }

    /**
     * Closes the session with {@code UNEXPECTED_CONDITION} and the given reason, truncated to
     * the length {@link CloseReason} accepts.
     */
    private static void closeSession(Session session, String reason, String failureLogMessage) {
        try {
            session.close(new CloseReason(CloseReason.CloseCodes.UNEXPECTED_CONDITION, truncateCloseReason(reason)));
        } catch (IOException e) {
            logger.error(failureLogMessage, e);
        }
    }

    /**
     * Returns the longest prefix of {@code reason} whose UTF-8 encoding fits in
     * {@link #MAX_CLOSE_REASON_BYTES}, never splitting a code point.
     */
    private static String truncateCloseReason(String reason) {
        if (reason == null) {
            return "";
        }
        int byteCount = 0;
        int index = 0;
        while (index < reason.length()) {
            int codePoint = reason.codePointAt(index);
            int encodedLength = codePoint < 0x80 ? 1 : codePoint < 0x800 ? 2 : codePoint < 0x10000 ? 3 : 4;
            if (byteCount + encodedLength > MAX_CLOSE_REASON_BYTES) {
                break;
            }
            byteCount += encodedLength;
            index += Character.charCount(codePoint);
        }
        return reason.substring(0, index);
    }

    /**
     * Looks up a setting from a system property, then an environment variable.
     *
     * @return the trimmed value, or {@code fallback} when neither source provides a non-blank value
     */
    private static String readSetting(String propertyName, String envName, String fallback) {
        String value = System.getProperty(propertyName);
        if (value == null || value.trim().isEmpty()) {
            value = System.getenv(envName);
        }
        if (value == null || value.trim().isEmpty()) {
            return fallback;
        }
        return value.trim();
    }
}