145 lines
5.8 KiB
Java
145 lines
5.8 KiB
Java
|
|
package com.ruoyi.cms.handler;
|
||
|
|
|
||
|
|
import com.alibaba.nls.client.AccessToken;
import com.alibaba.nls.client.protocol.NlsClient;
import com.alibaba.nls.client.protocol.OutputFormatEnum;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizer;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizerListener;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizerResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

import javax.websocket.*;
import javax.websocket.server.ServerEndpoint;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.atomic.AtomicReference;
|
||
|
|
|
||
|
|
@Component
|
||
|
|
@ServerEndpoint("/speech-synthesis")
|
||
|
|
public class SpeechSynthesisWebSocketHandler {
|
||
|
|
private static final Logger logger = LoggerFactory.getLogger(SpeechSynthesisWebSocketHandler.class);
|
||
|
|
|
||
|
|
private NlsClient client;
|
||
|
|
private String appKey = "mtA2pwmvCeefHT3Y";
|
||
|
|
private String accessKeyId = "LTAI5tRBahK93vPNF1JDVEPA";
|
||
|
|
private String accessKeySecret = "x95OWb4cV6ccQVtbEJ2Gxm2Uwl2thJ";
|
||
|
|
private String url = "wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1";
|
||
|
|
|
||
|
|
public SpeechSynthesisWebSocketHandler() {
|
||
|
|
// Initialize NLS client with token
|
||
|
|
AccessToken accessToken = new AccessToken(accessKeyId, accessKeySecret);
|
||
|
|
try {
|
||
|
|
accessToken.apply();
|
||
|
|
String token = accessToken.getToken();
|
||
|
|
if(url.isEmpty()) {
|
||
|
|
this.client = new NlsClient(token);
|
||
|
|
} else {
|
||
|
|
this.client = new NlsClient(url, token);
|
||
|
|
}
|
||
|
|
} catch (Exception e) {
|
||
|
|
logger.error("Failed to initialize NLS client", e);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
@OnOpen
|
||
|
|
public void onOpen(Session session) {
|
||
|
|
logger.info("WebSocket connected for speech synthesis, sessionId: {}", session.getId());
|
||
|
|
}
|
||
|
|
|
||
|
|
@OnMessage(maxMessageSize=5242880)
|
||
|
|
public void onMessage(String text, Session session) {
|
||
|
|
logger.info("Received text for synthesis: {}", text);
|
||
|
|
|
||
|
|
SpeechSynthesizer synthesizer = null;
|
||
|
|
try {
|
||
|
|
// Create synthesizer with a session-specific listener
|
||
|
|
synthesizer = new SpeechSynthesizer(client, createSynthesizerListener(session));
|
||
|
|
|
||
|
|
// Configure synthesizer
|
||
|
|
synthesizer.setAppKey(appKey);
|
||
|
|
synthesizer.setFormat(OutputFormatEnum.WAV);
|
||
|
|
synthesizer.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K);
|
||
|
|
synthesizer.setVoice("aiqi");
|
||
|
|
synthesizer.setPitchRate(0);
|
||
|
|
synthesizer.setSpeechRate(0);
|
||
|
|
|
||
|
|
// Use long text synthesis
|
||
|
|
synthesizer.setLongText(text);
|
||
|
|
|
||
|
|
// Start synthesis
|
||
|
|
synthesizer.start();
|
||
|
|
|
||
|
|
} catch (Exception e) {
|
||
|
|
logger.error("Error during speech synthesis", e);
|
||
|
|
try {
|
||
|
|
session.close(new CloseReason(CloseReason.CloseCodes.UNEXPECTED_CONDITION, "Synthesis error"));
|
||
|
|
} catch (IOException ioException) {
|
||
|
|
logger.error("Error closing session", ioException);
|
||
|
|
}
|
||
|
|
} finally {
|
||
|
|
// Note: We can't close the synthesizer here because synthesis is async
|
||
|
|
// It should be closed in the listener's onComplete/onFail methods
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
@OnClose
|
||
|
|
public void onClose(Session session) {
|
||
|
|
logger.info("WebSocket closed for speech synthesis, sessionId: {}", session.getId());
|
||
|
|
}
|
||
|
|
|
||
|
|
@OnError
|
||
|
|
public void onError(Session session, Throwable error) {
|
||
|
|
logger.error("WebSocket error for session {}: {}", session.getId(), error.getMessage(), error);
|
||
|
|
}
|
||
|
|
|
||
|
|
private SpeechSynthesizerListener createSynthesizerListener(Session session) {
|
||
|
|
return new SpeechSynthesizerListener() {
|
||
|
|
private boolean firstRecvBinary = true;
|
||
|
|
private long startTime;
|
||
|
|
|
||
|
|
@Override
|
||
|
|
public void onComplete(SpeechSynthesizerResponse response) {
|
||
|
|
logger.info("Synthesis completed for session {}, status: {}", session.getId(), response.getStatus());
|
||
|
|
try {
|
||
|
|
// Send a close message or marker to indicate completion
|
||
|
|
session.getBasicRemote().sendText("{\"status\":\"complete\"}");
|
||
|
|
} catch (IOException e) {
|
||
|
|
logger.error("Error sending completion message", e);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
@Override
|
||
|
|
public void onMessage(ByteBuffer message) {
|
||
|
|
try {
|
||
|
|
if (firstRecvBinary) {
|
||
|
|
firstRecvBinary = false;
|
||
|
|
startTime = System.currentTimeMillis();
|
||
|
|
logger.info("First audio packet received for session {}", session.getId());
|
||
|
|
}
|
||
|
|
|
||
|
|
// Send audio data to client
|
||
|
|
byte[] bytesArray = new byte[message.remaining()];
|
||
|
|
message.get(bytesArray, 0, bytesArray.length);
|
||
|
|
session.getBasicRemote().sendBinary(ByteBuffer.wrap(bytesArray));
|
||
|
|
|
||
|
|
} catch (IOException e) {
|
||
|
|
logger.error("Error sending audio data to client", e);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
@Override
|
||
|
|
public void onFail(SpeechSynthesizerResponse response) {
|
||
|
|
logger.error("Synthesis failed for session {}: task_id: {}, status: {}, status_text: {}",
|
||
|
|
session.getId(), response.getTaskId(), response.getStatus(), response.getStatusText());
|
||
|
|
try {
|
||
|
|
session.close(new CloseReason(CloseReason.CloseCodes.UNEXPECTED_CONDITION,
|
||
|
|
"Synthesis failed: " + response.getStatusText()));
|
||
|
|
} catch (IOException e) {
|
||
|
|
logger.error("Error closing failed session", e);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
};
|
||
|
|
}
|
||
|
|
}
|