feat: 添加腾讯实时 ASR 支持并优化相关逻辑
- 在 `AiModelServiceImpl` 中添加腾讯 ASR 配置验证和处理逻辑
- 更新前端 `RealtimeAsrSession` 组件,支持 `sentenceKey` 和腾讯 ASR 配置
- 在 `RealtimeMeetingProxyWebSocketHandler` 中添加对腾讯 ASR 会话的处理
- 添加 `TencentRealtimeAsrChannel` 类,实现腾讯实时 ASR 通道逻辑
- 更新 `RealtimeMeetingSocketSessionServiceImpl` 和 `RealtimeMeetingSessionStateServiceImpl`,支持腾讯 ASR 会话状态管理
- 在 `RealtimeSocketSessionData` 中添加 `modelCode` 和 `mediaConfig` 字段
- 更新 `RealtimeMeetingTranscriptCacheItem`,添加 `sentenceGroupKey` 字段
- 在 `AiModels` 页面中添加腾讯 ASR 配置表单字段
- 添加 `TENCENT_PROVIDER` 常量,并在 `ModelProviderEnum` 中添加腾讯云枚举值
- 添加单元测试以验证腾讯 ASR 模型保存逻辑

dev_na
parent
ba62c9e0c0
commit
77fe6d4e77
|
|
@ -177,6 +177,12 @@
|
||||||
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
||||||
<version>2.3.0</version>
|
<version>2.3.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<!-- Source: https://mvnrepository.com/artifact/com.tencentcloudapi/tencentcloud-speech-sdk-java -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.tencentcloudapi</groupId>
|
||||||
|
<artifactId>tencentcloud-speech-sdk-java</artifactId>
|
||||||
|
<version>1.0.67</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|
|
||||||
|
|
@ -27,4 +27,6 @@ public class RealtimeMeetingResumeConfig {
|
||||||
private Boolean saveAudio;
|
private Boolean saveAudio;
|
||||||
@Schema(description = "热词列表")
|
@Schema(description = "热词列表")
|
||||||
private List<Map<String, Object>> hotwords;
|
private List<Map<String, Object>> hotwords;
|
||||||
|
@Schema(description = "腾讯说话人上下文 ID")
|
||||||
|
private String speakerContextId;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import lombok.Data;
|
||||||
@Data
|
@Data
|
||||||
public class RealtimeMeetingTranscriptCacheItem {
|
public class RealtimeMeetingTranscriptCacheItem {
|
||||||
private String sentenceKey;
|
private String sentenceKey;
|
||||||
|
private String sentenceGroupKey;
|
||||||
private Integer sentenceId;
|
private Integer sentenceId;
|
||||||
private Integer sentenceType;
|
private Integer sentenceType;
|
||||||
private String speakerId;
|
private String speakerId;
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@ package com.imeeting.dto.biz;
|
||||||
|
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
public class RealtimeSocketSessionData {
|
public class RealtimeSocketSessionData {
|
||||||
private Long meetingId;
|
private Long meetingId;
|
||||||
|
|
@ -10,4 +12,6 @@ public class RealtimeSocketSessionData {
|
||||||
private Long asrModelId;
|
private Long asrModelId;
|
||||||
private String provider;
|
private String provider;
|
||||||
private String targetWsUrl;
|
private String targetWsUrl;
|
||||||
|
private String modelCode;
|
||||||
|
private Map<String, Object> mediaConfig;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,7 @@ import lombok.Getter;
|
||||||
@Getter
|
@Getter
|
||||||
public enum ModelProviderEnum {
|
public enum ModelProviderEnum {
|
||||||
LOCAL("local", "本地"),
|
LOCAL("local", "本地"),
|
||||||
|
TENCENT("tencent", "腾讯云");
|
||||||
;
|
|
||||||
|
|
||||||
private final String code;
|
private final String code;
|
||||||
private final String description;
|
private final String description;
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,8 @@ public interface RealtimeMeetingSessionStateService {
|
||||||
|
|
||||||
void rememberResumeConfig(Long meetingId, RealtimeMeetingResumeConfig resumeConfig);
|
void rememberResumeConfig(Long meetingId, RealtimeMeetingResumeConfig resumeConfig);
|
||||||
|
|
||||||
|
void rememberSpeakerContext(Long meetingId, String speakerContextId);
|
||||||
|
|
||||||
void assertCanOpenSession(Long meetingId);
|
void assertCanOpenSession(Long meetingId);
|
||||||
|
|
||||||
boolean activate(Long meetingId, String connectionId);
|
boolean activate(Long meetingId, String connectionId);
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,10 @@ public class AiModelServiceImpl implements AiModelService {
|
||||||
|
|
||||||
private static final String TYPE_ASR = "ASR";
|
private static final String TYPE_ASR = "ASR";
|
||||||
private static final String TYPE_LLM = "LLM";
|
private static final String TYPE_LLM = "LLM";
|
||||||
|
private static final String TENCENT_PROVIDER = "tencent";
|
||||||
|
private static final String MEDIA_TENCENT_APP_ID = "tencentAppId";
|
||||||
|
private static final String MEDIA_TENCENT_SECRET_ID = "tencentSecretId";
|
||||||
|
private static final String MEDIA_TENCENT_SECRET_KEY = "tencentSecretKey";
|
||||||
private static final int DEFAULT_SORT_ORDER = 0;
|
private static final int DEFAULT_SORT_ORDER = 0;
|
||||||
private static final String DEFAULT_LLM_API_PATH = "/v1/chat/completions";
|
private static final String DEFAULT_LLM_API_PATH = "/v1/chat/completions";
|
||||||
private static final String DEFAULT_ANTHROPIC_API_PATH = "/messages";
|
private static final String DEFAULT_ANTHROPIC_API_PATH = "/messages";
|
||||||
|
|
@ -741,6 +745,7 @@ public class AiModelServiceImpl implements AiModelService {
|
||||||
if (Integer.valueOf(1).equals(dto.getIsDefault()) && !Integer.valueOf(1).equals(dto.getStatus())) {
|
if (Integer.valueOf(1).equals(dto.getIsDefault()) && !Integer.valueOf(1).equals(dto.getStatus())) {
|
||||||
throw new RuntimeException("默认模型必须为启用状态");
|
throw new RuntimeException("默认模型必须为启用状态");
|
||||||
}
|
}
|
||||||
|
validateTencentAsrConfig(dto);
|
||||||
// if ("custom".equals(normalizeProvider(dto.getProvider()))) {
|
// if ("custom".equals(normalizeProvider(dto.getProvider()))) {
|
||||||
// if (TYPE_ASR.equals(normalizeType(dto.getModelType()))) {
|
// if (TYPE_ASR.equals(normalizeType(dto.getModelType()))) {
|
||||||
// Map<String, Object> mediaConfig = dto.getMediaConfig() == null ? Collections.emptyMap() : dto.getMediaConfig();
|
// Map<String, Object> mediaConfig = dto.getMediaConfig() == null ? Collections.emptyMap() : dto.getMediaConfig();
|
||||||
|
|
@ -833,7 +838,8 @@ public class AiModelServiceImpl implements AiModelService {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void pushAsrConfig(AsrModel entity) {
|
private void pushAsrConfig(AsrModel entity) {
|
||||||
if (ModelProviderEnum.LOCAL.getCode().equals(normalizeProvider(entity.getProvider()))) {
|
String provider = normalizeProvider(entity.getProvider());
|
||||||
|
if (ModelProviderEnum.LOCAL.getCode().equals(provider)) {
|
||||||
if (entity.getApiKey() == null || entity.getApiKey().isBlank()) {
|
if (entity.getApiKey() == null || entity.getApiKey().isBlank()) {
|
||||||
log.info("Skip syncing local ASR profile because apiKey is blank, modelName={}", entity.getModelName());
|
log.info("Skip syncing local ASR profile because apiKey is blank, modelName={}", entity.getModelName());
|
||||||
return;
|
return;
|
||||||
|
|
@ -841,6 +847,12 @@ public class AiModelServiceImpl implements AiModelService {
|
||||||
updateLocalProfile(entity);
|
updateLocalProfile(entity);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if ("custom".equals(provider)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (TENCENT_PROVIDER.equals(provider)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (entity.getBaseUrl() == null || entity.getBaseUrl().isBlank()) {
|
if (entity.getBaseUrl() == null || entity.getBaseUrl().isBlank()) {
|
||||||
throw new RuntimeException("ASR 模型必须配置 baseUrl");
|
throw new RuntimeException("ASR 模型必须配置 baseUrl");
|
||||||
}
|
}
|
||||||
|
|
@ -935,6 +947,28 @@ public class AiModelServiceImpl implements AiModelService {
|
||||||
return text.isEmpty() ? null : text;
|
return text.isEmpty() ? null : text;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void validateTencentAsrConfig(AiModelDTO dto) {
|
||||||
|
if (!TYPE_ASR.equals(normalizeType(dto.getModelType()))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!TENCENT_PROVIDER.equals(normalizeProvider(dto.getProvider()))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Map<String, Object> mediaConfig = dto.getMediaConfig() == null ? Collections.emptyMap() : dto.getMediaConfig();
|
||||||
|
if (readConfigString(mediaConfig.get(MEDIA_TENCENT_APP_ID)) == null) {
|
||||||
|
throw new RuntimeException("腾讯实时 ASR 模型必须配置 mediaConfig.tencentAppId");
|
||||||
|
}
|
||||||
|
if (readConfigString(mediaConfig.get(MEDIA_TENCENT_SECRET_ID)) == null) {
|
||||||
|
throw new RuntimeException("腾讯实时 ASR 模型必须配置 mediaConfig.tencentSecretId");
|
||||||
|
}
|
||||||
|
if (readConfigString(mediaConfig.get(MEDIA_TENCENT_SECRET_KEY)) == null) {
|
||||||
|
throw new RuntimeException("腾讯实时 ASR 模型必须配置 mediaConfig.tencentSecretKey");
|
||||||
|
}
|
||||||
|
if (dto.getModelCode() == null || dto.getModelCode().isBlank()) {
|
||||||
|
throw new RuntimeException("腾讯实时 ASR 模型必须配置 modelCode");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private BigDecimal readConfigDecimal(Object value) {
|
private BigDecimal readConfigDecimal(Object value) {
|
||||||
if (value == null) {
|
if (value == null) {
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -67,6 +67,22 @@ public class RealtimeMeetingSessionStateServiceImpl implements RealtimeMeetingSe
|
||||||
writeState(state);
|
writeState(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void rememberSpeakerContext(Long meetingId, String speakerContextId) {
|
||||||
|
if (meetingId == null || speakerContextId == null || speakerContextId.isBlank()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
RealtimeMeetingSessionState state = getOrCreateState(meetingId);
|
||||||
|
RealtimeMeetingResumeConfig resumeConfig = state.getResumeConfig();
|
||||||
|
if (resumeConfig == null) {
|
||||||
|
resumeConfig = new RealtimeMeetingResumeConfig();
|
||||||
|
state.setResumeConfig(resumeConfig);
|
||||||
|
}
|
||||||
|
resumeConfig.setSpeakerContextId(speakerContextId.trim());
|
||||||
|
state.setUpdatedAt(System.currentTimeMillis());
|
||||||
|
writeState(state);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void assertCanOpenSession(Long meetingId) {
|
public void assertCanOpenSession(Long meetingId) {
|
||||||
RealtimeMeetingSessionStatusVO status = getStatus(meetingId);
|
RealtimeMeetingSessionStatusVO status = getStatus(meetingId);
|
||||||
|
|
|
||||||
|
|
@ -39,10 +39,10 @@ public class RealtimeMeetingSocketSessionServiceImpl implements RealtimeMeetingS
|
||||||
Boolean enableTextRefine, Boolean saveAudio,
|
Boolean enableTextRefine, Boolean saveAudio,
|
||||||
List<Map<String, Object>> hotwords, LoginUser loginUser) {
|
List<Map<String, Object>> hotwords, LoginUser loginUser) {
|
||||||
if (meetingId == null) {
|
if (meetingId == null) {
|
||||||
throw new RuntimeException("浼氳 ID 涓嶈兘涓虹┖");
|
throw new RuntimeException("会议 ID 不能为空");
|
||||||
}
|
}
|
||||||
if (asrModelId == null) {
|
if (asrModelId == null) {
|
||||||
throw new RuntimeException("ASR 妯″瀷 ID 涓嶈兘涓虹┖");
|
throw new RuntimeException("ASR 模型 ID 不能为空");
|
||||||
}
|
}
|
||||||
|
|
||||||
Meeting meeting = meetingAccessService.requireMeeting(meetingId);
|
Meeting meeting = meetingAccessService.requireMeeting(meetingId);
|
||||||
|
|
@ -53,13 +53,13 @@ public class RealtimeMeetingSocketSessionServiceImpl implements RealtimeMeetingS
|
||||||
|
|
||||||
AiModelVO asrModel = aiModelService.getModelById(asrModelId, "ASR");
|
AiModelVO asrModel = aiModelService.getModelById(asrModelId, "ASR");
|
||||||
if (asrModel == null) {
|
if (asrModel == null) {
|
||||||
throw new RuntimeException("ASR 妯″瀷涓嶅瓨鍦?");
|
throw new RuntimeException("ASR 模型不存在");
|
||||||
}
|
}
|
||||||
|
|
||||||
RealtimeAsrChannel realtimeAsrChannel = realtimeAsrChannelFactory.getRequired(asrModel.getProvider());
|
RealtimeAsrChannel realtimeAsrChannel = realtimeAsrChannelFactory.getRequired(asrModel.getProvider());
|
||||||
String targetWsUrl = realtimeAsrChannel.resolveTargetWsUrl(asrModel);
|
String targetWsUrl = realtimeAsrChannel.resolveTargetWsUrl(asrModel);
|
||||||
if (targetWsUrl == null || targetWsUrl.isBlank()) {
|
if (targetWsUrl == null || targetWsUrl.isBlank()) {
|
||||||
throw new RuntimeException("ASR 妯″瀷鏈厤缃?WebSocket 鍦板潃");
|
throw new RuntimeException("ASR 模型未配置 WebSocket 地址");
|
||||||
}
|
}
|
||||||
|
|
||||||
RealtimeMeetingResumeConfig resumeConfig = new RealtimeMeetingResumeConfig();
|
RealtimeMeetingResumeConfig resumeConfig = new RealtimeMeetingResumeConfig();
|
||||||
|
|
@ -87,6 +87,8 @@ public class RealtimeMeetingSocketSessionServiceImpl implements RealtimeMeetingS
|
||||||
sessionData.setAsrModelId(asrModelId);
|
sessionData.setAsrModelId(asrModelId);
|
||||||
sessionData.setProvider(realtimeAsrChannelFactory.normalizeProvider(asrModel.getProvider()));
|
sessionData.setProvider(realtimeAsrChannelFactory.normalizeProvider(asrModel.getProvider()));
|
||||||
sessionData.setTargetWsUrl(targetWsUrl);
|
sessionData.setTargetWsUrl(targetWsUrl);
|
||||||
|
sessionData.setModelCode(asrModel.getModelCode());
|
||||||
|
sessionData.setMediaConfig(asrModel.getMediaConfig());
|
||||||
|
|
||||||
String sessionToken = UUID.randomUUID().toString().replace("-", "");
|
String sessionToken = UUID.randomUUID().toString().replace("-", "");
|
||||||
socketSessionCache.save(sessionToken, sessionData);
|
socketSessionCache.save(sessionToken, sessionData);
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,10 @@ public interface RealtimeAsrChannel {
|
||||||
|
|
||||||
void handleFrontendBinary(RealtimeAsrChannelContext context, byte[] payload);
|
void handleFrontendBinary(RealtimeAsrChannelContext context, byte[] payload);
|
||||||
|
|
||||||
|
default void onFrontendDetached(RealtimeAsrChannelContext context) {
|
||||||
|
// default no-op
|
||||||
|
}
|
||||||
|
|
||||||
void closeMeeting(RealtimeAsrChannelContext context);
|
void closeMeeting(RealtimeAsrChannelContext context);
|
||||||
|
|
||||||
boolean isOpen(RealtimeAsrChannelContext context);
|
boolean isOpen(RealtimeAsrChannelContext context);
|
||||||
|
|
|
||||||
|
|
@ -100,17 +100,22 @@ public class RealtimeMeetingTranscriptCacheServiceImpl implements RealtimeMeetin
|
||||||
Integer sentenceId = sentence.has("sentence_id") && sentence.get("sentence_id").canConvertToInt()
|
Integer sentenceId = sentence.has("sentence_id") && sentence.get("sentence_id").canConvertToInt()
|
||||||
? sentence.get("sentence_id").asInt()
|
? sentence.get("sentence_id").asInt()
|
||||||
: null;
|
: null;
|
||||||
String sentenceKey = sentenceId == null ? "sentence-" + nextLegacySequence(state) : "sentence-" + sentenceId;
|
String upstreamSentenceKey = readText(sentence, "sentence_key");
|
||||||
|
String sentenceKey = upstreamSentenceKey != null && !upstreamSentenceKey.isBlank()
|
||||||
|
? upstreamSentenceKey
|
||||||
|
: sentenceId == null ? "sentence-" + nextLegacySequence(state) : "sentence-" + sentenceId;
|
||||||
RealtimeMeetingTranscriptCacheItem item = findBySentenceKey(state, sentenceKey);
|
RealtimeMeetingTranscriptCacheItem item = findBySentenceKey(state, sentenceKey);
|
||||||
long now = System.currentTimeMillis();
|
long now = System.currentTimeMillis();
|
||||||
if (item == null) {
|
if (item == null) {
|
||||||
item = new RealtimeMeetingTranscriptCacheItem();
|
item = new RealtimeMeetingTranscriptCacheItem();
|
||||||
item.setSentenceKey(sentenceKey);
|
item.setSentenceKey(sentenceKey);
|
||||||
|
item.setSentenceGroupKey(upstreamSentenceKey);
|
||||||
item.setSentenceId(sentenceId);
|
item.setSentenceId(sentenceId);
|
||||||
item.setSortOrder(nextSortOrder(state));
|
item.setSortOrder(nextSortOrder(state));
|
||||||
item.setFirstReceivedAt(now);
|
item.setFirstReceivedAt(now);
|
||||||
state.getItems().add(item);
|
state.getItems().add(item);
|
||||||
}
|
}
|
||||||
|
item.setSentenceGroupKey(upstreamSentenceKey);
|
||||||
item.setSentenceType(readInteger(sentence, "sentence_type"));
|
item.setSentenceType(readInteger(sentence, "sentence_type"));
|
||||||
item.setSpeakerId(resolveSpeakerId(sentence));
|
item.setSpeakerId(resolveSpeakerId(sentence));
|
||||||
item.setSpeakerName(readText(sentence, "speaker_name"));
|
item.setSpeakerName(readText(sentence, "speaker_name"));
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,506 @@
|
||||||
|
package com.imeeting.service.realtime.impl;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.JsonNode;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
|
import com.imeeting.dto.biz.AiModelVO;
|
||||||
|
import com.imeeting.enums.ModelProviderEnum;
|
||||||
|
import com.imeeting.service.biz.RealtimeMeetingSessionStateService;
|
||||||
|
import com.imeeting.service.realtime.RealtimeAsrChannel;
|
||||||
|
import com.imeeting.service.realtime.RealtimeAsrChannelContext;
|
||||||
|
import com.imeeting.service.realtime.RealtimeMeetingTranscriptCacheService;
|
||||||
|
import com.tencent.asrspeaker.SpeakerConstant;
|
||||||
|
import com.tencent.asrspeaker.SpeakerRecognitionListener;
|
||||||
|
import com.tencent.asrspeaker.SpeakerRecognitionResponse;
|
||||||
|
import com.tencent.asrspeaker.SpeakerRecognizer;
|
||||||
|
import com.tencent.asrspeaker.SpeakerRecognizerRequest;
|
||||||
|
import com.tencent.asrspeaker.SpeakerSentenceItem;
|
||||||
|
import com.tencent.core.ws.Credential;
|
||||||
|
import com.tencent.core.ws.SpeechClient;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
import org.springframework.web.socket.CloseStatus;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Component
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class TencentRealtimeAsrChannel implements RealtimeAsrChannel {
|
||||||
|
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
private static final String TARGET_WS_URL = "tencent-sdk://speaker-recognizer";
|
||||||
|
private static final String MEDIA_TENCENT_APP_ID = "tencentAppId";
|
||||||
|
private static final String MEDIA_TENCENT_SECRET_ID = "tencentSecretId";
|
||||||
|
private static final String MEDIA_TENCENT_SECRET_KEY = "tencentSecretKey";
|
||||||
|
private static final String STATE_CONNECTED = "tencentConnected";
|
||||||
|
private static final String STATE_STARTED = "tencentStarted";
|
||||||
|
private static final String STATE_RECOGNIZER = "tencentRecognizer";
|
||||||
|
private static final String STATE_SPEECH_CLIENT = "tencentSpeechClient";
|
||||||
|
private static final String STATE_STOP_REQUESTED = "tencentStopRequested";
|
||||||
|
private static final String STATE_MEETING_COMPLETE_REQUESTED = "tencentMeetingCompleteRequested";
|
||||||
|
private static final String STATE_FRONTEND_DETACHED = "tencentFrontendDetached";
|
||||||
|
private static final String STATE_SPEAKER_CONTEXT_ID = "speakerContextId";
|
||||||
|
private static final String STATE_VOICE_ID = "voiceId";
|
||||||
|
private static final String STATE_PENDING_AUDIO_FRAMES = "pendingAudioFrames";
|
||||||
|
private static final String STATE_MODEL_CODE = "modelCode";
|
||||||
|
private static final String STATE_MEDIA_CONFIG = "mediaConfig";
|
||||||
|
|
||||||
|
private final RealtimeMeetingSessionStateService realtimeMeetingSessionStateService;
|
||||||
|
private final RealtimeMeetingTranscriptCacheService realtimeMeetingTranscriptCacheService;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean supports(String provider) {
|
||||||
|
return ModelProviderEnum.TENCENT.getCode().equalsIgnoreCase(provider);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String resolveTargetWsUrl(AiModelVO model) {
|
||||||
|
return TARGET_WS_URL;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, Object> buildStartMessage(AiModelVO model,
|
||||||
|
String mode,
|
||||||
|
String language,
|
||||||
|
Integer useSpkId,
|
||||||
|
Boolean enablePunctuation,
|
||||||
|
Boolean enableItn,
|
||||||
|
Boolean enableTextRefine,
|
||||||
|
Boolean saveAudio,
|
||||||
|
List<Map<String, Object>> hotwords) {
|
||||||
|
Map<String, Object> payload = new HashMap<>();
|
||||||
|
payload.put("provider", ModelProviderEnum.TENCENT.getCode());
|
||||||
|
payload.put("engine_model_type", model.getModelCode());
|
||||||
|
payload.put("language", language);
|
||||||
|
|
||||||
|
Map<String, Object> root = new HashMap<>();
|
||||||
|
root.put("type", "start");
|
||||||
|
root.put("payload", payload);
|
||||||
|
return root;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void connect(RealtimeAsrChannelContext context) throws Exception {
|
||||||
|
String connectionId = currentConnectionId(context);
|
||||||
|
if (connectionId == null || !realtimeMeetingSessionStateService.activate(context.getMeetingId(), connectionId)) {
|
||||||
|
context.getCallback().sendFrontendError(context.getMeetingId(), "REALTIME_ACTIVE_CONNECTION_EXISTS", "当前会议无法激活这条前端连接");
|
||||||
|
context.getCallback().closeFrontend(context.getMeetingId(), CloseStatus.POLICY_VIOLATION.withReason("当前会议无法激活这条前端连接"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
context.getChannelState().put(STATE_CONNECTED, Boolean.TRUE);
|
||||||
|
context.getChannelState().put(STATE_STARTED, Boolean.FALSE);
|
||||||
|
context.getChannelState().put(STATE_STOP_REQUESTED, Boolean.FALSE);
|
||||||
|
context.getChannelState().put(STATE_MEETING_COMPLETE_REQUESTED, Boolean.FALSE);
|
||||||
|
context.getChannelState().put(STATE_FRONTEND_DETACHED, Boolean.FALSE);
|
||||||
|
context.getChannelState().putIfAbsent(STATE_PENDING_AUDIO_FRAMES, new java.util.ArrayList<byte[]>());
|
||||||
|
context.getCallback().onChannelOpen(context.getMeetingId());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void handleFrontendText(RealtimeAsrChannelContext context, String payload) {
|
||||||
|
if (looksLikeStartMessage(payload)) {
|
||||||
|
startRecognizerIfNecessary(context);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (looksLikeStopMessage(payload)) {
|
||||||
|
context.getChannelState().put(STATE_STOP_REQUESTED, Boolean.TRUE);
|
||||||
|
stopRecognizer(context);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void handleFrontendBinary(RealtimeAsrChannelContext context, byte[] payload) {
|
||||||
|
SpeakerRecognizer recognizer = getRecognizer(context);
|
||||||
|
if (payload == null || payload.length == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (recognizer == null) {
|
||||||
|
queuePendingAudioFrame(context, payload);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
recognizer.write(payload);
|
||||||
|
} catch (Exception ex) {
|
||||||
|
handleChannelFailure(context, "REALTIME_UPSTREAM_ERROR", "腾讯实时 ASR 音频发送失败", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void closeMeeting(RealtimeAsrChannelContext context) {
|
||||||
|
context.getChannelState().put(STATE_STOP_REQUESTED, Boolean.TRUE);
|
||||||
|
context.getChannelState().put(STATE_MEETING_COMPLETE_REQUESTED, Boolean.TRUE);
|
||||||
|
stopRecognizer(context);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isOpen(RealtimeAsrChannelContext context) {
|
||||||
|
return !Boolean.TRUE.equals(context.getChannelState().get(STATE_MEETING_COMPLETE_REQUESTED));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onFrontendDetached(RealtimeAsrChannelContext context) {
|
||||||
|
context.getChannelState().put(STATE_FRONTEND_DETACHED, Boolean.TRUE);
|
||||||
|
context.getChannelState().put(STATE_STOP_REQUESTED, Boolean.TRUE);
|
||||||
|
stopRecognizer(context);
|
||||||
|
}
|
||||||
|
|
||||||
|
static String buildFrontendTranscriptMessage(String sentenceKey,
|
||||||
|
String text,
|
||||||
|
boolean isFinal,
|
||||||
|
Integer sentenceId,
|
||||||
|
Long startTime,
|
||||||
|
Long endTime,
|
||||||
|
Integer speakerId) throws JsonProcessingException {
|
||||||
|
ObjectNode root = OBJECT_MAPPER.createObjectNode();
|
||||||
|
root.put("type", isFinal ? "segment" : "partial");
|
||||||
|
ObjectNode data = root.putObject("data");
|
||||||
|
data.put("text", text);
|
||||||
|
data.put("is_final", isFinal);
|
||||||
|
if (sentenceId != null) {
|
||||||
|
data.put("sentence_id", sentenceId);
|
||||||
|
}
|
||||||
|
if (sentenceKey != null && !sentenceKey.isBlank()) {
|
||||||
|
data.put("sentence_key", sentenceKey);
|
||||||
|
}
|
||||||
|
if (startTime != null) {
|
||||||
|
data.put("start", startTime / 1000D);
|
||||||
|
}
|
||||||
|
if (endTime != null) {
|
||||||
|
data.put("end", endTime / 1000D);
|
||||||
|
}
|
||||||
|
if (speakerId != null) {
|
||||||
|
data.put("speaker_id", String.valueOf(speakerId));
|
||||||
|
}
|
||||||
|
return OBJECT_MAPPER.writeValueAsString(root);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void startRecognizerIfNecessary(RealtimeAsrChannelContext context) {
|
||||||
|
synchronized (context.getChannelState()) {
|
||||||
|
if (Boolean.TRUE.equals(context.getChannelState().get(STATE_STARTED))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
SpeechClient speechClient = createSpeechClient();
|
||||||
|
SpeakerRecognizerRequest request = createRecognizerRequest(context);
|
||||||
|
SpeakerRecognizer recognizer = createRecognizer(context, speechClient, request);
|
||||||
|
context.getChannelState().put(STATE_SPEECH_CLIENT, speechClient);
|
||||||
|
context.getChannelState().put(STATE_RECOGNIZER, recognizer);
|
||||||
|
context.getChannelState().put(STATE_VOICE_ID, request.getVoiceId());
|
||||||
|
recognizer.start();
|
||||||
|
context.getChannelState().put(STATE_STARTED, Boolean.TRUE);
|
||||||
|
flushPendingAudioFrames(context, recognizer);
|
||||||
|
} catch (Exception ex) {
|
||||||
|
handleChannelFailure(context, "REALTIME_UPSTREAM_CONNECT_FAILED", "腾讯实时 ASR 启动失败", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SpeechClient createSpeechClient() {
|
||||||
|
return new SpeechClient(SpeakerConstant.DEFAULT_RT_REQ_URL);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SpeakerRecognizer createRecognizer(RealtimeAsrChannelContext context,
|
||||||
|
SpeechClient speechClient,
|
||||||
|
SpeakerRecognizerRequest request) {
|
||||||
|
return new SpeakerRecognizer(
|
||||||
|
speechClient,
|
||||||
|
buildCredential(context),
|
||||||
|
request,
|
||||||
|
new TencentRecognitionListener(context)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Credential buildCredential(RealtimeAsrChannelContext context) {
|
||||||
|
Map<String, Object> mediaConfig = getMediaConfig(context);
|
||||||
|
String appId = readConfigString(mediaConfig, MEDIA_TENCENT_APP_ID);
|
||||||
|
String secretId = readConfigString(mediaConfig, MEDIA_TENCENT_SECRET_ID);
|
||||||
|
String secretKey = readConfigString(mediaConfig, MEDIA_TENCENT_SECRET_KEY);
|
||||||
|
if (appId == null || secretId == null || secretKey == null) {
|
||||||
|
throw new RuntimeException("腾讯实时 ASR 会话缺少鉴权配置");
|
||||||
|
}
|
||||||
|
return new Credential(appId, secretId, secretKey);
|
||||||
|
}
|
||||||
|
|
||||||
|
SpeakerRecognizerRequest createRecognizerRequest(RealtimeAsrChannelContext context) {
|
||||||
|
SpeakerRecognizerRequest request = SpeakerRecognizerRequest.init();
|
||||||
|
request.setEngineModelType(resolveEngineModelType(context));
|
||||||
|
request.setVoiceFormat(SpeakerConstant.AUDIO_FORMAT_PCM);
|
||||||
|
request.setVoiceId(UUID.randomUUID().toString());
|
||||||
|
//是否需要vad
|
||||||
|
request.setNeedVad(1);
|
||||||
|
//vad静默时间
|
||||||
|
request.setVadSilenceTime(1000);
|
||||||
|
// 分句策略参数 0小1大
|
||||||
|
request.setSentenceStrategy(0);
|
||||||
|
//是否进行阿拉伯数字智能转换 0否1智能 23:打开数学相关转化
|
||||||
|
request.setConvertNumMode(1);
|
||||||
|
|
||||||
|
request.setSpeakerDiarization(1);
|
||||||
|
//启动断点续传
|
||||||
|
request.setEnableSpeakerContext(1);
|
||||||
|
String speakerContextId = resolveSpeakerContextId(context);
|
||||||
|
if (speakerContextId != null) {
|
||||||
|
request.setSpeakerContextId(speakerContextId);
|
||||||
|
}
|
||||||
|
return request;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String resolveEngineModelType(RealtimeAsrChannelContext context) {
|
||||||
|
Object modelCode = context.getChannelState().get(STATE_MODEL_CODE);
|
||||||
|
if (modelCode instanceof String value && !value.isBlank()) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
return "16k_zh";
|
||||||
|
}
|
||||||
|
|
||||||
|
private String resolveSpeakerContextId(RealtimeAsrChannelContext context) {
|
||||||
|
Object speakerContextId = context.getChannelState().get(STATE_SPEAKER_CONTEXT_ID);
|
||||||
|
if (speakerContextId instanceof String value && !value.isBlank()) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
var status = realtimeMeetingSessionStateService.getStatus(context.getMeetingId());
|
||||||
|
if (status == null || status.getResumeConfig() == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
String value = status.getResumeConfig().getSpeakerContextId();
|
||||||
|
if (value == null || value.isBlank()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
context.getChannelState().put(STATE_SPEAKER_CONTEXT_ID, value);
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
private Map<String, Object> getMediaConfig(RealtimeAsrChannelContext context) {
|
||||||
|
Object mediaConfig = context.getChannelState().get(STATE_MEDIA_CONFIG);
|
||||||
|
if (mediaConfig instanceof Map<?, ?> map) {
|
||||||
|
return (Map<String, Object>) map;
|
||||||
|
}
|
||||||
|
return Map.of();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String readConfigString(Map<String, Object> mediaConfig, String key) {
|
||||||
|
Object value = mediaConfig.get(key);
|
||||||
|
if (value == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
String text = String.valueOf(value).trim();
|
||||||
|
return text.isEmpty() ? null : text;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SpeakerRecognizer getRecognizer(RealtimeAsrChannelContext context) {
|
||||||
|
Object recognizer = context.getChannelState().get(STATE_RECOGNIZER);
|
||||||
|
return recognizer instanceof SpeakerRecognizer value ? value : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SpeechClient getSpeechClient(RealtimeAsrChannelContext context) {
|
||||||
|
Object speechClient = context.getChannelState().get(STATE_SPEECH_CLIENT);
|
||||||
|
return speechClient instanceof SpeechClient value ? value : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void stopRecognizer(RealtimeAsrChannelContext context) {
|
||||||
|
SpeakerRecognizer recognizer = getRecognizer(context);
|
||||||
|
if (recognizer == null) {
|
||||||
|
shutdownSdkResources(context);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
recognizer.stop();
|
||||||
|
} catch (Exception ex) {
|
||||||
|
log.warn("Tencent realtime ASR stop failed, meetingId={}, sessionId={}",
|
||||||
|
context.getMeetingId(), currentConnectionId(context), ex);
|
||||||
|
shutdownSdkResources(context);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void shutdownSdkResources(RealtimeAsrChannelContext context) {
|
||||||
|
SpeakerRecognizer recognizer = getRecognizer(context);
|
||||||
|
if (recognizer != null) {
|
||||||
|
try {
|
||||||
|
recognizer.close();
|
||||||
|
} catch (Exception ignored) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
SpeechClient speechClient = getSpeechClient(context);
|
||||||
|
if (speechClient != null) {
|
||||||
|
try {
|
||||||
|
speechClient.shutdown();
|
||||||
|
} catch (Exception ignored) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
context.getChannelState().remove(STATE_RECOGNIZER);
|
||||||
|
context.getChannelState().remove(STATE_SPEECH_CLIENT);
|
||||||
|
context.getChannelState().put(STATE_STARTED, Boolean.FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void forwardResponse(RealtimeAsrChannelContext context,
|
||||||
|
SpeakerRecognitionResponse response,
|
||||||
|
boolean forceFinal) {
|
||||||
|
if (response == null || response.getSentences() == null || response.getSentences().getSentenceList() == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
rememberSpeakerContext(context, response);
|
||||||
|
String cachePayload = buildCachePayload(response, forceFinal);
|
||||||
|
realtimeMeetingTranscriptCacheService.mergeUpstreamMessage(context.getMeetingId(), cachePayload);
|
||||||
|
for (SpeakerSentenceItem item : response.getSentences().getSentenceList()) {
|
||||||
|
if (item == null || item.getSentence() == null || item.getSentence().trim().isEmpty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
boolean isFinal = forceFinal || item.getSentenceType() == 1;
|
||||||
|
context.getCallback().sendFrontendText(
|
||||||
|
context.getMeetingId(),
|
||||||
|
buildFrontendTranscriptMessage(
|
||||||
|
buildSentenceKey(context, item.getSentenceId()),
|
||||||
|
item.getSentence().trim(),
|
||||||
|
isFinal,
|
||||||
|
item.getSentenceId(),
|
||||||
|
item.getStartTime(),
|
||||||
|
item.getEndTime(),
|
||||||
|
item.getSpeakerId()
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} catch (Exception ex) {
|
||||||
|
handleChannelFailure(context, "REALTIME_UPSTREAM_ERROR", "腾讯实时 ASR 结果转发失败", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void rememberSpeakerContext(RealtimeAsrChannelContext context, SpeakerRecognitionResponse response) {
|
||||||
|
if (response == null || response.getSpeakerContextId() == null || response.getSpeakerContextId().isBlank()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
String speakerContextId = response.getSpeakerContextId().trim();
|
||||||
|
context.getChannelState().put(STATE_SPEAKER_CONTEXT_ID, speakerContextId);
|
||||||
|
realtimeMeetingSessionStateService.rememberSpeakerContext(context.getMeetingId(), speakerContextId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildSentenceKey(RealtimeAsrChannelContext context, Integer sentenceId) {
|
||||||
|
Object voiceId = context.getChannelState().get(STATE_VOICE_ID);
|
||||||
|
if (!(voiceId instanceof String value) || value.isBlank() || sentenceId == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return value + "-" + sentenceId;
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
private void queuePendingAudioFrame(RealtimeAsrChannelContext context, byte[] payload) {
|
||||||
|
Object frames = context.getChannelState().get(STATE_PENDING_AUDIO_FRAMES);
|
||||||
|
if (frames instanceof List<?> list) {
|
||||||
|
((List<byte[]>) list).add(payload.clone());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
List<byte[]> next = new java.util.ArrayList<>();
|
||||||
|
next.add(payload.clone());
|
||||||
|
context.getChannelState().put(STATE_PENDING_AUDIO_FRAMES, next);
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
private void flushPendingAudioFrames(RealtimeAsrChannelContext context, SpeakerRecognizer recognizer) {
|
||||||
|
Object frames = context.getChannelState().get(STATE_PENDING_AUDIO_FRAMES);
|
||||||
|
if (!(frames instanceof List<?> list) || list.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
List<byte[]> pendingFrames = (List<byte[]>) list;
|
||||||
|
for (byte[] frame : pendingFrames) {
|
||||||
|
if (frame != null && frame.length > 0) {
|
||||||
|
recognizer.write(frame);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pendingFrames.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildCachePayload(SpeakerRecognitionResponse response, boolean forceFinal) throws JsonProcessingException {
|
||||||
|
ObjectNode root = OBJECT_MAPPER.createObjectNode();
|
||||||
|
root.put("type", forceFinal ? "end" : "sentences");
|
||||||
|
ArrayNode sentences = root.putArray("sentences");
|
||||||
|
for (SpeakerSentenceItem item : response.getSentences().getSentenceList()) {
|
||||||
|
if (item == null || item.getSentence() == null || item.getSentence().trim().isEmpty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
ObjectNode sentenceNode = sentences.addObject();
|
||||||
|
sentenceNode.put("sentence", item.getSentence().trim());
|
||||||
|
sentenceNode.put("sentence_type", forceFinal ? 1 : item.getSentenceType());
|
||||||
|
sentenceNode.put("sentence_id", item.getSentenceId());
|
||||||
|
sentenceNode.put("speaker_id", String.valueOf(item.getSpeakerId()));
|
||||||
|
sentenceNode.put("start_time", item.getStartTime());
|
||||||
|
sentenceNode.put("end_time", item.getEndTime());
|
||||||
|
}
|
||||||
|
return OBJECT_MAPPER.writeValueAsString(root);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void handleChannelFailure(RealtimeAsrChannelContext context, String code, String message, Exception ex) {
|
||||||
|
log.error("Tencent realtime ASR channel failed, meetingId={}, sessionId={}",
|
||||||
|
context.getMeetingId(), currentConnectionId(context), ex);
|
||||||
|
shutdownSdkResources(context);
|
||||||
|
context.getCallback().sendFrontendError(context.getMeetingId(), code, message);
|
||||||
|
CompletableFuture.delayedExecutor(200, TimeUnit.MILLISECONDS).execute(
|
||||||
|
() -> context.getCallback().closeFrontend(context.getMeetingId(), CloseStatus.SERVER_ERROR)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String currentConnectionId(RealtimeAsrChannelContext context) {
|
||||||
|
return context.getRawSession() == null ? null : context.getRawSession().getId();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean looksLikeStartMessage(String payload) {
|
||||||
|
if (payload == null || payload.isBlank()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
String normalized = payload.replaceAll("\\s+", "");
|
||||||
|
return normalized.contains("\"type\":\"start\"");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean looksLikeStopMessage(String payload) {
|
||||||
|
if (payload == null || payload.isBlank()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
String normalized = payload.replaceAll("\\s+", "");
|
||||||
|
return normalized.contains("\"type\":\"stop\"");
|
||||||
|
}
|
||||||
|
|
||||||
|
private final class TencentRecognitionListener extends SpeakerRecognitionListener {
|
||||||
|
private final RealtimeAsrChannelContext context;
|
||||||
|
|
||||||
|
private TencentRecognitionListener(RealtimeAsrChannelContext context) {
|
||||||
|
this.context = context;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onRecognitionStart(SpeakerRecognitionResponse response) {
|
||||||
|
rememberSpeakerContext(context, response);
|
||||||
|
log.info("Tencent realtime ASR started, meetingId={}, sessionId={}",
|
||||||
|
context.getMeetingId(), currentConnectionId(context));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onRecognitionSentences(SpeakerRecognitionResponse response) {
|
||||||
|
forwardResponse(context, response, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onSentenceEnd(SpeakerRecognitionResponse response) {
|
||||||
|
forwardResponse(context, response, true);
|
||||||
|
shutdownSdkResources(context);
|
||||||
|
if (Boolean.TRUE.equals(context.getChannelState().get(STATE_MEETING_COMPLETE_REQUESTED))) {
|
||||||
|
context.getCallback().removeMeetingSession(context.getMeetingId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onFail(SpeakerRecognitionResponse response, Exception error) {
|
||||||
|
handleChannelFailure(context, "REALTIME_UPSTREAM_ERROR", "腾讯实时 ASR 识别失败", error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -165,7 +165,13 @@ public class RealtimeMeetingProxyWebSocketHandler extends AbstractWebSocketHandl
|
||||||
synchronized (lockForMeeting(meetingId)) {
|
synchronized (lockForMeeting(meetingId)) {
|
||||||
meetingSession = meetingSessions.get(meetingId);
|
meetingSession = meetingSessions.get(meetingId);
|
||||||
if (meetingSession != null && meetingSession.isChannelOpen()) {
|
if (meetingSession != null && meetingSession.isChannelOpen()) {
|
||||||
|
String previousSessionId = meetingSession.context.getRawSession() == null
|
||||||
|
? null
|
||||||
|
: meetingSession.context.getRawSession().getId();
|
||||||
meetingSession.clearFrontendIfClosed();
|
meetingSession.clearFrontendIfClosed();
|
||||||
|
if (previousSessionId != null && !meetingSession.hasOpenFrontend()) {
|
||||||
|
realtimeMeetingSessionStateService.pauseByDisconnect(meetingId, previousSessionId);
|
||||||
|
}
|
||||||
if (meetingSession.hasOpenFrontend()) {
|
if (meetingSession.hasOpenFrontend()) {
|
||||||
sendFrontendError(frontendSession, "REALTIME_ACTIVE_CONNECTION_EXISTS", "当前会议已有活跃前端连接");
|
sendFrontendError(frontendSession, "REALTIME_ACTIVE_CONNECTION_EXISTS", "当前会议已有活跃前端连接");
|
||||||
frontendSession.close(CloseStatus.POLICY_VIOLATION.withReason("已存在活跃的前端连接"));
|
frontendSession.close(CloseStatus.POLICY_VIOLATION.withReason("已存在活跃的前端连接"));
|
||||||
|
|
@ -188,6 +194,8 @@ public class RealtimeMeetingProxyWebSocketHandler extends AbstractWebSocketHandl
|
||||||
context.setTargetWsUrl(sessionData.getTargetWsUrl());
|
context.setTargetWsUrl(sessionData.getTargetWsUrl());
|
||||||
context.setCallback(new HandlerChannelCallback());
|
context.setCallback(new HandlerChannelCallback());
|
||||||
context.bindFrontendSession(rawSession, frontendSession);
|
context.bindFrontendSession(rawSession, frontendSession);
|
||||||
|
context.getChannelState().put("modelCode", sessionData.getModelCode());
|
||||||
|
context.getChannelState().put("mediaConfig", sessionData.getMediaConfig());
|
||||||
meetingSession = new MeetingChannelSession(meetingId, channel, context);
|
meetingSession = new MeetingChannelSession(meetingId, channel, context);
|
||||||
meetingSessions.put(meetingId, meetingSession);
|
meetingSessions.put(meetingId, meetingSession);
|
||||||
}
|
}
|
||||||
|
|
@ -250,6 +258,9 @@ public class RealtimeMeetingProxyWebSocketHandler extends AbstractWebSocketHandl
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
synchronized (lockForMeeting(meetingId)) {
|
synchronized (lockForMeeting(meetingId)) {
|
||||||
|
if (meetingSession.context.getRawSession() != null && meetingSession.context.getRawSession().getId().equals(sessionId)) {
|
||||||
|
meetingSession.channel.onFrontendDetached(meetingSession.context);
|
||||||
|
}
|
||||||
meetingSession.detachFrontend(sessionId);
|
meetingSession.detachFrontend(sessionId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -349,6 +349,64 @@ class AiModelServiceImplTest {
|
||||||
assertNull(captor.getValue().getApiKey());
|
assertNull(captor.getValue().getApiKey());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void saveModelShouldRejectTencentAsrWithoutSecretKey() {
|
||||||
|
AiModelServiceImpl service = new AiModelServiceImpl(
|
||||||
|
objectMapper,
|
||||||
|
mock(AsrModelMapper.class),
|
||||||
|
mock(LlmModelMapper.class)
|
||||||
|
);
|
||||||
|
|
||||||
|
AiModelDTO dto = new AiModelDTO();
|
||||||
|
dto.setModelType("ASR");
|
||||||
|
dto.setModelName("tencent-asr");
|
||||||
|
dto.setProvider("tencent");
|
||||||
|
dto.setModelCode("16k_zh");
|
||||||
|
dto.setIsDefault(0);
|
||||||
|
dto.setStatus(1);
|
||||||
|
dto.setMediaConfig(Map.of(
|
||||||
|
"tencentAppId", "app-id",
|
||||||
|
"tencentSecretId", "secret-id"
|
||||||
|
));
|
||||||
|
|
||||||
|
RuntimeException ex = assertThrows(RuntimeException.class, () -> service.saveModel(dto));
|
||||||
|
assertEquals("腾讯实时 ASR 模型必须配置 mediaConfig.tencentSecretKey", ex.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void saveModelShouldPersistTencentAsrWithoutBaseUrl() {
|
||||||
|
AsrModelMapper asrModelMapper = mock(AsrModelMapper.class);
|
||||||
|
when(asrModelMapper.insert(any(AsrModel.class))).thenReturn(1);
|
||||||
|
|
||||||
|
AiModelServiceImpl service = new AiModelServiceImpl(
|
||||||
|
objectMapper,
|
||||||
|
asrModelMapper,
|
||||||
|
mock(LlmModelMapper.class)
|
||||||
|
);
|
||||||
|
|
||||||
|
AiModelDTO dto = new AiModelDTO();
|
||||||
|
dto.setModelType("ASR");
|
||||||
|
dto.setModelName("tencent-asr");
|
||||||
|
dto.setProvider("tencent");
|
||||||
|
dto.setModelCode("16k_zh");
|
||||||
|
dto.setIsDefault(0);
|
||||||
|
dto.setStatus(1);
|
||||||
|
dto.setMediaConfig(Map.of(
|
||||||
|
"tencentAppId", "app-id",
|
||||||
|
"tencentSecretId", "secret-id",
|
||||||
|
"tencentSecretKey", "secret-key"
|
||||||
|
));
|
||||||
|
|
||||||
|
service.saveModel(dto);
|
||||||
|
|
||||||
|
ArgumentCaptor<AsrModel> captor = ArgumentCaptor.forClass(AsrModel.class);
|
||||||
|
verify(asrModelMapper, times(1)).insert(captor.capture());
|
||||||
|
assertEquals("tencent", captor.getValue().getProvider());
|
||||||
|
assertEquals("16k_zh", captor.getValue().getModelCode());
|
||||||
|
assertEquals("secret-key", captor.getValue().getMediaConfig().get("tencentSecretKey"));
|
||||||
|
assertNull(captor.getValue().getBaseUrl());
|
||||||
|
}
|
||||||
|
|
||||||
private void captureRequest(HttpExchange exchange,
|
private void captureRequest(HttpExchange exchange,
|
||||||
AtomicReference<String> requestPath,
|
AtomicReference<String> requestPath,
|
||||||
AtomicReference<String> authorization,
|
AtomicReference<String> authorization,
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,7 @@ const AiModels: React.FC = () => {
|
||||||
const provider = Form.useWatch("provider", form);
|
const provider = Form.useWatch("provider", form);
|
||||||
const isDefaultChecked = Form.useWatch("isDefaultChecked", form);
|
const isDefaultChecked = Form.useWatch("isDefaultChecked", form);
|
||||||
const isLocalProvider = String(provider || "").toLowerCase() === "custom";
|
const isLocalProvider = String(provider || "").toLowerCase() === "custom";
|
||||||
|
const isTencentProvider = String(provider || "").toLowerCase() === "tencent";
|
||||||
|
|
||||||
const isPlatformAdmin = useMemo(() => {
|
const isPlatformAdmin = useMemo(() => {
|
||||||
const profileStr = sessionStorage.getItem("userProfile");
|
const profileStr = sessionStorage.getItem("userProfile");
|
||||||
|
|
@ -133,11 +134,17 @@ const AiModels: React.FC = () => {
|
||||||
setEditingId(record.id);
|
setEditingId(record.id);
|
||||||
const speakerModel = record.mediaConfig?.speakerModel;
|
const speakerModel = record.mediaConfig?.speakerModel;
|
||||||
const svThreshold = record.mediaConfig?.svThreshold;
|
const svThreshold = record.mediaConfig?.svThreshold;
|
||||||
|
const tencentAppId = record.mediaConfig?.tencentAppId;
|
||||||
|
const tencentSecretId = record.mediaConfig?.tencentSecretId;
|
||||||
|
const tencentSecretKey = record.mediaConfig?.tencentSecretKey;
|
||||||
form.setFieldsValue({
|
form.setFieldsValue({
|
||||||
...record,
|
...record,
|
||||||
modelType: record.modelType,
|
modelType: record.modelType,
|
||||||
speakerModel,
|
speakerModel,
|
||||||
svThreshold,
|
svThreshold,
|
||||||
|
tencentAppId,
|
||||||
|
tencentSecretId,
|
||||||
|
tencentSecretKey,
|
||||||
isDefaultChecked: record.isDefault === 1,
|
isDefaultChecked: record.isDefault === 1,
|
||||||
statusChecked: record.status === 1,
|
statusChecked: record.status === 1,
|
||||||
});
|
});
|
||||||
|
|
@ -262,6 +269,12 @@ const AiModels: React.FC = () => {
|
||||||
speakerModel: values.speakerModel,
|
speakerModel: values.speakerModel,
|
||||||
svThreshold: values.svThreshold,
|
svThreshold: values.svThreshold,
|
||||||
}
|
}
|
||||||
|
: activeType === "ASR" && isTencentProvider
|
||||||
|
? {
|
||||||
|
tencentAppId: values.tencentAppId,
|
||||||
|
tencentSecretId: values.tencentSecretId,
|
||||||
|
tencentSecretKey: values.tencentSecretKey,
|
||||||
|
}
|
||||||
: undefined,
|
: undefined,
|
||||||
temperature: values.temperature,
|
temperature: values.temperature,
|
||||||
topP: values.topP,
|
topP: values.topP,
|
||||||
|
|
@ -524,6 +537,8 @@ const AiModels: React.FC = () => {
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
|
|
||||||
|
{!isTencentProvider && (
|
||||||
|
<>
|
||||||
<Form.Item name="baseUrl" label="Base URL" rules={[{required: true, message: "请输入 Base URL"}]}>
|
<Form.Item name="baseUrl" label="Base URL" rules={[{required: true, message: "请输入 Base URL"}]}>
|
||||||
<Input placeholder="https://api.example.com"/>
|
<Input placeholder="https://api.example.com"/>
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
|
|
@ -534,6 +549,8 @@ const AiModels: React.FC = () => {
|
||||||
>
|
>
|
||||||
<Input.Password/>
|
<Input.Password/>
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
|
||||||
{(activeType === "LLM" || isLocalProvider) && (
|
{(activeType === "LLM" || isLocalProvider) && (
|
||||||
<Form.Item label="连通性测试">
|
<Form.Item label="连通性测试">
|
||||||
|
|
@ -556,7 +573,10 @@ const AiModels: React.FC = () => {
|
||||||
<Form.Item
|
<Form.Item
|
||||||
name="modelCode"
|
name="modelCode"
|
||||||
noStyle
|
noStyle
|
||||||
rules={activeType === "LLM" ? [{ required: true, message: "请输入或选择模型名称" }] : []}
|
rules={activeType === "LLM" || isTencentProvider ? [{
|
||||||
|
required: true,
|
||||||
|
message: "请输入或选择模型名称"
|
||||||
|
}] : []}
|
||||||
>
|
>
|
||||||
<AutoComplete
|
<AutoComplete
|
||||||
style={{ width: "calc(100% - 100px)" }}
|
style={{ width: "calc(100% - 100px)" }}
|
||||||
|
|
@ -574,9 +594,11 @@ const AiModels: React.FC = () => {
|
||||||
<Input allowClear placeholder="可选择或自定义输入模型名称" />
|
<Input allowClear placeholder="可选择或自定义输入模型名称" />
|
||||||
</AutoComplete>
|
</AutoComplete>
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
|
{!isTencentProvider && (
|
||||||
<Button icon={<SyncOutlined spin={fetchLoading}/>} onClick={handleFetchRemote} style={{width: 100}}>
|
<Button icon={<SyncOutlined spin={fetchLoading}/>} onClick={handleFetchRemote} style={{width: 100}}>
|
||||||
刷新
|
刷新
|
||||||
</Button>
|
</Button>
|
||||||
|
)}
|
||||||
</Space.Compact>
|
</Space.Compact>
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
|
|
||||||
|
|
@ -609,6 +631,38 @@ const AiModels: React.FC = () => {
|
||||||
</Row>
|
</Row>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{activeType === "ASR" && isTencentProvider && (
|
||||||
|
<Row gutter={16}>
|
||||||
|
<Col span={12}>
|
||||||
|
<Form.Item
|
||||||
|
name="tencentAppId"
|
||||||
|
label="App ID"
|
||||||
|
rules={[{required: true, message: "请输入 App ID"}]}
|
||||||
|
>
|
||||||
|
<Input/>
|
||||||
|
</Form.Item>
|
||||||
|
</Col>
|
||||||
|
<Col span={12}>
|
||||||
|
<Form.Item
|
||||||
|
name="tencentSecretId"
|
||||||
|
label="Secret ID"
|
||||||
|
rules={[{required: true, message: "请输入 Secret ID"}]}
|
||||||
|
>
|
||||||
|
<Input/>
|
||||||
|
</Form.Item>
|
||||||
|
</Col>
|
||||||
|
<Col span={24}>
|
||||||
|
<Form.Item
|
||||||
|
name="tencentSecretKey"
|
||||||
|
label="Secret Key"
|
||||||
|
rules={[{required: true, message: "请输入 Secret Key"}]}
|
||||||
|
>
|
||||||
|
<Input.Password/>
|
||||||
|
</Form.Item>
|
||||||
|
</Col>
|
||||||
|
</Row>
|
||||||
|
)}
|
||||||
|
|
||||||
{activeType === "LLM" && (
|
{activeType === "LLM" && (
|
||||||
<>
|
<>
|
||||||
<Form.Item name="apiPath" label="API 路径" initialValue="/v1/chat/completions">
|
<Form.Item name="apiPath" label="API 路径" initialValue="/v1/chat/completions">
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,7 @@ type WsMessage = {
|
||||||
text?: string;
|
text?: string;
|
||||||
is_final?: boolean;
|
is_final?: boolean;
|
||||||
sentence_id?: number;
|
sentence_id?: number;
|
||||||
|
sentence_key?: string;
|
||||||
start?: number;
|
start?: number;
|
||||||
end?: number;
|
end?: number;
|
||||||
speaker_id?: string;
|
speaker_id?: string;
|
||||||
|
|
@ -57,6 +58,7 @@ type WsMessage = {
|
||||||
|
|
||||||
type TranscriptCard = {
|
type TranscriptCard = {
|
||||||
id: string;
|
id: string;
|
||||||
|
sentenceKey?: string;
|
||||||
sentenceId?: number;
|
sentenceId?: number;
|
||||||
speakerName: string;
|
speakerName: string;
|
||||||
userId?: string | number;
|
userId?: string | number;
|
||||||
|
|
@ -67,6 +69,7 @@ type TranscriptCard = {
|
||||||
};
|
};
|
||||||
|
|
||||||
type NormalizedWsMessage = {
|
type NormalizedWsMessage = {
|
||||||
|
sentenceKey?: string;
|
||||||
text: string;
|
text: string;
|
||||||
isFinal: boolean;
|
isFinal: boolean;
|
||||||
sentenceId?: number;
|
sentenceId?: number;
|
||||||
|
|
@ -174,7 +177,10 @@ function toMs(value?: number) {
|
||||||
return Math.round(value * 1000);
|
return Math.round(value * 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildTranscriptCardId(sentenceId?: number) {
|
function buildTranscriptCardId(sentenceKey?: string, sentenceId?: number) {
|
||||||
|
if (sentenceKey) {
|
||||||
|
return sentenceKey;
|
||||||
|
}
|
||||||
if (sentenceId === undefined || sentenceId === null) {
|
if (sentenceId === undefined || sentenceId === null) {
|
||||||
return `live-${Date.now()}-${Math.random()}`;
|
return `live-${Date.now()}-${Math.random()}`;
|
||||||
}
|
}
|
||||||
|
|
@ -192,6 +198,7 @@ function normalizeWsMessage(payload: WsMessage): NormalizedWsMessage | null {
|
||||||
return {
|
return {
|
||||||
text: data.text || "",
|
text: data.text || "",
|
||||||
isFinal: payload.type === "segment" || !!data.is_final,
|
isFinal: payload.type === "segment" || !!data.is_final,
|
||||||
|
sentenceKey: data.sentence_key,
|
||||||
sentenceId: data.sentence_id,
|
sentenceId: data.sentence_id,
|
||||||
speaker: {
|
speaker: {
|
||||||
name: data.speaker_name,
|
name: data.speaker_name,
|
||||||
|
|
@ -397,9 +404,10 @@ export function RealtimeAsrSession() {
|
||||||
const upsertTranscriptCard = (normalized: NormalizedWsMessage, speaker: ReturnType<typeof resolveSpeaker>) => {
|
const upsertTranscriptCard = (normalized: NormalizedWsMessage, speaker: ReturnType<typeof resolveSpeaker>) => {
|
||||||
setTranscripts((prev) => {
|
setTranscripts((prev) => {
|
||||||
const next = [...prev];
|
const next = [...prev];
|
||||||
const cardId = buildTranscriptCardId(normalized.sentenceId);
|
const cardId = buildTranscriptCardId(normalized.sentenceKey, normalized.sentenceId);
|
||||||
const nextCard: TranscriptCard = {
|
const nextCard: TranscriptCard = {
|
||||||
id: cardId,
|
id: cardId,
|
||||||
|
sentenceKey: normalized.sentenceKey,
|
||||||
sentenceId: normalized.sentenceId,
|
sentenceId: normalized.sentenceId,
|
||||||
speakerName: speaker.speakerName,
|
speakerName: speaker.speakerName,
|
||||||
userId: speaker.userId,
|
userId: speaker.userId,
|
||||||
|
|
@ -408,7 +416,7 @@ export function RealtimeAsrSession() {
|
||||||
endTime: normalized.endTime,
|
endTime: normalized.endTime,
|
||||||
final: true,
|
final: true,
|
||||||
};
|
};
|
||||||
if (normalized.sentenceId !== undefined && normalized.sentenceId !== null) {
|
if (normalized.sentenceKey || (normalized.sentenceId !== undefined && normalized.sentenceId !== null)) {
|
||||||
const index = next.findIndex((item) => item.id === cardId);
|
const index = next.findIndex((item) => item.id === cardId);
|
||||||
if (index >= 0) {
|
if (index >= 0) {
|
||||||
next[index] = {...next[index], ...nextCard};
|
next[index] = {...next[index], ...nextCard};
|
||||||
|
|
@ -496,9 +504,9 @@ export function RealtimeAsrSession() {
|
||||||
if (recording && startedAtRef.current) {
|
if (recording && startedAtRef.current) {
|
||||||
elapsedOffsetRef.current += Math.floor((Date.now() - startedAtRef.current) / 1000);
|
elapsedOffsetRef.current += Math.floor((Date.now() - startedAtRef.current) / 1000);
|
||||||
}
|
}
|
||||||
|
const pauseRes = await pauseRealtimeMeeting(meetingId);
|
||||||
await closeFrontendSocket(false);
|
await closeFrontendSocket(false);
|
||||||
await shutdownAudioPipeline();
|
await shutdownAudioPipeline();
|
||||||
const pauseRes = await pauseRealtimeMeeting(meetingId);
|
|
||||||
setSessionStatus(pauseRes.data.data);
|
setSessionStatus(pauseRes.data.data);
|
||||||
setRecording(false);
|
setRecording(false);
|
||||||
setConnecting(false);
|
setConnecting(false);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue