feat: 添加腾讯实时 ASR 支持并优化相关逻辑
- 在 `AiModelServiceImpl` 中添加腾讯 ASR 配置验证和处理逻辑 - 更新前端 `RealtimeAsrSession` 组件,支持 `sentenceKey` 和腾讯 ASR 配置 - 在 `RealtimeMeetingProxyWebSocketHandler` 中添加对腾讯 ASR 会话的处理 - 添加 `TencentRealtimeAsrChannel` 类,实现腾讯实时 ASR 通道逻辑 - 更新 `RealtimeMeetingSocketSessionServiceImpl` 和 `RealtimeMeetingSessionStateServiceImpl`,支持腾讯 ASR 会话状态管理 - 在 `RealtimeSocketSessionData` 中添加 `modelCode` 和 `mediaConfig` 字段 - 更新 `RealtimeMeetingTranscriptCacheItem`,添加 `sentenceGroupKey` 字段 - 在 `AiModels` 页面中添加腾讯 ASR 配置表单字段 - 添加 `TENCENT_PROVIDER` 常量,并在 `ModelProviderEnum` 中添加腾讯云枚举值 - 添加单元测试以验证腾讯 ASR 模型保存逻辑dev_na
parent
ba62c9e0c0
commit
77fe6d4e77
|
|
@ -177,6 +177,12 @@
|
|||
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
||||
<version>2.3.0</version>
|
||||
</dependency>
|
||||
<!-- Source: https://mvnrepository.com/artifact/com.tencentcloudapi/tencentcloud-speech-sdk-java -->
|
||||
<dependency>
|
||||
<groupId>com.tencentcloudapi</groupId>
|
||||
<artifactId>tencentcloud-speech-sdk-java</artifactId>
|
||||
<version>1.0.67</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
|
|
|||
|
|
@ -27,4 +27,6 @@ public class RealtimeMeetingResumeConfig {
|
|||
private Boolean saveAudio;
|
||||
@Schema(description = "热词列表")
|
||||
private List<Map<String, Object>> hotwords;
|
||||
@Schema(description = "腾讯说话人上下文 ID")
|
||||
private String speakerContextId;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import lombok.Data;
|
|||
@Data
|
||||
public class RealtimeMeetingTranscriptCacheItem {
|
||||
private String sentenceKey;
|
||||
private String sentenceGroupKey;
|
||||
private Integer sentenceId;
|
||||
private Integer sentenceType;
|
||||
private String speakerId;
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ package com.imeeting.dto.biz;
|
|||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@Data
|
||||
public class RealtimeSocketSessionData {
|
||||
private Long meetingId;
|
||||
|
|
@ -10,4 +12,6 @@ public class RealtimeSocketSessionData {
|
|||
private Long asrModelId;
|
||||
private String provider;
|
||||
private String targetWsUrl;
|
||||
private String modelCode;
|
||||
private Map<String, Object> mediaConfig;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,8 +5,7 @@ import lombok.Getter;
|
|||
@Getter
|
||||
public enum ModelProviderEnum {
|
||||
LOCAL("local", "本地"),
|
||||
|
||||
;
|
||||
TENCENT("tencent", "腾讯云");
|
||||
|
||||
private final String code;
|
||||
private final String description;
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ public interface RealtimeMeetingSessionStateService {
|
|||
|
||||
void rememberResumeConfig(Long meetingId, RealtimeMeetingResumeConfig resumeConfig);
|
||||
|
||||
void rememberSpeakerContext(Long meetingId, String speakerContextId);
|
||||
|
||||
void assertCanOpenSession(Long meetingId);
|
||||
|
||||
boolean activate(Long meetingId, String connectionId);
|
||||
|
|
|
|||
|
|
@ -45,6 +45,10 @@ public class AiModelServiceImpl implements AiModelService {
|
|||
|
||||
private static final String TYPE_ASR = "ASR";
|
||||
private static final String TYPE_LLM = "LLM";
|
||||
private static final String TENCENT_PROVIDER = "tencent";
|
||||
private static final String MEDIA_TENCENT_APP_ID = "tencentAppId";
|
||||
private static final String MEDIA_TENCENT_SECRET_ID = "tencentSecretId";
|
||||
private static final String MEDIA_TENCENT_SECRET_KEY = "tencentSecretKey";
|
||||
private static final int DEFAULT_SORT_ORDER = 0;
|
||||
private static final String DEFAULT_LLM_API_PATH = "/v1/chat/completions";
|
||||
private static final String DEFAULT_ANTHROPIC_API_PATH = "/messages";
|
||||
|
|
@ -741,6 +745,7 @@ public class AiModelServiceImpl implements AiModelService {
|
|||
if (Integer.valueOf(1).equals(dto.getIsDefault()) && !Integer.valueOf(1).equals(dto.getStatus())) {
|
||||
throw new RuntimeException("默认模型必须为启用状态");
|
||||
}
|
||||
validateTencentAsrConfig(dto);
|
||||
// if ("custom".equals(normalizeProvider(dto.getProvider()))) {
|
||||
// if (TYPE_ASR.equals(normalizeType(dto.getModelType()))) {
|
||||
// Map<String, Object> mediaConfig = dto.getMediaConfig() == null ? Collections.emptyMap() : dto.getMediaConfig();
|
||||
|
|
@ -833,13 +838,20 @@ public class AiModelServiceImpl implements AiModelService {
|
|||
}
|
||||
|
||||
private void pushAsrConfig(AsrModel entity) {
|
||||
if (ModelProviderEnum.LOCAL.getCode().equals(normalizeProvider(entity.getProvider()))) {
|
||||
String provider = normalizeProvider(entity.getProvider());
|
||||
if (ModelProviderEnum.LOCAL.getCode().equals(provider)) {
|
||||
if (entity.getApiKey() == null || entity.getApiKey().isBlank()) {
|
||||
log.info("Skip syncing local ASR profile because apiKey is blank, modelName={}", entity.getModelName());
|
||||
return;
|
||||
}
|
||||
updateLocalProfile(entity);
|
||||
return;
|
||||
}
|
||||
if ("custom".equals(provider)) {
|
||||
return;
|
||||
}
|
||||
if (TENCENT_PROVIDER.equals(provider)) {
|
||||
return;
|
||||
}
|
||||
if (entity.getBaseUrl() == null || entity.getBaseUrl().isBlank()) {
|
||||
throw new RuntimeException("ASR 模型必须配置 baseUrl");
|
||||
|
|
@ -935,6 +947,28 @@ public class AiModelServiceImpl implements AiModelService {
|
|||
return text.isEmpty() ? null : text;
|
||||
}
|
||||
|
||||
private void validateTencentAsrConfig(AiModelDTO dto) {
|
||||
if (!TYPE_ASR.equals(normalizeType(dto.getModelType()))) {
|
||||
return;
|
||||
}
|
||||
if (!TENCENT_PROVIDER.equals(normalizeProvider(dto.getProvider()))) {
|
||||
return;
|
||||
}
|
||||
Map<String, Object> mediaConfig = dto.getMediaConfig() == null ? Collections.emptyMap() : dto.getMediaConfig();
|
||||
if (readConfigString(mediaConfig.get(MEDIA_TENCENT_APP_ID)) == null) {
|
||||
throw new RuntimeException("腾讯实时 ASR 模型必须配置 mediaConfig.tencentAppId");
|
||||
}
|
||||
if (readConfigString(mediaConfig.get(MEDIA_TENCENT_SECRET_ID)) == null) {
|
||||
throw new RuntimeException("腾讯实时 ASR 模型必须配置 mediaConfig.tencentSecretId");
|
||||
}
|
||||
if (readConfigString(mediaConfig.get(MEDIA_TENCENT_SECRET_KEY)) == null) {
|
||||
throw new RuntimeException("腾讯实时 ASR 模型必须配置 mediaConfig.tencentSecretKey");
|
||||
}
|
||||
if (dto.getModelCode() == null || dto.getModelCode().isBlank()) {
|
||||
throw new RuntimeException("腾讯实时 ASR 模型必须配置 modelCode");
|
||||
}
|
||||
}
|
||||
|
||||
private BigDecimal readConfigDecimal(Object value) {
|
||||
if (value == null) {
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -63,6 +63,22 @@ public class RealtimeMeetingSessionStateServiceImpl implements RealtimeMeetingSe
|
|||
public void rememberResumeConfig(Long meetingId, RealtimeMeetingResumeConfig resumeConfig) {
|
||||
RealtimeMeetingSessionState state = getOrCreateState(meetingId);
|
||||
state.setResumeConfig(resumeConfig);
|
||||
state.setUpdatedAt(System.currentTimeMillis());
|
||||
writeState(state);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rememberSpeakerContext(Long meetingId, String speakerContextId) {
|
||||
if (meetingId == null || speakerContextId == null || speakerContextId.isBlank()) {
|
||||
return;
|
||||
}
|
||||
RealtimeMeetingSessionState state = getOrCreateState(meetingId);
|
||||
RealtimeMeetingResumeConfig resumeConfig = state.getResumeConfig();
|
||||
if (resumeConfig == null) {
|
||||
resumeConfig = new RealtimeMeetingResumeConfig();
|
||||
state.setResumeConfig(resumeConfig);
|
||||
}
|
||||
resumeConfig.setSpeakerContextId(speakerContextId.trim());
|
||||
state.setUpdatedAt(System.currentTimeMillis());
|
||||
writeState(state);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,10 +39,10 @@ public class RealtimeMeetingSocketSessionServiceImpl implements RealtimeMeetingS
|
|||
Boolean enableTextRefine, Boolean saveAudio,
|
||||
List<Map<String, Object>> hotwords, LoginUser loginUser) {
|
||||
if (meetingId == null) {
|
||||
throw new RuntimeException("浼氳 ID 涓嶈兘涓虹┖");
|
||||
throw new RuntimeException("会议 ID 不能为空");
|
||||
}
|
||||
if (asrModelId == null) {
|
||||
throw new RuntimeException("ASR 妯″瀷 ID 涓嶈兘涓虹┖");
|
||||
throw new RuntimeException("ASR 模型 ID 不能为空");
|
||||
}
|
||||
|
||||
Meeting meeting = meetingAccessService.requireMeeting(meetingId);
|
||||
|
|
@ -53,13 +53,13 @@ public class RealtimeMeetingSocketSessionServiceImpl implements RealtimeMeetingS
|
|||
|
||||
AiModelVO asrModel = aiModelService.getModelById(asrModelId, "ASR");
|
||||
if (asrModel == null) {
|
||||
throw new RuntimeException("ASR 妯″瀷涓嶅瓨鍦?");
|
||||
throw new RuntimeException("ASR 模型不存在");
|
||||
}
|
||||
|
||||
RealtimeAsrChannel realtimeAsrChannel = realtimeAsrChannelFactory.getRequired(asrModel.getProvider());
|
||||
String targetWsUrl = realtimeAsrChannel.resolveTargetWsUrl(asrModel);
|
||||
if (targetWsUrl == null || targetWsUrl.isBlank()) {
|
||||
throw new RuntimeException("ASR 妯″瀷鏈厤缃?WebSocket 鍦板潃");
|
||||
throw new RuntimeException("ASR 模型未配置 WebSocket 地址");
|
||||
}
|
||||
|
||||
RealtimeMeetingResumeConfig resumeConfig = new RealtimeMeetingResumeConfig();
|
||||
|
|
@ -87,6 +87,8 @@ public class RealtimeMeetingSocketSessionServiceImpl implements RealtimeMeetingS
|
|||
sessionData.setAsrModelId(asrModelId);
|
||||
sessionData.setProvider(realtimeAsrChannelFactory.normalizeProvider(asrModel.getProvider()));
|
||||
sessionData.setTargetWsUrl(targetWsUrl);
|
||||
sessionData.setModelCode(asrModel.getModelCode());
|
||||
sessionData.setMediaConfig(asrModel.getMediaConfig());
|
||||
|
||||
String sessionToken = UUID.randomUUID().toString().replace("-", "");
|
||||
socketSessionCache.save(sessionToken, sessionData);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,10 @@ public interface RealtimeAsrChannel {
|
|||
|
||||
void handleFrontendBinary(RealtimeAsrChannelContext context, byte[] payload);
|
||||
|
||||
default void onFrontendDetached(RealtimeAsrChannelContext context) {
|
||||
// default no-op
|
||||
}
|
||||
|
||||
void closeMeeting(RealtimeAsrChannelContext context);
|
||||
|
||||
boolean isOpen(RealtimeAsrChannelContext context);
|
||||
|
|
|
|||
|
|
@ -100,17 +100,22 @@ public class RealtimeMeetingTranscriptCacheServiceImpl implements RealtimeMeetin
|
|||
Integer sentenceId = sentence.has("sentence_id") && sentence.get("sentence_id").canConvertToInt()
|
||||
? sentence.get("sentence_id").asInt()
|
||||
: null;
|
||||
String sentenceKey = sentenceId == null ? "sentence-" + nextLegacySequence(state) : "sentence-" + sentenceId;
|
||||
String upstreamSentenceKey = readText(sentence, "sentence_key");
|
||||
String sentenceKey = upstreamSentenceKey != null && !upstreamSentenceKey.isBlank()
|
||||
? upstreamSentenceKey
|
||||
: sentenceId == null ? "sentence-" + nextLegacySequence(state) : "sentence-" + sentenceId;
|
||||
RealtimeMeetingTranscriptCacheItem item = findBySentenceKey(state, sentenceKey);
|
||||
long now = System.currentTimeMillis();
|
||||
if (item == null) {
|
||||
item = new RealtimeMeetingTranscriptCacheItem();
|
||||
item.setSentenceKey(sentenceKey);
|
||||
item.setSentenceGroupKey(upstreamSentenceKey);
|
||||
item.setSentenceId(sentenceId);
|
||||
item.setSortOrder(nextSortOrder(state));
|
||||
item.setFirstReceivedAt(now);
|
||||
state.getItems().add(item);
|
||||
}
|
||||
item.setSentenceGroupKey(upstreamSentenceKey);
|
||||
item.setSentenceType(readInteger(sentence, "sentence_type"));
|
||||
item.setSpeakerId(resolveSpeakerId(sentence));
|
||||
item.setSpeakerName(readText(sentence, "speaker_name"));
|
||||
|
|
|
|||
|
|
@ -0,0 +1,506 @@
|
|||
package com.imeeting.service.realtime.impl;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.imeeting.dto.biz.AiModelVO;
|
||||
import com.imeeting.enums.ModelProviderEnum;
|
||||
import com.imeeting.service.biz.RealtimeMeetingSessionStateService;
|
||||
import com.imeeting.service.realtime.RealtimeAsrChannel;
|
||||
import com.imeeting.service.realtime.RealtimeAsrChannelContext;
|
||||
import com.imeeting.service.realtime.RealtimeMeetingTranscriptCacheService;
|
||||
import com.tencent.asrspeaker.SpeakerConstant;
|
||||
import com.tencent.asrspeaker.SpeakerRecognitionListener;
|
||||
import com.tencent.asrspeaker.SpeakerRecognitionResponse;
|
||||
import com.tencent.asrspeaker.SpeakerRecognizer;
|
||||
import com.tencent.asrspeaker.SpeakerRecognizerRequest;
|
||||
import com.tencent.asrspeaker.SpeakerSentenceItem;
|
||||
import com.tencent.core.ws.Credential;
|
||||
import com.tencent.core.ws.SpeechClient;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.socket.CloseStatus;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class TencentRealtimeAsrChannel implements RealtimeAsrChannel {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
private static final String TARGET_WS_URL = "tencent-sdk://speaker-recognizer";
|
||||
private static final String MEDIA_TENCENT_APP_ID = "tencentAppId";
|
||||
private static final String MEDIA_TENCENT_SECRET_ID = "tencentSecretId";
|
||||
private static final String MEDIA_TENCENT_SECRET_KEY = "tencentSecretKey";
|
||||
private static final String STATE_CONNECTED = "tencentConnected";
|
||||
private static final String STATE_STARTED = "tencentStarted";
|
||||
private static final String STATE_RECOGNIZER = "tencentRecognizer";
|
||||
private static final String STATE_SPEECH_CLIENT = "tencentSpeechClient";
|
||||
private static final String STATE_STOP_REQUESTED = "tencentStopRequested";
|
||||
private static final String STATE_MEETING_COMPLETE_REQUESTED = "tencentMeetingCompleteRequested";
|
||||
private static final String STATE_FRONTEND_DETACHED = "tencentFrontendDetached";
|
||||
private static final String STATE_SPEAKER_CONTEXT_ID = "speakerContextId";
|
||||
private static final String STATE_VOICE_ID = "voiceId";
|
||||
private static final String STATE_PENDING_AUDIO_FRAMES = "pendingAudioFrames";
|
||||
private static final String STATE_MODEL_CODE = "modelCode";
|
||||
private static final String STATE_MEDIA_CONFIG = "mediaConfig";
|
||||
|
||||
private final RealtimeMeetingSessionStateService realtimeMeetingSessionStateService;
|
||||
private final RealtimeMeetingTranscriptCacheService realtimeMeetingTranscriptCacheService;
|
||||
|
||||
@Override
|
||||
public boolean supports(String provider) {
|
||||
return ModelProviderEnum.TENCENT.getCode().equalsIgnoreCase(provider);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String resolveTargetWsUrl(AiModelVO model) {
|
||||
return TARGET_WS_URL;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> buildStartMessage(AiModelVO model,
|
||||
String mode,
|
||||
String language,
|
||||
Integer useSpkId,
|
||||
Boolean enablePunctuation,
|
||||
Boolean enableItn,
|
||||
Boolean enableTextRefine,
|
||||
Boolean saveAudio,
|
||||
List<Map<String, Object>> hotwords) {
|
||||
Map<String, Object> payload = new HashMap<>();
|
||||
payload.put("provider", ModelProviderEnum.TENCENT.getCode());
|
||||
payload.put("engine_model_type", model.getModelCode());
|
||||
payload.put("language", language);
|
||||
|
||||
Map<String, Object> root = new HashMap<>();
|
||||
root.put("type", "start");
|
||||
root.put("payload", payload);
|
||||
return root;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void connect(RealtimeAsrChannelContext context) throws Exception {
|
||||
String connectionId = currentConnectionId(context);
|
||||
if (connectionId == null || !realtimeMeetingSessionStateService.activate(context.getMeetingId(), connectionId)) {
|
||||
context.getCallback().sendFrontendError(context.getMeetingId(), "REALTIME_ACTIVE_CONNECTION_EXISTS", "当前会议无法激活这条前端连接");
|
||||
context.getCallback().closeFrontend(context.getMeetingId(), CloseStatus.POLICY_VIOLATION.withReason("当前会议无法激活这条前端连接"));
|
||||
return;
|
||||
}
|
||||
context.getChannelState().put(STATE_CONNECTED, Boolean.TRUE);
|
||||
context.getChannelState().put(STATE_STARTED, Boolean.FALSE);
|
||||
context.getChannelState().put(STATE_STOP_REQUESTED, Boolean.FALSE);
|
||||
context.getChannelState().put(STATE_MEETING_COMPLETE_REQUESTED, Boolean.FALSE);
|
||||
context.getChannelState().put(STATE_FRONTEND_DETACHED, Boolean.FALSE);
|
||||
context.getChannelState().putIfAbsent(STATE_PENDING_AUDIO_FRAMES, new java.util.ArrayList<byte[]>());
|
||||
context.getCallback().onChannelOpen(context.getMeetingId());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleFrontendText(RealtimeAsrChannelContext context, String payload) {
|
||||
if (looksLikeStartMessage(payload)) {
|
||||
startRecognizerIfNecessary(context);
|
||||
return;
|
||||
}
|
||||
if (looksLikeStopMessage(payload)) {
|
||||
context.getChannelState().put(STATE_STOP_REQUESTED, Boolean.TRUE);
|
||||
stopRecognizer(context);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleFrontendBinary(RealtimeAsrChannelContext context, byte[] payload) {
|
||||
SpeakerRecognizer recognizer = getRecognizer(context);
|
||||
if (payload == null || payload.length == 0) {
|
||||
return;
|
||||
}
|
||||
if (recognizer == null) {
|
||||
queuePendingAudioFrame(context, payload);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
recognizer.write(payload);
|
||||
} catch (Exception ex) {
|
||||
handleChannelFailure(context, "REALTIME_UPSTREAM_ERROR", "腾讯实时 ASR 音频发送失败", ex);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void closeMeeting(RealtimeAsrChannelContext context) {
|
||||
context.getChannelState().put(STATE_STOP_REQUESTED, Boolean.TRUE);
|
||||
context.getChannelState().put(STATE_MEETING_COMPLETE_REQUESTED, Boolean.TRUE);
|
||||
stopRecognizer(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isOpen(RealtimeAsrChannelContext context) {
|
||||
return !Boolean.TRUE.equals(context.getChannelState().get(STATE_MEETING_COMPLETE_REQUESTED));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFrontendDetached(RealtimeAsrChannelContext context) {
|
||||
context.getChannelState().put(STATE_FRONTEND_DETACHED, Boolean.TRUE);
|
||||
context.getChannelState().put(STATE_STOP_REQUESTED, Boolean.TRUE);
|
||||
stopRecognizer(context);
|
||||
}
|
||||
|
||||
static String buildFrontendTranscriptMessage(String sentenceKey,
|
||||
String text,
|
||||
boolean isFinal,
|
||||
Integer sentenceId,
|
||||
Long startTime,
|
||||
Long endTime,
|
||||
Integer speakerId) throws JsonProcessingException {
|
||||
ObjectNode root = OBJECT_MAPPER.createObjectNode();
|
||||
root.put("type", isFinal ? "segment" : "partial");
|
||||
ObjectNode data = root.putObject("data");
|
||||
data.put("text", text);
|
||||
data.put("is_final", isFinal);
|
||||
if (sentenceId != null) {
|
||||
data.put("sentence_id", sentenceId);
|
||||
}
|
||||
if (sentenceKey != null && !sentenceKey.isBlank()) {
|
||||
data.put("sentence_key", sentenceKey);
|
||||
}
|
||||
if (startTime != null) {
|
||||
data.put("start", startTime / 1000D);
|
||||
}
|
||||
if (endTime != null) {
|
||||
data.put("end", endTime / 1000D);
|
||||
}
|
||||
if (speakerId != null) {
|
||||
data.put("speaker_id", String.valueOf(speakerId));
|
||||
}
|
||||
return OBJECT_MAPPER.writeValueAsString(root);
|
||||
}
|
||||
|
||||
private void startRecognizerIfNecessary(RealtimeAsrChannelContext context) {
|
||||
synchronized (context.getChannelState()) {
|
||||
if (Boolean.TRUE.equals(context.getChannelState().get(STATE_STARTED))) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
SpeechClient speechClient = createSpeechClient();
|
||||
SpeakerRecognizerRequest request = createRecognizerRequest(context);
|
||||
SpeakerRecognizer recognizer = createRecognizer(context, speechClient, request);
|
||||
context.getChannelState().put(STATE_SPEECH_CLIENT, speechClient);
|
||||
context.getChannelState().put(STATE_RECOGNIZER, recognizer);
|
||||
context.getChannelState().put(STATE_VOICE_ID, request.getVoiceId());
|
||||
recognizer.start();
|
||||
context.getChannelState().put(STATE_STARTED, Boolean.TRUE);
|
||||
flushPendingAudioFrames(context, recognizer);
|
||||
} catch (Exception ex) {
|
||||
handleChannelFailure(context, "REALTIME_UPSTREAM_CONNECT_FAILED", "腾讯实时 ASR 启动失败", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected SpeechClient createSpeechClient() {
|
||||
return new SpeechClient(SpeakerConstant.DEFAULT_RT_REQ_URL);
|
||||
}
|
||||
|
||||
protected SpeakerRecognizer createRecognizer(RealtimeAsrChannelContext context,
|
||||
SpeechClient speechClient,
|
||||
SpeakerRecognizerRequest request) {
|
||||
return new SpeakerRecognizer(
|
||||
speechClient,
|
||||
buildCredential(context),
|
||||
request,
|
||||
new TencentRecognitionListener(context)
|
||||
);
|
||||
}
|
||||
|
||||
private Credential buildCredential(RealtimeAsrChannelContext context) {
|
||||
Map<String, Object> mediaConfig = getMediaConfig(context);
|
||||
String appId = readConfigString(mediaConfig, MEDIA_TENCENT_APP_ID);
|
||||
String secretId = readConfigString(mediaConfig, MEDIA_TENCENT_SECRET_ID);
|
||||
String secretKey = readConfigString(mediaConfig, MEDIA_TENCENT_SECRET_KEY);
|
||||
if (appId == null || secretId == null || secretKey == null) {
|
||||
throw new RuntimeException("腾讯实时 ASR 会话缺少鉴权配置");
|
||||
}
|
||||
return new Credential(appId, secretId, secretKey);
|
||||
}
|
||||
|
||||
SpeakerRecognizerRequest createRecognizerRequest(RealtimeAsrChannelContext context) {
|
||||
SpeakerRecognizerRequest request = SpeakerRecognizerRequest.init();
|
||||
request.setEngineModelType(resolveEngineModelType(context));
|
||||
request.setVoiceFormat(SpeakerConstant.AUDIO_FORMAT_PCM);
|
||||
request.setVoiceId(UUID.randomUUID().toString());
|
||||
//是否需要vad
|
||||
request.setNeedVad(1);
|
||||
//vad静默时间
|
||||
request.setVadSilenceTime(1000);
|
||||
// 分句策略参数 0小1大
|
||||
request.setSentenceStrategy(0);
|
||||
//是否进行阿拉伯数字智能转换 0否1智能 23:打开数学相关转化
|
||||
request.setConvertNumMode(1);
|
||||
|
||||
request.setSpeakerDiarization(1);
|
||||
//启动断点续传
|
||||
request.setEnableSpeakerContext(1);
|
||||
String speakerContextId = resolveSpeakerContextId(context);
|
||||
if (speakerContextId != null) {
|
||||
request.setSpeakerContextId(speakerContextId);
|
||||
}
|
||||
return request;
|
||||
}
|
||||
|
||||
private String resolveEngineModelType(RealtimeAsrChannelContext context) {
|
||||
Object modelCode = context.getChannelState().get(STATE_MODEL_CODE);
|
||||
if (modelCode instanceof String value && !value.isBlank()) {
|
||||
return value;
|
||||
}
|
||||
return "16k_zh";
|
||||
}
|
||||
|
||||
private String resolveSpeakerContextId(RealtimeAsrChannelContext context) {
|
||||
Object speakerContextId = context.getChannelState().get(STATE_SPEAKER_CONTEXT_ID);
|
||||
if (speakerContextId instanceof String value && !value.isBlank()) {
|
||||
return value;
|
||||
}
|
||||
var status = realtimeMeetingSessionStateService.getStatus(context.getMeetingId());
|
||||
if (status == null || status.getResumeConfig() == null) {
|
||||
return null;
|
||||
}
|
||||
String value = status.getResumeConfig().getSpeakerContextId();
|
||||
if (value == null || value.isBlank()) {
|
||||
return null;
|
||||
}
|
||||
context.getChannelState().put(STATE_SPEAKER_CONTEXT_ID, value);
|
||||
return value;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private Map<String, Object> getMediaConfig(RealtimeAsrChannelContext context) {
|
||||
Object mediaConfig = context.getChannelState().get(STATE_MEDIA_CONFIG);
|
||||
if (mediaConfig instanceof Map<?, ?> map) {
|
||||
return (Map<String, Object>) map;
|
||||
}
|
||||
return Map.of();
|
||||
}
|
||||
|
||||
private String readConfigString(Map<String, Object> mediaConfig, String key) {
|
||||
Object value = mediaConfig.get(key);
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
String text = String.valueOf(value).trim();
|
||||
return text.isEmpty() ? null : text;
|
||||
}
|
||||
|
||||
private SpeakerRecognizer getRecognizer(RealtimeAsrChannelContext context) {
|
||||
Object recognizer = context.getChannelState().get(STATE_RECOGNIZER);
|
||||
return recognizer instanceof SpeakerRecognizer value ? value : null;
|
||||
}
|
||||
|
||||
private SpeechClient getSpeechClient(RealtimeAsrChannelContext context) {
|
||||
Object speechClient = context.getChannelState().get(STATE_SPEECH_CLIENT);
|
||||
return speechClient instanceof SpeechClient value ? value : null;
|
||||
}
|
||||
|
||||
private void stopRecognizer(RealtimeAsrChannelContext context) {
|
||||
SpeakerRecognizer recognizer = getRecognizer(context);
|
||||
if (recognizer == null) {
|
||||
shutdownSdkResources(context);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
recognizer.stop();
|
||||
} catch (Exception ex) {
|
||||
log.warn("Tencent realtime ASR stop failed, meetingId={}, sessionId={}",
|
||||
context.getMeetingId(), currentConnectionId(context), ex);
|
||||
shutdownSdkResources(context);
|
||||
}
|
||||
}
|
||||
|
||||
private void shutdownSdkResources(RealtimeAsrChannelContext context) {
|
||||
SpeakerRecognizer recognizer = getRecognizer(context);
|
||||
if (recognizer != null) {
|
||||
try {
|
||||
recognizer.close();
|
||||
} catch (Exception ignored) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
SpeechClient speechClient = getSpeechClient(context);
|
||||
if (speechClient != null) {
|
||||
try {
|
||||
speechClient.shutdown();
|
||||
} catch (Exception ignored) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
context.getChannelState().remove(STATE_RECOGNIZER);
|
||||
context.getChannelState().remove(STATE_SPEECH_CLIENT);
|
||||
context.getChannelState().put(STATE_STARTED, Boolean.FALSE);
|
||||
}
|
||||
|
||||
private void forwardResponse(RealtimeAsrChannelContext context,
|
||||
SpeakerRecognitionResponse response,
|
||||
boolean forceFinal) {
|
||||
if (response == null || response.getSentences() == null || response.getSentences().getSentenceList() == null) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
rememberSpeakerContext(context, response);
|
||||
String cachePayload = buildCachePayload(response, forceFinal);
|
||||
realtimeMeetingTranscriptCacheService.mergeUpstreamMessage(context.getMeetingId(), cachePayload);
|
||||
for (SpeakerSentenceItem item : response.getSentences().getSentenceList()) {
|
||||
if (item == null || item.getSentence() == null || item.getSentence().trim().isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
boolean isFinal = forceFinal || item.getSentenceType() == 1;
|
||||
context.getCallback().sendFrontendText(
|
||||
context.getMeetingId(),
|
||||
buildFrontendTranscriptMessage(
|
||||
buildSentenceKey(context, item.getSentenceId()),
|
||||
item.getSentence().trim(),
|
||||
isFinal,
|
||||
item.getSentenceId(),
|
||||
item.getStartTime(),
|
||||
item.getEndTime(),
|
||||
item.getSpeakerId()
|
||||
)
|
||||
);
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
handleChannelFailure(context, "REALTIME_UPSTREAM_ERROR", "腾讯实时 ASR 结果转发失败", ex);
|
||||
}
|
||||
}
|
||||
|
||||
private void rememberSpeakerContext(RealtimeAsrChannelContext context, SpeakerRecognitionResponse response) {
|
||||
if (response == null || response.getSpeakerContextId() == null || response.getSpeakerContextId().isBlank()) {
|
||||
return;
|
||||
}
|
||||
String speakerContextId = response.getSpeakerContextId().trim();
|
||||
context.getChannelState().put(STATE_SPEAKER_CONTEXT_ID, speakerContextId);
|
||||
realtimeMeetingSessionStateService.rememberSpeakerContext(context.getMeetingId(), speakerContextId);
|
||||
}
|
||||
|
||||
private String buildSentenceKey(RealtimeAsrChannelContext context, Integer sentenceId) {
|
||||
Object voiceId = context.getChannelState().get(STATE_VOICE_ID);
|
||||
if (!(voiceId instanceof String value) || value.isBlank() || sentenceId == null) {
|
||||
return null;
|
||||
}
|
||||
return value + "-" + sentenceId;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private void queuePendingAudioFrame(RealtimeAsrChannelContext context, byte[] payload) {
|
||||
Object frames = context.getChannelState().get(STATE_PENDING_AUDIO_FRAMES);
|
||||
if (frames instanceof List<?> list) {
|
||||
((List<byte[]>) list).add(payload.clone());
|
||||
return;
|
||||
}
|
||||
List<byte[]> next = new java.util.ArrayList<>();
|
||||
next.add(payload.clone());
|
||||
context.getChannelState().put(STATE_PENDING_AUDIO_FRAMES, next);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private void flushPendingAudioFrames(RealtimeAsrChannelContext context, SpeakerRecognizer recognizer) {
|
||||
Object frames = context.getChannelState().get(STATE_PENDING_AUDIO_FRAMES);
|
||||
if (!(frames instanceof List<?> list) || list.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
List<byte[]> pendingFrames = (List<byte[]>) list;
|
||||
for (byte[] frame : pendingFrames) {
|
||||
if (frame != null && frame.length > 0) {
|
||||
recognizer.write(frame);
|
||||
}
|
||||
}
|
||||
pendingFrames.clear();
|
||||
}
|
||||
|
||||
private String buildCachePayload(SpeakerRecognitionResponse response, boolean forceFinal) throws JsonProcessingException {
|
||||
ObjectNode root = OBJECT_MAPPER.createObjectNode();
|
||||
root.put("type", forceFinal ? "end" : "sentences");
|
||||
ArrayNode sentences = root.putArray("sentences");
|
||||
for (SpeakerSentenceItem item : response.getSentences().getSentenceList()) {
|
||||
if (item == null || item.getSentence() == null || item.getSentence().trim().isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
ObjectNode sentenceNode = sentences.addObject();
|
||||
sentenceNode.put("sentence", item.getSentence().trim());
|
||||
sentenceNode.put("sentence_type", forceFinal ? 1 : item.getSentenceType());
|
||||
sentenceNode.put("sentence_id", item.getSentenceId());
|
||||
sentenceNode.put("speaker_id", String.valueOf(item.getSpeakerId()));
|
||||
sentenceNode.put("start_time", item.getStartTime());
|
||||
sentenceNode.put("end_time", item.getEndTime());
|
||||
}
|
||||
return OBJECT_MAPPER.writeValueAsString(root);
|
||||
}
|
||||
|
||||
private void handleChannelFailure(RealtimeAsrChannelContext context, String code, String message, Exception ex) {
|
||||
log.error("Tencent realtime ASR channel failed, meetingId={}, sessionId={}",
|
||||
context.getMeetingId(), currentConnectionId(context), ex);
|
||||
shutdownSdkResources(context);
|
||||
context.getCallback().sendFrontendError(context.getMeetingId(), code, message);
|
||||
CompletableFuture.delayedExecutor(200, TimeUnit.MILLISECONDS).execute(
|
||||
() -> context.getCallback().closeFrontend(context.getMeetingId(), CloseStatus.SERVER_ERROR)
|
||||
);
|
||||
}
|
||||
|
||||
private String currentConnectionId(RealtimeAsrChannelContext context) {
|
||||
return context.getRawSession() == null ? null : context.getRawSession().getId();
|
||||
}
|
||||
|
||||
private static boolean looksLikeStartMessage(String payload) {
|
||||
if (payload == null || payload.isBlank()) {
|
||||
return false;
|
||||
}
|
||||
String normalized = payload.replaceAll("\\s+", "");
|
||||
return normalized.contains("\"type\":\"start\"");
|
||||
}
|
||||
|
||||
private static boolean looksLikeStopMessage(String payload) {
|
||||
if (payload == null || payload.isBlank()) {
|
||||
return false;
|
||||
}
|
||||
String normalized = payload.replaceAll("\\s+", "");
|
||||
return normalized.contains("\"type\":\"stop\"");
|
||||
}
|
||||
|
||||
private final class TencentRecognitionListener extends SpeakerRecognitionListener {
|
||||
private final RealtimeAsrChannelContext context;
|
||||
|
||||
private TencentRecognitionListener(RealtimeAsrChannelContext context) {
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onRecognitionStart(SpeakerRecognitionResponse response) {
|
||||
rememberSpeakerContext(context, response);
|
||||
log.info("Tencent realtime ASR started, meetingId={}, sessionId={}",
|
||||
context.getMeetingId(), currentConnectionId(context));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onRecognitionSentences(SpeakerRecognitionResponse response) {
|
||||
forwardResponse(context, response, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onSentenceEnd(SpeakerRecognitionResponse response) {
|
||||
forwardResponse(context, response, true);
|
||||
shutdownSdkResources(context);
|
||||
if (Boolean.TRUE.equals(context.getChannelState().get(STATE_MEETING_COMPLETE_REQUESTED))) {
|
||||
context.getCallback().removeMeetingSession(context.getMeetingId());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFail(SpeakerRecognitionResponse response, Exception error) {
|
||||
handleChannelFailure(context, "REALTIME_UPSTREAM_ERROR", "腾讯实时 ASR 识别失败", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -165,7 +165,13 @@ public class RealtimeMeetingProxyWebSocketHandler extends AbstractWebSocketHandl
|
|||
synchronized (lockForMeeting(meetingId)) {
|
||||
meetingSession = meetingSessions.get(meetingId);
|
||||
if (meetingSession != null && meetingSession.isChannelOpen()) {
|
||||
String previousSessionId = meetingSession.context.getRawSession() == null
|
||||
? null
|
||||
: meetingSession.context.getRawSession().getId();
|
||||
meetingSession.clearFrontendIfClosed();
|
||||
if (previousSessionId != null && !meetingSession.hasOpenFrontend()) {
|
||||
realtimeMeetingSessionStateService.pauseByDisconnect(meetingId, previousSessionId);
|
||||
}
|
||||
if (meetingSession.hasOpenFrontend()) {
|
||||
sendFrontendError(frontendSession, "REALTIME_ACTIVE_CONNECTION_EXISTS", "当前会议已有活跃前端连接");
|
||||
frontendSession.close(CloseStatus.POLICY_VIOLATION.withReason("已存在活跃的前端连接"));
|
||||
|
|
@ -188,6 +194,8 @@ public class RealtimeMeetingProxyWebSocketHandler extends AbstractWebSocketHandl
|
|||
context.setTargetWsUrl(sessionData.getTargetWsUrl());
|
||||
context.setCallback(new HandlerChannelCallback());
|
||||
context.bindFrontendSession(rawSession, frontendSession);
|
||||
context.getChannelState().put("modelCode", sessionData.getModelCode());
|
||||
context.getChannelState().put("mediaConfig", sessionData.getMediaConfig());
|
||||
meetingSession = new MeetingChannelSession(meetingId, channel, context);
|
||||
meetingSessions.put(meetingId, meetingSession);
|
||||
}
|
||||
|
|
@ -250,6 +258,9 @@ public class RealtimeMeetingProxyWebSocketHandler extends AbstractWebSocketHandl
|
|||
return;
|
||||
}
|
||||
synchronized (lockForMeeting(meetingId)) {
|
||||
if (meetingSession.context.getRawSession() != null && meetingSession.context.getRawSession().getId().equals(sessionId)) {
|
||||
meetingSession.channel.onFrontendDetached(meetingSession.context);
|
||||
}
|
||||
meetingSession.detachFrontend(sessionId);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -349,6 +349,64 @@ class AiModelServiceImplTest {
|
|||
assertNull(captor.getValue().getApiKey());
|
||||
}
|
||||
|
||||
@Test
|
||||
void saveModelShouldRejectTencentAsrWithoutSecretKey() {
|
||||
AiModelServiceImpl service = new AiModelServiceImpl(
|
||||
objectMapper,
|
||||
mock(AsrModelMapper.class),
|
||||
mock(LlmModelMapper.class)
|
||||
);
|
||||
|
||||
AiModelDTO dto = new AiModelDTO();
|
||||
dto.setModelType("ASR");
|
||||
dto.setModelName("tencent-asr");
|
||||
dto.setProvider("tencent");
|
||||
dto.setModelCode("16k_zh");
|
||||
dto.setIsDefault(0);
|
||||
dto.setStatus(1);
|
||||
dto.setMediaConfig(Map.of(
|
||||
"tencentAppId", "app-id",
|
||||
"tencentSecretId", "secret-id"
|
||||
));
|
||||
|
||||
RuntimeException ex = assertThrows(RuntimeException.class, () -> service.saveModel(dto));
|
||||
assertEquals("腾讯实时 ASR 模型必须配置 mediaConfig.tencentSecretKey", ex.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
void saveModelShouldPersistTencentAsrWithoutBaseUrl() {
|
||||
AsrModelMapper asrModelMapper = mock(AsrModelMapper.class);
|
||||
when(asrModelMapper.insert(any(AsrModel.class))).thenReturn(1);
|
||||
|
||||
AiModelServiceImpl service = new AiModelServiceImpl(
|
||||
objectMapper,
|
||||
asrModelMapper,
|
||||
mock(LlmModelMapper.class)
|
||||
);
|
||||
|
||||
AiModelDTO dto = new AiModelDTO();
|
||||
dto.setModelType("ASR");
|
||||
dto.setModelName("tencent-asr");
|
||||
dto.setProvider("tencent");
|
||||
dto.setModelCode("16k_zh");
|
||||
dto.setIsDefault(0);
|
||||
dto.setStatus(1);
|
||||
dto.setMediaConfig(Map.of(
|
||||
"tencentAppId", "app-id",
|
||||
"tencentSecretId", "secret-id",
|
||||
"tencentSecretKey", "secret-key"
|
||||
));
|
||||
|
||||
service.saveModel(dto);
|
||||
|
||||
ArgumentCaptor<AsrModel> captor = ArgumentCaptor.forClass(AsrModel.class);
|
||||
verify(asrModelMapper, times(1)).insert(captor.capture());
|
||||
assertEquals("tencent", captor.getValue().getProvider());
|
||||
assertEquals("16k_zh", captor.getValue().getModelCode());
|
||||
assertEquals("secret-key", captor.getValue().getMediaConfig().get("tencentSecretKey"));
|
||||
assertNull(captor.getValue().getBaseUrl());
|
||||
}
|
||||
|
||||
private void captureRequest(HttpExchange exchange,
|
||||
AtomicReference<String> requestPath,
|
||||
AtomicReference<String> authorization,
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ const AiModels: React.FC = () => {
|
|||
const provider = Form.useWatch("provider", form);
|
||||
const isDefaultChecked = Form.useWatch("isDefaultChecked", form);
|
||||
const isLocalProvider = String(provider || "").toLowerCase() === "custom";
|
||||
const isTencentProvider = String(provider || "").toLowerCase() === "tencent";
|
||||
|
||||
const isPlatformAdmin = useMemo(() => {
|
||||
const profileStr = sessionStorage.getItem("userProfile");
|
||||
|
|
@ -133,11 +134,17 @@ const AiModels: React.FC = () => {
|
|||
setEditingId(record.id);
|
||||
const speakerModel = record.mediaConfig?.speakerModel;
|
||||
const svThreshold = record.mediaConfig?.svThreshold;
|
||||
const tencentAppId = record.mediaConfig?.tencentAppId;
|
||||
const tencentSecretId = record.mediaConfig?.tencentSecretId;
|
||||
const tencentSecretKey = record.mediaConfig?.tencentSecretKey;
|
||||
form.setFieldsValue({
|
||||
...record,
|
||||
modelType: record.modelType,
|
||||
speakerModel,
|
||||
svThreshold,
|
||||
tencentAppId,
|
||||
tencentSecretId,
|
||||
tencentSecretKey,
|
||||
isDefaultChecked: record.isDefault === 1,
|
||||
statusChecked: record.status === 1,
|
||||
});
|
||||
|
|
@ -262,6 +269,12 @@ const AiModels: React.FC = () => {
|
|||
speakerModel: values.speakerModel,
|
||||
svThreshold: values.svThreshold,
|
||||
}
|
||||
: activeType === "ASR" && isTencentProvider
|
||||
? {
|
||||
tencentAppId: values.tencentAppId,
|
||||
tencentSecretId: values.tencentSecretId,
|
||||
tencentSecretKey: values.tencentSecretKey,
|
||||
}
|
||||
: undefined,
|
||||
temperature: values.temperature,
|
||||
topP: values.topP,
|
||||
|
|
@ -524,16 +537,20 @@ const AiModels: React.FC = () => {
|
|||
</Col>
|
||||
</Row>
|
||||
|
||||
<Form.Item name="baseUrl" label="Base URL" rules={[{ required: true, message: "请输入 Base URL" }]}>
|
||||
<Input placeholder="https://api.example.com" />
|
||||
</Form.Item>
|
||||
{!isTencentProvider && (
|
||||
<>
|
||||
<Form.Item name="baseUrl" label="Base URL" rules={[{required: true, message: "请输入 Base URL"}]}>
|
||||
<Input placeholder="https://api.example.com"/>
|
||||
</Form.Item>
|
||||
|
||||
<Form.Item
|
||||
name="apiKey"
|
||||
label="API Key"
|
||||
>
|
||||
<Input.Password />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
name="apiKey"
|
||||
label="API Key"
|
||||
>
|
||||
<Input.Password/>
|
||||
</Form.Item>
|
||||
</>
|
||||
)}
|
||||
|
||||
{(activeType === "LLM" || isLocalProvider) && (
|
||||
<Form.Item label="连通性测试">
|
||||
|
|
@ -556,7 +573,10 @@ const AiModels: React.FC = () => {
|
|||
<Form.Item
|
||||
name="modelCode"
|
||||
noStyle
|
||||
rules={activeType === "LLM" ? [{ required: true, message: "请输入或选择模型名称" }] : []}
|
||||
rules={activeType === "LLM" || isTencentProvider ? [{
|
||||
required: true,
|
||||
message: "请输入或选择模型名称"
|
||||
}] : []}
|
||||
>
|
||||
<AutoComplete
|
||||
style={{ width: "calc(100% - 100px)" }}
|
||||
|
|
@ -574,9 +594,11 @@ const AiModels: React.FC = () => {
|
|||
<Input allowClear placeholder="可选择或自定义输入模型名称" />
|
||||
</AutoComplete>
|
||||
</Form.Item>
|
||||
<Button icon={<SyncOutlined spin={fetchLoading} />} onClick={handleFetchRemote} style={{ width: 100 }}>
|
||||
刷新
|
||||
</Button>
|
||||
{!isTencentProvider && (
|
||||
<Button icon={<SyncOutlined spin={fetchLoading}/>} onClick={handleFetchRemote} style={{width: 100}}>
|
||||
刷新
|
||||
</Button>
|
||||
)}
|
||||
</Space.Compact>
|
||||
</Form.Item>
|
||||
|
||||
|
|
@ -609,6 +631,38 @@ const AiModels: React.FC = () => {
|
|||
</Row>
|
||||
)}
|
||||
|
||||
{activeType === "ASR" && isTencentProvider && (
|
||||
<Row gutter={16}>
|
||||
<Col span={12}>
|
||||
<Form.Item
|
||||
name="tencentAppId"
|
||||
label="App ID"
|
||||
rules={[{required: true, message: "请输入 App ID"}]}
|
||||
>
|
||||
<Input/>
|
||||
</Form.Item>
|
||||
</Col>
|
||||
<Col span={12}>
|
||||
<Form.Item
|
||||
name="tencentSecretId"
|
||||
label="Secret ID"
|
||||
rules={[{required: true, message: "请输入 Secret ID"}]}
|
||||
>
|
||||
<Input/>
|
||||
</Form.Item>
|
||||
</Col>
|
||||
<Col span={24}>
|
||||
<Form.Item
|
||||
name="tencentSecretKey"
|
||||
label="Secret Key"
|
||||
rules={[{required: true, message: "请输入 Secret Key"}]}
|
||||
>
|
||||
<Input.Password/>
|
||||
</Form.Item>
|
||||
</Col>
|
||||
</Row>
|
||||
)}
|
||||
|
||||
{activeType === "LLM" && (
|
||||
<>
|
||||
<Form.Item name="apiPath" label="API 路径" initialValue="/v1/chat/completions">
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ type WsMessage = {
|
|||
text?: string;
|
||||
is_final?: boolean;
|
||||
sentence_id?: number;
|
||||
sentence_key?: string;
|
||||
start?: number;
|
||||
end?: number;
|
||||
speaker_id?: string;
|
||||
|
|
@ -57,6 +58,7 @@ type WsMessage = {
|
|||
|
||||
type TranscriptCard = {
|
||||
id: string;
|
||||
sentenceKey?: string;
|
||||
sentenceId?: number;
|
||||
speakerName: string;
|
||||
userId?: string | number;
|
||||
|
|
@ -67,6 +69,7 @@ type TranscriptCard = {
|
|||
};
|
||||
|
||||
type NormalizedWsMessage = {
|
||||
sentenceKey?: string;
|
||||
text: string;
|
||||
isFinal: boolean;
|
||||
sentenceId?: number;
|
||||
|
|
@ -174,7 +177,10 @@ function toMs(value?: number) {
|
|||
return Math.round(value * 1000);
|
||||
}
|
||||
|
||||
function buildTranscriptCardId(sentenceId?: number) {
|
||||
function buildTranscriptCardId(sentenceKey?: string, sentenceId?: number) {
|
||||
if (sentenceKey) {
|
||||
return sentenceKey;
|
||||
}
|
||||
if (sentenceId === undefined || sentenceId === null) {
|
||||
return `live-${Date.now()}-${Math.random()}`;
|
||||
}
|
||||
|
|
@ -192,6 +198,7 @@ function normalizeWsMessage(payload: WsMessage): NormalizedWsMessage | null {
|
|||
return {
|
||||
text: data.text || "",
|
||||
isFinal: payload.type === "segment" || !!data.is_final,
|
||||
sentenceKey: data.sentence_key,
|
||||
sentenceId: data.sentence_id,
|
||||
speaker: {
|
||||
name: data.speaker_name,
|
||||
|
|
@ -397,9 +404,10 @@ export function RealtimeAsrSession() {
|
|||
const upsertTranscriptCard = (normalized: NormalizedWsMessage, speaker: ReturnType<typeof resolveSpeaker>) => {
|
||||
setTranscripts((prev) => {
|
||||
const next = [...prev];
|
||||
const cardId = buildTranscriptCardId(normalized.sentenceId);
|
||||
const cardId = buildTranscriptCardId(normalized.sentenceKey, normalized.sentenceId);
|
||||
const nextCard: TranscriptCard = {
|
||||
id: cardId,
|
||||
sentenceKey: normalized.sentenceKey,
|
||||
sentenceId: normalized.sentenceId,
|
||||
speakerName: speaker.speakerName,
|
||||
userId: speaker.userId,
|
||||
|
|
@ -408,7 +416,7 @@ export function RealtimeAsrSession() {
|
|||
endTime: normalized.endTime,
|
||||
final: true,
|
||||
};
|
||||
if (normalized.sentenceId !== undefined && normalized.sentenceId !== null) {
|
||||
if (normalized.sentenceKey || (normalized.sentenceId !== undefined && normalized.sentenceId !== null)) {
|
||||
const index = next.findIndex((item) => item.id === cardId);
|
||||
if (index >= 0) {
|
||||
next[index] = {...next[index], ...nextCard};
|
||||
|
|
@ -496,9 +504,9 @@ export function RealtimeAsrSession() {
|
|||
if (recording && startedAtRef.current) {
|
||||
elapsedOffsetRef.current += Math.floor((Date.now() - startedAtRef.current) / 1000);
|
||||
}
|
||||
const pauseRes = await pauseRealtimeMeeting(meetingId);
|
||||
await closeFrontendSocket(false);
|
||||
await shutdownAudioPipeline();
|
||||
const pauseRes = await pauseRealtimeMeeting(meetingId);
|
||||
setSessionStatus(pauseRes.data.data);
|
||||
setRecording(false);
|
||||
setConnecting(false);
|
||||
|
|
|
|||
Loading…
Reference in New Issue