feat:知识库检索已经命中了,但 vLLM 流式回答没有正常吐正文
parent
9ed4f6edfc
commit
1a3d4b65d1
|
|
@ -3,7 +3,7 @@
|
|||
from typing import Dict, List
|
||||
from urllib.parse import urlparse, ParseResult
|
||||
|
||||
from langchain_core.messages import BaseMessage, get_buffer_string
|
||||
from langchain_core.messages import BaseMessage, get_buffer_string, AIMessageChunk
|
||||
|
||||
from common.config.tokenizer_manage_config import TokenizerManage
|
||||
from models_provider.base_model_provider import MaxKBBaseModel
|
||||
|
|
@ -48,3 +48,18 @@ class VllmChatModel(MaxKBBaseModel, BaseChatOpenAI):
|
|||
tokenizer = TokenizerManage.get_tokenizer()
|
||||
return len(tokenizer.encode(text))
|
||||
return self.get_last_generation_info().get('output_tokens', 0)
|
||||
|
||||
def stream(self, input, config=None, *, stop=None, **kwargs):
|
||||
has_content = False
|
||||
for chunk in super().stream(input, config=config, stop=stop, **kwargs):
|
||||
content = getattr(chunk, 'content', '') or ''
|
||||
reasoning_content = (getattr(chunk, 'additional_kwargs', {}) or {}).get('reasoning_content', '')
|
||||
if content or reasoning_content:
|
||||
has_content = True
|
||||
yield chunk
|
||||
if not has_content:
|
||||
result = self.invoke(input, config=config, stop=stop, **kwargs)
|
||||
yield AIMessageChunk(
|
||||
content=getattr(result, 'content', '') or '',
|
||||
additional_kwargs=getattr(result, 'additional_kwargs', {}) or {}
|
||||
)
|
||||
|
|
|
|||
Loading…
Reference in New Issue