From 965781213ea43459d1dd58bce7ce141b5d527852 Mon Sep 17 00:00:00 2001 From: panyy Date: Wed, 24 Jun 2026 19:07:15 +0800 Subject: [PATCH] =?UTF-8?q?feat(=E6=80=9D=E7=BB=B4=E5=AF=BC=E5=9B=BE?= =?UTF-8?q?=E5=8A=A9=E6=89=8B)=EF=BC=9A=E6=80=9D=E7=BB=B4=E5=AF=BC?= =?UTF-8?q?=E5=9B=BE=E5=8A=A9=E6=89=8B=E5=A2=9E=E5=8A=A0=E6=99=BA=E8=83=BD?= =?UTF-8?q?=E6=95=B4=E7=90=86=E6=80=BB=E7=BB=93-=E8=AE=BE=E7=BD=AE?= =?UTF-8?q?=E8=BF=94=E5=9B=9E=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mineru/cli/fast_api.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mineru/cli/fast_api.py b/mineru/cli/fast_api.py index eeaaee8..3a822b8 100644 --- a/mineru/cli/fast_api.py +++ b/mineru/cli/fast_api.py @@ -413,7 +413,13 @@ def _estimate_tokens(text: str) -> int: return max(1, int(ascii_chars / 4) + int(non_ascii_chars * 1.5)) -def _get_mindmap_context_budget(prompt: str, reserve_output_tokens: int = 4096) -> tuple[int, int]: +def _get_mindmap_max_output_tokens() -> int: + return int(os.getenv("MINDMAP_LLM_MAX_OUTPUT_TOKENS", "4096")) + + +def _get_mindmap_context_budget(prompt: str, reserve_output_tokens: Optional[int] = None) -> tuple[int, int]: + if reserve_output_tokens is None: + reserve_output_tokens = _get_mindmap_max_output_tokens() max_context_tokens = int(os.getenv("MINDMAP_LLM_MAX_CONTEXT_TOKENS", "32768")) prompt_tokens = _estimate_tokens(prompt) safety_tokens = int(os.getenv("MINDMAP_LLM_SAFETY_TOKENS", "1024")) @@ -503,6 +509,7 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona model = os.getenv("MINDMAP_LLM_MODEL", "gemma-4-26B") api_key = os.getenv("MINDMAP_LLM_API_KEY", "") timeout = int(os.getenv("MINDMAP_LLM_TIMEOUT", "180")) + max_output_tokens = _get_mindmap_max_output_tokens() if not base_url: raise RuntimeError("未配置智能整理模型服务,请设置 MINDMAP_LLM_BASE_URL") @@ -515,8 +522,8 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona {compact_markdown} """ logger.info( - "Mindmap LLM request start task_id={} role={} model={} base_url={} mode={} input_chars={} input_tokens_est={} prompt_chars={}", - task_id or "-", request_role, model, base_url, mode, len(compact_markdown), _estimate_tokens(compact_markdown), len(prompt_template) + "Mindmap LLM request start task_id={} role={} model={} base_url={} mode={} input_chars={} input_tokens_est={} prompt_chars={} max_tokens={}", + task_id or "-", request_role, model, base_url, mode, len(compact_markdown), _estimate_tokens(compact_markdown), len(prompt_template), max_output_tokens ) payload = { @@ -526,6 +533,7 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona {"role": "user", "content": prompt}, ], "temperature": float(os.getenv("MINDMAP_LLM_TEMPERATURE", "0.2")), + "max_tokens": max_output_tokens, } data = json.dumps(payload, ensure_ascii=False).encode("utf-8") headers = {"Content-Type": "application/json"}