From 64ac579c2aeccc16776eaf0f017604ca059d7ecc Mon Sep 17 00:00:00 2001 From: panyy Date: Thu, 25 Jun 2026 09:39:25 +0800 Subject: [PATCH] =?UTF-8?q?feat(=E6=80=9D=E7=BB=B4=E5=AF=BC=E5=9B=BE?= =?UTF-8?q?=E5=8A=A9=E6=89=8B)=EF=BC=9A=E6=80=9D=E7=BB=B4=E5=AF=BC?= =?UTF-8?q?=E5=9B=BE=E5=8A=A9=E6=89=8B=E5=A2=9E=E5=8A=A0=E6=99=BA=E8=83=BD?= =?UTF-8?q?=E6=95=B4=E7=90=86=E6=80=BB=E7=BB=93-=E8=BF=94=E5=9B=9E?= =?UTF-8?q?=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mineru/cli/fast_api.py | 51 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/mineru/cli/fast_api.py b/mineru/cli/fast_api.py index 3a822b8..58a32b7 100644 --- a/mineru/cli/fast_api.py +++ b/mineru/cli/fast_api.py @@ -367,14 +367,37 @@ class MindmapOrganizeRequest(BaseModel): prompt: Optional[str] = None -def _extract_json_object(text: str) -> str: - content = text.strip() +def _extract_json_object(text: Optional[str]) -> str: + content = (text or "").strip() if content.startswith("```"): content = re.sub(r"^```(?:json|markdown|md)?\s*", "", content, flags=re.IGNORECASE) content = re.sub(r"\s*```$", "", content) return content.strip() +def _extract_llm_message_content(message: dict) -> str: + content = message.get("content") + if isinstance(content, str): + return content + if isinstance(content, list): + parts = [] + for item in content: + if isinstance(item, str): + parts.append(item) + elif isinstance(item, dict): + text = item.get("text") or item.get("content") + if isinstance(text, str): + parts.append(text) + if parts: + return "\n".join(parts) + + for key in ("reasoning_content", "reasoning", "output_text", "text"): + value = message.get(key) + if isinstance(value, str) and value.strip(): + return value + return "" + + DEFAULT_MINDMAP_ORGANIZE_PROMPT = """你是文档结构整理助手。请基于用户提供的 Markdown 生成适合思维导图展示的 Markdown。 要求: @@ -551,14 +574,28 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona except Exception as exc: raise RuntimeError(f"智能整理模型请求失败: {exc}") from exc - message = result.get("choices", [{}])[0].get("message", {}) - content = message.get("content", "") + choices = result.get("choices") or [] + choice = choices[0] if choices else {} + message = choice.get("message") or {} + finish_reason = choice.get("finish_reason") + usage = result.get("usage") + content = _extract_llm_message_content(message) + if not content: + logger.warning( + "Mindmap LLM returned empty content task_id={} role={} finish_reason={} usage={} message_keys={}", + task_id or "-", request_role, finish_reason, usage, list(message.keys()) + ) + raise RuntimeError(f"智能整理模型返回空内容,finish_reason={finish_reason}") organized = _extract_json_object(content) if not organized: - raise RuntimeError("智能整理模型未返回有效内容") + logger.warning( + "Mindmap LLM returned invalid organized content task_id={} role={} finish_reason={} usage={}", + task_id or "-", request_role, finish_reason, usage + ) + raise RuntimeError(f"智能整理模型未返回有效内容,finish_reason={finish_reason}") logger.info( - "Mindmap LLM request completed task_id={} role={} output_chars={} output_tokens_est={}", - task_id or "-", request_role, len(organized), _estimate_tokens(organized) + "Mindmap LLM request completed task_id={} role={} finish_reason={} usage={} output_chars={} output_tokens_est={}", + task_id or "-", request_role, finish_reason, usage, len(organized), _estimate_tokens(organized) ) return organized