feat(思维导图助手)：优化打包

2026-07-03 13:56:00 +08:00 · 2026-07-03 13:56:00 +08:00 · 4b50b6bf4e
parent 59cbda30e2
commit 4b50b6bf4e
7 changed files with 232 additions and 32 deletions
--- a/docker/china/Dockerfile
+++ b/docker/china/Dockerfile
@ -38,10 +38,11 @@ RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
    python3 -m pip install \
        "numpy==1.26.4" \
        "opencv-python==4.11.0.86" \
-        "pydantic>=2.12,<3" \
        "modelscope" \
        "magic-pdf" \
        -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install --force-reinstall --no-cache-dir "pydantic>=2.12,<3" -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -c "import pydantic; print('pydantic', pydantic.__version__); from pydantic import TypeAdapter; print('pydantic TypeAdapter ok')" && \
    python3 -m pip cache purge

 WORKDIR /app
--- a/docker/china/corex.Dockerfile
+++ b/docker/china/corex.Dockerfile
@ -18,10 +18,12 @@ RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
                            numpy==1.26.4 \
                            opencv-python==4.11.0.86 \
                            -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install --force-reinstall --no-cache-dir "pydantic>=2.12,<3" -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -c "import pydantic; print('pydantic', pydantic.__version__); from pydantic import TypeAdapter; print('pydantic TypeAdapter ok')" && \
    python3 -m pip cache purge

 # Download models and update the configuration file
 RUN /bin/bash -c "mineru-models-download -s modelscope -m all"

 # Set the entry point to activate the virtual environment and run the command line tool
-ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
--- a/mineru/cli/fast_api.py
+++ b/mineru/cli/fast_api.py
@ -361,10 +361,22 @@ def get_infer_result(file_suffix_identifier: str, pdf_name: str, parse_dir: str)
 api_router = APIRouter(prefix="/api")


+class MindmapLlmConfig(BaseModel):  # Per-request LLM overrides; a field left as None falls back to its MINDMAP_LLM_* env var.
+    base_url: Optional[str] = None  # None/blank -> MINDMAP_LLM_BASE_URL; trailing "/" is stripped. NOTE(review): client-supplied target URL - confirm this is acceptable.
+    api_key: Optional[str] = None  # None/blank -> MINDMAP_LLM_API_KEY; the request-start log records only bool(api_key).
+    model: Optional[str] = None  # None/blank -> MINDMAP_LLM_MODEL, default "gemma-4-26B".
+    timeout: Optional[int] = None  # None -> MINDMAP_LLM_TIMEOUT, default 180.
+    temperature: Optional[float] = None  # None -> MINDMAP_LLM_TEMPERATURE, default 0.2.
+    max_context_tokens: Optional[int] = None  # None -> MINDMAP_LLM_MAX_CONTEXT_TOKENS, default 32768.
+    max_output_tokens: Optional[int] = None  # None -> MINDMAP_LLM_MAX_OUTPUT_TOKENS, default 4096.
+    safety_tokens: Optional[int] = None  # None -> MINDMAP_LLM_SAFETY_TOKENS, default 1024.
+
+
 class MindmapOrganizeRequest(BaseModel):  # Request body for creating a mindmap organize task.
    markdown: str
    mode: str = "smart"  # Forwarded unchanged to the LLM call helper.
    prompt: Optional[str] = None  # None/blank -> DEFAULT_MINDMAP_ORGANIZE_PROMPT is used instead.
+    llm_config: Optional[MindmapLlmConfig] = None  # Optional per-request LLM overrides; None -> env-var configuration only.


 def _extract_json_object(text: Optional[str]) -> str:
@ -436,16 +448,34 @@ def _estimate_tokens(text: str) -> int:
    return max(1, int(ascii_chars / 4) + int(non_ascii_chars * 1.5))


-def _get_mindmap_max_output_tokens() -> int:
-    return int(os.getenv("MINDMAP_LLM_MAX_OUTPUT_TOKENS", "4096"))
+def _optional_str(value: Optional[str], env_name: str, default: str = "") -> str:  # Resolve a string setting: request value, else env var, else default.
+    configured = (value or "").strip()  # None and whitespace-only values both collapse to "".
+    return configured or os.getenv(env_name, default)  # An empty request value defers to the environment.


-def _get_mindmap_context_budget(prompt: str, reserve_output_tokens: Optional[int] = None) -> tuple[int, int]:
+def _optional_int(value: Optional[int], env_name: str, default: int) -> int:  # Resolve an int setting: request value, else env var, else default.
+    return int(value if value is not None else os.getenv(env_name, str(default)))  # Only None triggers the fallback (0 is kept); a blank or non-numeric env value makes int() raise ValueError.
+
+
+def _optional_float(value: Optional[float], env_name: str, default: float) -> float:  # Resolve a float setting: request value, else env var, else default.
+    return float(value if value is not None else os.getenv(env_name, str(default)))  # Only None triggers the fallback (0.0 is kept); a blank or non-numeric env value makes float() raise ValueError.
+
+
+def _get_mindmap_max_output_tokens(llm_config: Optional[MindmapLlmConfig] = None) -> int:  # Return the output-token limit for one LLM call.
+    value = llm_config.max_output_tokens if llm_config else None  # No request config means no per-request override.
+    return _optional_int(value, "MINDMAP_LLM_MAX_OUTPUT_TOKENS", 4096)  # Falls back to the env var, then to 4096.
+
+
+def _get_mindmap_context_budget(
+    prompt: str,
+    reserve_output_tokens: Optional[int] = None,
+    llm_config: Optional[MindmapLlmConfig] = None
+) -> tuple[int, int]:
    if reserve_output_tokens is None:
-        reserve_output_tokens = _get_mindmap_max_output_tokens()
-    max_context_tokens = int(os.getenv("MINDMAP_LLM_MAX_CONTEXT_TOKENS", "32768"))
+        reserve_output_tokens = _get_mindmap_max_output_tokens(llm_config)
+    max_context_tokens = _optional_int(llm_config.max_context_tokens if llm_config else None, "MINDMAP_LLM_MAX_CONTEXT_TOKENS", 32768)
    prompt_tokens = _estimate_tokens(prompt)
-    safety_tokens = int(os.getenv("MINDMAP_LLM_SAFETY_TOKENS", "1024"))
+    safety_tokens = _optional_int(llm_config.safety_tokens if llm_config else None, "MINDMAP_LLM_SAFETY_TOKENS", 1024)
    input_budget_tokens = max(2048, max_context_tokens - prompt_tokens - reserve_output_tokens - safety_tokens)
    logger.info(
        "Mindmap context budget max_context_tokens={} prompt_tokens={} reserve_output_tokens={} safety_tokens={} input_budget_tokens={}",
@ -527,12 +557,20 @@ def _chunk_markdown_by_headings(markdown: str, max_tokens: int) -> list[str]:
    return chunks or [markdown]


-def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optional[str] = None, task_id: Optional[str] = None, request_role: str = "organize") -> str:
-    base_url = os.getenv("MINDMAP_LLM_BASE_URL", "").rstrip("/")
-    model = os.getenv("MINDMAP_LLM_MODEL", "gemma-4-26B")
-    api_key = os.getenv("MINDMAP_LLM_API_KEY", "")
-    timeout = int(os.getenv("MINDMAP_LLM_TIMEOUT", "180"))
-    max_output_tokens = _get_mindmap_max_output_tokens()
+def _call_mindmap_llm(
+    markdown: str,
+    mode: str = "smart",
+    custom_prompt: Optional[str] = None,
+    task_id: Optional[str] = None,
+    request_role: str = "organize",
+    llm_config: Optional[MindmapLlmConfig] = None
+) -> str:
+    base_url = _optional_str(llm_config.base_url if llm_config else None, "MINDMAP_LLM_BASE_URL").rstrip("/")
+    model = _optional_str(llm_config.model if llm_config else None, "MINDMAP_LLM_MODEL", "gemma-4-26B")
+    api_key = _optional_str(llm_config.api_key if llm_config else None, "MINDMAP_LLM_API_KEY")
+    timeout = _optional_int(llm_config.timeout if llm_config else None, "MINDMAP_LLM_TIMEOUT", 180)
+    max_output_tokens = _get_mindmap_max_output_tokens(llm_config)
+    temperature = _optional_float(llm_config.temperature if llm_config else None, "MINDMAP_LLM_TEMPERATURE", 0.2)

    if not base_url:
        raise RuntimeError("未配置智能整理模型服务，请设置 MINDMAP_LLM_BASE_URL")
@ -545,8 +583,9 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona
 {compact_markdown}
 """
    logger.info(
-        "Mindmap LLM request start task_id={} role={} model={} base_url={} mode={} input_chars={} input_tokens_est={} prompt_chars={} max_tokens={}",
-        task_id or "-", request_role, model, base_url, mode, len(compact_markdown), _estimate_tokens(compact_markdown), len(prompt_template), max_output_tokens
+        "Mindmap LLM request start task_id={} role={} model={} base_url={} mode={} input_chars={} input_tokens_est={} prompt_chars={} max_tokens={} timeout={} temperature={} api_key_configured={}",
+        task_id or "-", request_role, model, base_url, mode, len(compact_markdown), _estimate_tokens(compact_markdown),
+        len(prompt_template), max_output_tokens, timeout, temperature, bool(api_key)
    )

    payload = {
@ -555,7 +594,7 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona
            {"role": "system", "content": "你擅长把长文档整理成结构清晰、层次合理的思维导图 Markdown，并严格保留原文语言。"},
            {"role": "user", "content": prompt},
        ],
-        "temperature": float(os.getenv("MINDMAP_LLM_TEMPERATURE", "0.2")),
+        "temperature": temperature,
        "max_tokens": max_output_tokens,
    }
    data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
@ -600,9 +639,15 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona
    return organized


-def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional[str], task_id: str) -> str:
+def _organize_mindmap_markdown(
+    markdown: str,
+    mode: str,
+    custom_prompt: Optional[str],
+    task_id: str,
+    llm_config: Optional[MindmapLlmConfig]
+) -> str:
    prompt_template = (custom_prompt or "").strip() or DEFAULT_MINDMAP_ORGANIZE_PROMPT
-    _, input_budget_tokens = _get_mindmap_context_budget(prompt_template)
+    _, input_budget_tokens = _get_mindmap_context_budget(prompt_template, llm_config=llm_config)
    source_tokens = _estimate_tokens(markdown)
    logger.info(
        "Mindmap organize strategy task_id={} source_chars={} source_tokens_est={} input_budget_tokens={}",
@ -611,7 +656,7 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional

    if source_tokens <= input_budget_tokens:
        _update_task_progress(task_id, 35, "调用智能整理模型")
-        return _call_mindmap_llm(markdown, mode, prompt_template, task_id, "single")
+        return _call_mindmap_llm(markdown, mode, prompt_template, task_id, "single", llm_config)

    chunks = _chunk_markdown_by_headings(markdown, input_budget_tokens)
    logger.info("Mindmap large input split task_id={} chunks={}", task_id, len(chunks))
@ -623,14 +668,14 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional
            "Mindmap chunk organize task_id={} chunk={}/{} chars={} tokens_est={}",
            task_id, index, len(chunks), len(chunk), _estimate_tokens(chunk)
        )
-        partial = _call_mindmap_llm(chunk, mode, prompt_template, task_id, f"chunk-{index}")
+        partial = _call_mindmap_llm(chunk, mode, prompt_template, task_id, f"chunk-{index}", llm_config)
        partial_results.append(partial)

    merged_input = "\n\n".join(
        f"<!-- chunk {index} -->\n{partial}"
        for index, partial in enumerate(partial_results, start=1)
    )
-    _, merge_budget_tokens = _get_mindmap_context_budget(MINDMAP_MERGE_PROMPT)
+    _, merge_budget_tokens = _get_mindmap_context_budget(MINDMAP_MERGE_PROMPT, llm_config=llm_config)
    merge_tokens = _estimate_tokens(merged_input)
    if merge_tokens > merge_budget_tokens:
        logger.warning(
@ -641,14 +686,20 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional
        merged_round: list[str] = []
        for index, chunk in enumerate(merge_chunks, start=1):
            _update_task_progress(task_id, 78 + int(index / max(len(merge_chunks), 1) * 10), f"合并局部大纲 {index}/{len(merge_chunks)}")
-            merged_round.append(_call_mindmap_llm(chunk, mode, MINDMAP_MERGE_PROMPT, task_id, f"merge-round-{index}"))
+            merged_round.append(_call_mindmap_llm(chunk, mode, MINDMAP_MERGE_PROMPT, task_id, f"merge-round-{index}", llm_config))
        merged_input = "\n\n".join(merged_round)

    _update_task_progress(task_id, 90, "全局整理标题结构")
-    return _call_mindmap_llm(merged_input, mode, MINDMAP_MERGE_PROMPT, task_id, "merge")
+    return _call_mindmap_llm(merged_input, mode, MINDMAP_MERGE_PROMPT, task_id, "merge", llm_config)


-async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str, prompt: Optional[str]):
+async def _run_mindmap_organize_task(
+    task_id: str,
+    markdown: str,
+    mode: str,
+    prompt: Optional[str],
+    llm_config: Optional[MindmapLlmConfig]
+):
    try:
        _store_task_progress(task_id, {
            "progress": 10,
@ -658,7 +709,7 @@ async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str, pro
            "file_names": "",
            "result_md": None,
        })
-        organized = await asyncio.to_thread(_organize_mindmap_markdown, markdown, mode, prompt, task_id)
+        organized = await asyncio.to_thread(_organize_mindmap_markdown, markdown, mode, prompt, task_id, llm_config)
        state = _get_task_progress(task_id) or {}
        state.update({
            "progress": 100,
@ -722,10 +773,10 @@ async def create_mindmap_task(task_id: str, request: MindmapOrganizeRequest):
        "result_md": None,
    }
    _store_task_progress(task_id, state)
-    asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode, request.prompt))
+    asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode, request.prompt, request.llm_config))
    logger.info(
-        "Registered mindmap organize task pid={} task_id={} mode={} input_chars={} custom_prompt={}",
-        os.getpid(), task_id, request.mode, len(markdown), bool((request.prompt or "").strip())
+        "Registered mindmap organize task pid={} task_id={} mode={} input_chars={} custom_prompt={} request_llm_config={}",
+        os.getpid(), task_id, request.mode, len(markdown), bool((request.prompt or "").strip()), bool(request.llm_config)
    )
    return state

--- a/web_ui/src/api/document.ts
+++ b/web_ui/src/api/document.ts
@ -41,6 +41,17 @@ export interface MindmapOrganizeProgress extends ParseProgress {
  result_md?: string | null
 }

+export interface MindmapLlmConfig { // Optional LLM overrides sent as `llm_config` in the mindmap task request body.
+  base_url?: string // OpenAI-compatible base URL; the backend appends /chat/completions.
+  api_key?: string // Omitted -> the backend uses its MINDMAP_LLM_API_KEY.
+  model?: string // Model name passed to the backend.
+  timeout?: number // Request timeout in seconds.
+  temperature?: number // Sampling temperature.
+  max_context_tokens?: number // Context window size used for the input budget.
+  max_output_tokens?: number // Upper bound on generated tokens per call.
+  safety_tokens?: number // Tokens held back as a safety margin in the budget.
+}
+
 export const documentApi = {
  /**
   * 解析文档
@ -102,11 +113,18 @@ export const documentApi = {
    })
  },

-  createMindmapTask(taskId: string, markdown: string, mode = 'smart', prompt?: string): Promise<MindmapOrganizeProgress> {
+  createMindmapTask(
+    taskId: string,
+    markdown: string,
+    mode = 'smart',
+    prompt?: string,
+    llmConfig?: MindmapLlmConfig
+  ): Promise<MindmapOrganizeProgress> {
    return request.post(`/api/mindmap_tasks/${encodeURIComponent(taskId)}`, {
      markdown,
      mode,
-      prompt
+      prompt,
+      llm_config: llmConfig
    }).then(result => {
      return result as unknown as MindmapOrganizeProgress
    })
--- a/web_ui/src/components/ConfigPanel.vue
+++ b/web_ui/src/components/ConfigPanel.vue
@ -130,6 +130,99 @@
        </div>
      </el-form-item>

+      <el-form-item label="大模型服务地址" class="form-item">
+        <el-input
+          v-model="draftConfig.mindmapLlmBaseUrl"
+          placeholder="例如：http://10.100.53.199:9527/v1"
+          class="input"
+          clearable
+        />
+        <div class="form-item-description">
+          OpenAI 兼容接口地址，后端会请求该地址下的 /chat/completions；留空时使用服务端 MINDMAP_LLM_BASE_URL。
+        </div>
+      </el-form-item>
+
+      <el-form-item label="大模型 API Key" class="form-item">
+        <el-input
+          v-model="draftConfig.mindmapLlmApiKey"
+          type="password"
+          placeholder="留空则使用服务端 MINDMAP_LLM_API_KEY"
+          class="input"
+          show-password
+          clearable
+        />
+        <div class="form-item-description">
+          仅随智能整理请求发送给后端，后端日志不会输出明文 Key。
+        </div>
+      </el-form-item>
+
+      <el-form-item label="大模型名称" class="form-item">
+        <el-input
+          v-model="draftConfig.mindmapLlmModel"
+          placeholder="gemma-4-26B"
+          class="input"
+          clearable
+        />
+      </el-form-item>
+
+      <div class="llm-grid">
+        <el-form-item label="请求超时（秒）" class="form-item">
+          <el-input-number
+            v-model="draftConfig.mindmapLlmTimeout"
+            :min="30"
+            :max="3600"
+            :step="30"
+            controls-position="right"
+            class="number-input"
+          />
+        </el-form-item>
+
+        <el-form-item label="温度" class="form-item">
+          <el-input-number
+            v-model="draftConfig.mindmapLlmTemperature"
+            :min="0"
+            :max="2"
+            :step="0.1"
+            :precision="1"
+            controls-position="right"
+            class="number-input"
+          />
+        </el-form-item>
+
+        <el-form-item label="最大上下文 Token" class="form-item">
+          <el-input-number
+            v-model="draftConfig.mindmapLlmMaxContextTokens"
+            :min="4096"
+            :max="262144"
+            :step="1024"
+            controls-position="right"
+            class="number-input"
+          />
+        </el-form-item>
+
+        <el-form-item label="最大输出 Token" class="form-item">
+          <el-input-number
+            v-model="draftConfig.mindmapLlmMaxOutputTokens"
+            :min="512"
+            :max="32768"
+            :step="512"
+            controls-position="right"
+            class="number-input"
+          />
+        </el-form-item>
+
+        <el-form-item label="安全预留 Token" class="form-item">
+          <el-input-number
+            v-model="draftConfig.mindmapLlmSafetyTokens"
+            :min="0"
+            :max="8192"
+            :step="256"
+            controls-position="right"
+            class="number-input"
+          />
+        </el-form-item>
+      </div>
+
      <el-form-item label="智能整理提示词" class="form-item">
        <el-input
          v-model="draftConfig.mindmapPrompt"
@ -323,6 +416,16 @@ const getBackendDescription = (backend: string) => {
  line-height: 1.4;
 }

+.llm-grid {
+  display: grid;
+  grid-template-columns: repeat(2, minmax(0, 1fr));
+  gap: 0 12px;
+}
+
+.number-input {
+  width: 100%;
+}
+
 .config-actions {
  display: flex;
  justify-content: flex-end;
--- a/web_ui/src/composables/useDocumentProcessor.ts
+++ b/web_ui/src/composables/useDocumentProcessor.ts
@ -15,6 +15,14 @@ export interface DocumentConfig {
  serverUrl: string
  exportResolution: number
  mindmapPrompt: string
+  mindmapLlmBaseUrl: string
+  mindmapLlmApiKey: string
+  mindmapLlmModel: string
+  mindmapLlmTimeout: number
+  mindmapLlmTemperature: number
+  mindmapLlmMaxContextTokens: number
+  mindmapLlmMaxOutputTokens: number
+  mindmapLlmSafetyTokens: number
  tableEnable: boolean
  formulaEnable: boolean
  language: string
@ -49,6 +57,14 @@ export const DEFAULT_DOCUMENT_CONFIG: DocumentConfig = {
  serverUrl: 'http://localhost:30000',
  exportResolution: 7680,
  mindmapPrompt: DEFAULT_MINDMAP_PROMPT,
+  mindmapLlmBaseUrl: '',
+  mindmapLlmApiKey: '',
+  mindmapLlmModel: 'gemma-4-26B',
+  mindmapLlmTimeout: 180,
+  mindmapLlmTemperature: 0.2,
+  mindmapLlmMaxContextTokens: 32768,
+  mindmapLlmMaxOutputTokens: 4096,
+  mindmapLlmSafetyTokens: 1024,
  tableEnable: true,
  formulaEnable: true,
  language: 'ch',
--- a/web_ui/src/views/DocumentProcessor.vue
+++ b/web_ui/src/views/DocumentProcessor.vue
@ -456,7 +456,16 @@ const startSmartOrganize = async () => {
  smartOrganizeError.value = ''

  try {
-    await documentApi.createMindmapTask(taskId, markdown, 'hybrid', config.mindmapPrompt)
+    await documentApi.createMindmapTask(taskId, markdown, 'hybrid', config.mindmapPrompt, {
+      base_url: config.mindmapLlmBaseUrl || undefined,
+      api_key: config.mindmapLlmApiKey || undefined,
+      model: config.mindmapLlmModel || undefined,
+      timeout: config.mindmapLlmTimeout,
+      temperature: config.mindmapLlmTemperature,
+      max_context_tokens: config.mindmapLlmMaxContextTokens,
+      max_output_tokens: config.mindmapLlmMaxOutputTokens,
+      safety_tokens: config.mindmapLlmSafetyTokens
+    })
    startSmartOrganizePolling(taskId)
  } catch (err) {
    isSmartOrganizing.value = false