From 4b50b6bf4e79b09db7d040e326d43f8e2ffec998 Mon Sep 17 00:00:00 2001
From: panyy <yf_pan.yuanyin@unisinsight.com>
Date: Fri, 3 Jul 2026 13:56:00 +0800
Subject: [PATCH] =?UTF-8?q?feat(=E6=80=9D=E7=BB=B4=E5=AF=BC=E5=9B=BE?=
 =?UTF-8?q?=E5=8A=A9=E6=89=8B)=EF=BC=9A=E4=BC=98=E5=8C=96=E6=89=93?=
 =?UTF-8?q?=E5=8C=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docker/china/Dockerfile                       |   3 +-
 docker/china/corex.Dockerfile                 |   4 +-
 mineru/cli/fast_api.py                        | 105 +++++++++++++-----
 web_ui/src/api/document.ts                    |  22 +++-
 web_ui/src/components/ConfigPanel.vue         | 103 +++++++++++++++++
 .../src/composables/useDocumentProcessor.ts   |  16 +++
 web_ui/src/views/DocumentProcessor.vue        |  11 +-
 7 files changed, 232 insertions(+), 32 deletions(-)

diff --git a/docker/china/Dockerfile b/docker/china/Dockerfile
index 09aa62a..bfb7006 100644
--- a/docker/china/Dockerfile
+++ b/docker/china/Dockerfile
@@ -38,10 +38,11 @@ RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
     python3 -m pip install \
         "numpy==1.26.4" \
         "opencv-python==4.11.0.86" \
-        "pydantic>=2.12,<3" \
         "modelscope" \
         "magic-pdf" \
         -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install --force-reinstall --no-cache-dir "pydantic>=2.12,<3" -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -c "import pydantic; print('pydantic', pydantic.__version__); from pydantic import TypeAdapter; print('pydantic TypeAdapter ok')" && \
     python3 -m pip cache purge
 
 WORKDIR /app
diff --git a/docker/china/corex.Dockerfile b/docker/china/corex.Dockerfile
index a883bc9..c59ae05 100644
--- a/docker/china/corex.Dockerfile
+++ b/docker/china/corex.Dockerfile
@@ -18,10 +18,12 @@ RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
                             numpy==1.26.4 \
                             opencv-python==4.11.0.86 \
                             -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install --force-reinstall --no-cache-dir "pydantic>=2.12,<3" -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -c "import pydantic; print('pydantic', pydantic.__version__); from pydantic import TypeAdapter; print('pydantic TypeAdapter ok')" && \
     python3 -m pip cache purge
 
 # Download models and update the configuration file
 RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
 
 # Set the entry point to activate the virtual environment and run the command line tool
-ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
\ No newline at end of file
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
diff --git a/mineru/cli/fast_api.py b/mineru/cli/fast_api.py
index 58a32b7..4c333a8 100644
--- a/mineru/cli/fast_api.py
+++ b/mineru/cli/fast_api.py
@@ -361,10 +361,22 @@ def get_infer_result(file_suffix_identifier: str, pdf_name: str, parse_dir: str)
 api_router = APIRouter(prefix="/api")
 
 
+class MindmapLlmConfig(BaseModel):  # optional per-request LLM overrides; a field left as None falls back to its MINDMAP_LLM_* env var
+    base_url: Optional[str] = None  # overrides MINDMAP_LLM_BASE_URL
+    api_key: Optional[str] = None  # overrides MINDMAP_LLM_API_KEY
+    model: Optional[str] = None  # overrides MINDMAP_LLM_MODEL
+    timeout: Optional[int] = None  # overrides MINDMAP_LLM_TIMEOUT
+    temperature: Optional[float] = None  # overrides MINDMAP_LLM_TEMPERATURE
+    max_context_tokens: Optional[int] = None  # overrides MINDMAP_LLM_MAX_CONTEXT_TOKENS
+    max_output_tokens: Optional[int] = None  # overrides MINDMAP_LLM_MAX_OUTPUT_TOKENS
+    safety_tokens: Optional[int] = None  # overrides MINDMAP_LLM_SAFETY_TOKENS
+
+
 class MindmapOrganizeRequest(BaseModel):
     markdown: str
     mode: str = "smart"
     prompt: Optional[str] = None
+    llm_config: Optional[MindmapLlmConfig] = None
 
 
 def _extract_json_object(text: Optional[str]) -> str:
@@ -436,16 +448,34 @@ def _estimate_tokens(text: str) -> int:
     return max(1, int(ascii_chars / 4) + int(non_ascii_chars * 1.5))
 
 
-def _get_mindmap_max_output_tokens() -> int:
-    return int(os.getenv("MINDMAP_LLM_MAX_OUTPUT_TOKENS", "4096"))
+def _optional_str(value: Optional[str], env_name: str, default: str = "") -> str:  # precedence: request value > env var > default
+    configured = (value or "").strip()  # None or a blank request value means "not set"
+    return configured or os.getenv(env_name, "").strip() or default  # a set-but-empty env var must not mask the default
 
 
-def _get_mindmap_context_budget(prompt: str, reserve_output_tokens: Optional[int] = None) -> tuple[int, int]:
+def _optional_int(value: Optional[int], env_name: str, default: int) -> int:  # precedence: request value > env var > default
+    return int(value if value is not None else (os.getenv(env_name, "").strip() or default))  # empty env var -> default, not ValueError
+
+
+def _optional_float(value: Optional[float], env_name: str, default: float) -> float:  # precedence: request value > env var > default
+    return float(value if value is not None else (os.getenv(env_name, "").strip() or default))  # empty env var -> default, not ValueError
+
+
+def _get_mindmap_max_output_tokens(llm_config: Optional[MindmapLlmConfig] = None) -> int:
+    value = llm_config.max_output_tokens if llm_config else None
+    return _optional_int(value, "MINDMAP_LLM_MAX_OUTPUT_TOKENS", 4096)
+
+
+def _get_mindmap_context_budget(
+    prompt: str,
+    reserve_output_tokens: Optional[int] = None,
+    llm_config: Optional[MindmapLlmConfig] = None
+) -> tuple[int, int]:
     if reserve_output_tokens is None:
-        reserve_output_tokens = _get_mindmap_max_output_tokens()
-    max_context_tokens = int(os.getenv("MINDMAP_LLM_MAX_CONTEXT_TOKENS", "32768"))
+        reserve_output_tokens = _get_mindmap_max_output_tokens(llm_config)
+    max_context_tokens = _optional_int(llm_config.max_context_tokens if llm_config else None, "MINDMAP_LLM_MAX_CONTEXT_TOKENS", 32768)
     prompt_tokens = _estimate_tokens(prompt)
-    safety_tokens = int(os.getenv("MINDMAP_LLM_SAFETY_TOKENS", "1024"))
+    safety_tokens = _optional_int(llm_config.safety_tokens if llm_config else None, "MINDMAP_LLM_SAFETY_TOKENS", 1024)
     input_budget_tokens = max(2048, max_context_tokens - prompt_tokens - reserve_output_tokens - safety_tokens)
     logger.info(
         "Mindmap context budget max_context_tokens={} prompt_tokens={} reserve_output_tokens={} safety_tokens={} input_budget_tokens={}",
@@ -527,12 +557,23 @@ def _chunk_markdown_by_headings(markdown: str, max_tokens: int) -> list[str]:
     return chunks or [markdown]
 
 
-def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optional[str] = None, task_id: Optional[str] = None, request_role: str = "organize") -> str:
-    base_url = os.getenv("MINDMAP_LLM_BASE_URL", "").rstrip("/")
-    model = os.getenv("MINDMAP_LLM_MODEL", "gemma-4-26B")
-    api_key = os.getenv("MINDMAP_LLM_API_KEY", "")
-    timeout = int(os.getenv("MINDMAP_LLM_TIMEOUT", "180"))
-    max_output_tokens = _get_mindmap_max_output_tokens()
+def _call_mindmap_llm(
+    markdown: str,
+    mode: str = "smart",
+    custom_prompt: Optional[str] = None,
+    task_id: Optional[str] = None,
+    request_role: str = "organize",
+    llm_config: Optional[MindmapLlmConfig] = None
+) -> str:
+    base_url = _optional_str(llm_config.base_url if llm_config else None, "MINDMAP_LLM_BASE_URL").rstrip("/")
+    model = _optional_str(llm_config.model if llm_config else None, "MINDMAP_LLM_MODEL", "gemma-4-26B")
+    api_key = _optional_str(llm_config.api_key if llm_config else None, "MINDMAP_LLM_API_KEY")
+    timeout = _optional_int(llm_config.timeout if llm_config else None, "MINDMAP_LLM_TIMEOUT", 180)
+    max_output_tokens = _get_mindmap_max_output_tokens(llm_config)
+    temperature = _optional_float(llm_config.temperature if llm_config else None, "MINDMAP_LLM_TEMPERATURE", 0.2)
+    # base_url can now come from the request body, so refuse non-HTTP schemes (file://, ftp://, ...) before any request is built.
+    if base_url and not base_url.lower().startswith(("http://", "https://")):
+        raise RuntimeError("智能整理模型服务地址必须以 http:// 或 https:// 开头")
 
     if not base_url:
         raise RuntimeError("未配置智能整理模型服务，请设置 MINDMAP_LLM_BASE_URL")
@@ -545,8 +583,9 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona
 {compact_markdown}
 """
     logger.info(
-        "Mindmap LLM request start task_id={} role={} model={} base_url={} mode={} input_chars={} input_tokens_est={} prompt_chars={} max_tokens={}",
-        task_id or "-", request_role, model, base_url, mode, len(compact_markdown), _estimate_tokens(compact_markdown), len(prompt_template), max_output_tokens
+        "Mindmap LLM request start task_id={} role={} model={} base_url={} mode={} input_chars={} input_tokens_est={} prompt_chars={} max_tokens={} timeout={} temperature={} api_key_configured={}",
+        task_id or "-", request_role, model, base_url, mode, len(compact_markdown), _estimate_tokens(compact_markdown),
+        len(prompt_template), max_output_tokens, timeout, temperature, bool(api_key)
     )
 
     payload = {
@@ -555,7 +594,7 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona
             {"role": "system", "content": "你擅长把长文档整理成结构清晰、层次合理的思维导图 Markdown，并严格保留原文语言。"},
             {"role": "user", "content": prompt},
         ],
-        "temperature": float(os.getenv("MINDMAP_LLM_TEMPERATURE", "0.2")),
+        "temperature": temperature,
         "max_tokens": max_output_tokens,
     }
     data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
@@ -600,9 +639,15 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona
     return organized
 
 
-def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional[str], task_id: str) -> str:
+def _organize_mindmap_markdown(
+    markdown: str,
+    mode: str,
+    custom_prompt: Optional[str],
+    task_id: str,
+    llm_config: Optional[MindmapLlmConfig]
+) -> str:
     prompt_template = (custom_prompt or "").strip() or DEFAULT_MINDMAP_ORGANIZE_PROMPT
-    _, input_budget_tokens = _get_mindmap_context_budget(prompt_template)
+    _, input_budget_tokens = _get_mindmap_context_budget(prompt_template, llm_config=llm_config)
     source_tokens = _estimate_tokens(markdown)
     logger.info(
         "Mindmap organize strategy task_id={} source_chars={} source_tokens_est={} input_budget_tokens={}",
@@ -611,7 +656,7 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional
 
     if source_tokens <= input_budget_tokens:
         _update_task_progress(task_id, 35, "调用智能整理模型")
-        return _call_mindmap_llm(markdown, mode, prompt_template, task_id, "single")
+        return _call_mindmap_llm(markdown, mode, prompt_template, task_id, "single", llm_config)
 
     chunks = _chunk_markdown_by_headings(markdown, input_budget_tokens)
     logger.info("Mindmap large input split task_id={} chunks={}", task_id, len(chunks))
@@ -623,14 +668,14 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional
             "Mindmap chunk organize task_id={} chunk={}/{} chars={} tokens_est={}",
             task_id, index, len(chunks), len(chunk), _estimate_tokens(chunk)
         )
-        partial = _call_mindmap_llm(chunk, mode, prompt_template, task_id, f"chunk-{index}")
+        partial = _call_mindmap_llm(chunk, mode, prompt_template, task_id, f"chunk-{index}", llm_config)
         partial_results.append(partial)
 
     merged_input = "\n\n".join(
         f"<!-- chunk {index} -->\n{partial}"
         for index, partial in enumerate(partial_results, start=1)
     )
-    _, merge_budget_tokens = _get_mindmap_context_budget(MINDMAP_MERGE_PROMPT)
+    _, merge_budget_tokens = _get_mindmap_context_budget(MINDMAP_MERGE_PROMPT, llm_config=llm_config)
     merge_tokens = _estimate_tokens(merged_input)
     if merge_tokens > merge_budget_tokens:
         logger.warning(
@@ -641,14 +686,20 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional
         merged_round: list[str] = []
         for index, chunk in enumerate(merge_chunks, start=1):
             _update_task_progress(task_id, 78 + int(index / max(len(merge_chunks), 1) * 10), f"合并局部大纲 {index}/{len(merge_chunks)}")
-            merged_round.append(_call_mindmap_llm(chunk, mode, MINDMAP_MERGE_PROMPT, task_id, f"merge-round-{index}"))
+            merged_round.append(_call_mindmap_llm(chunk, mode, MINDMAP_MERGE_PROMPT, task_id, f"merge-round-{index}", llm_config))
         merged_input = "\n\n".join(merged_round)
 
     _update_task_progress(task_id, 90, "全局整理标题结构")
-    return _call_mindmap_llm(merged_input, mode, MINDMAP_MERGE_PROMPT, task_id, "merge")
+    return _call_mindmap_llm(merged_input, mode, MINDMAP_MERGE_PROMPT, task_id, "merge", llm_config)
 
 
-async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str, prompt: Optional[str]):
+async def _run_mindmap_organize_task(
+    task_id: str,
+    markdown: str,
+    mode: str,
+    prompt: Optional[str],
+    llm_config: Optional[MindmapLlmConfig]
+):
     try:
         _store_task_progress(task_id, {
             "progress": 10,
@@ -658,7 +709,7 @@ async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str, pro
             "file_names": "",
             "result_md": None,
         })
-        organized = await asyncio.to_thread(_organize_mindmap_markdown, markdown, mode, prompt, task_id)
+        organized = await asyncio.to_thread(_organize_mindmap_markdown, markdown, mode, prompt, task_id, llm_config)
         state = _get_task_progress(task_id) or {}
         state.update({
             "progress": 100,
@@ -722,10 +773,10 @@ async def create_mindmap_task(task_id: str, request: MindmapOrganizeRequest):
         "result_md": None,
     }
     _store_task_progress(task_id, state)
-    asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode, request.prompt))
+    asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode, request.prompt, request.llm_config))
     logger.info(
-        "Registered mindmap organize task pid={} task_id={} mode={} input_chars={} custom_prompt={}",
-        os.getpid(), task_id, request.mode, len(markdown), bool((request.prompt or "").strip())
+        "Registered mindmap organize task pid={} task_id={} mode={} input_chars={} custom_prompt={} request_llm_config={}",
+        os.getpid(), task_id, request.mode, len(markdown), bool((request.prompt or "").strip()), bool(request.llm_config)
     )
     return state
 
diff --git a/web_ui/src/api/document.ts b/web_ui/src/api/document.ts
index 688626c..bfbc17c 100644
--- a/web_ui/src/api/document.ts
+++ b/web_ui/src/api/document.ts
@@ -41,6 +41,17 @@ export interface MindmapOrganizeProgress extends ParseProgress {
   result_md?: string | null
 }
 
+export interface MindmapLlmConfig { // per-request LLM overrides, posted as `llm_config` by createMindmapTask
+  base_url?: string // omitted -> backend uses MINDMAP_LLM_BASE_URL
+  api_key?: string // omitted -> backend uses MINDMAP_LLM_API_KEY
+  model?: string // omitted -> backend uses MINDMAP_LLM_MODEL
+  timeout?: number // omitted -> backend uses MINDMAP_LLM_TIMEOUT
+  temperature?: number // omitted -> backend uses MINDMAP_LLM_TEMPERATURE
+  max_context_tokens?: number // omitted -> backend uses MINDMAP_LLM_MAX_CONTEXT_TOKENS
+  max_output_tokens?: number // omitted -> backend uses MINDMAP_LLM_MAX_OUTPUT_TOKENS
+  safety_tokens?: number // omitted -> backend uses MINDMAP_LLM_SAFETY_TOKENS
+}
+
 export const documentApi = {
   /**
    * 解析文档
@@ -102,11 +113,18 @@ export const documentApi = {
     })
   },
 
-  createMindmapTask(taskId: string, markdown: string, mode = 'smart', prompt?: string): Promise<MindmapOrganizeProgress> {
+  createMindmapTask(
+    taskId: string,
+    markdown: string,
+    mode = 'smart',
+    prompt?: string,
+    llmConfig?: MindmapLlmConfig
+  ): Promise<MindmapOrganizeProgress> {
     return request.post(`/api/mindmap_tasks/${encodeURIComponent(taskId)}`, {
       markdown,
       mode,
-      prompt
+      prompt,
+      llm_config: llmConfig
     }).then(result => {
       return result as unknown as MindmapOrganizeProgress
     })
diff --git a/web_ui/src/components/ConfigPanel.vue b/web_ui/src/components/ConfigPanel.vue
index f67b3ba..57bb8c1 100644
--- a/web_ui/src/components/ConfigPanel.vue
+++ b/web_ui/src/components/ConfigPanel.vue
@@ -130,6 +130,99 @@
         </div>
       </el-form-item>
 
+      <el-form-item label="大模型服务地址" class="form-item">
+        <el-input
+          v-model="draftConfig.mindmapLlmBaseUrl"
+          placeholder="例如：http://10.100.53.199:9527/v1"
+          class="input"
+          clearable
+        />
+        <div class="form-item-description">
+          OpenAI 兼容接口地址，后端会请求该地址下的 /chat/completions；留空时使用服务端 MINDMAP_LLM_BASE_URL。
+        </div>
+      </el-form-item>
+
+      <el-form-item label="大模型 API Key" class="form-item">
+        <el-input
+          v-model="draftConfig.mindmapLlmApiKey"
+          type="password"
+          placeholder="留空则使用服务端 MINDMAP_LLM_API_KEY"
+          class="input"
+          show-password
+          clearable
+        />
+        <div class="form-item-description">
+          仅随智能整理请求发送给后端，后端日志不会输出明文 Key。
+        </div>
+      </el-form-item>
+
+      <el-form-item label="大模型名称" class="form-item">
+        <el-input
+          v-model="draftConfig.mindmapLlmModel"
+          placeholder="gemma-4-26B"
+          class="input"
+          clearable
+        />
+      </el-form-item>
+
+      <div class="llm-grid">
+        <el-form-item label="请求超时（秒）" class="form-item">
+          <el-input-number
+            v-model="draftConfig.mindmapLlmTimeout"
+            :min="30"
+            :max="3600"
+            :step="30"
+            controls-position="right"
+            class="number-input"
+          />
+        </el-form-item>
+
+        <el-form-item label="温度" class="form-item">
+          <el-input-number
+            v-model="draftConfig.mindmapLlmTemperature"
+            :min="0"
+            :max="2"
+            :step="0.1"
+            :precision="1"
+            controls-position="right"
+            class="number-input"
+          />
+        </el-form-item>
+
+        <el-form-item label="最大上下文 Token" class="form-item">
+          <el-input-number
+            v-model="draftConfig.mindmapLlmMaxContextTokens"
+            :min="4096"
+            :max="262144"
+            :step="1024"
+            controls-position="right"
+            class="number-input"
+          />
+        </el-form-item>
+
+        <el-form-item label="最大输出 Token" class="form-item">
+          <el-input-number
+            v-model="draftConfig.mindmapLlmMaxOutputTokens"
+            :min="512"
+            :max="32768"
+            :step="512"
+            controls-position="right"
+            class="number-input"
+          />
+        </el-form-item>
+
+        <el-form-item label="安全预留 Token" class="form-item">
+          <el-input-number
+            v-model="draftConfig.mindmapLlmSafetyTokens"
+            :min="0"
+            :max="8192"
+            :step="256"
+            controls-position="right"
+            class="number-input"
+          />
+        </el-form-item>
+      </div>
+
       <el-form-item label="智能整理提示词" class="form-item">
         <el-input
           v-model="draftConfig.mindmapPrompt"
@@ -323,6 +416,16 @@ const getBackendDescription = (backend: string) => {
   line-height: 1.4;
 }
 
+.llm-grid { /* lays out the numeric LLM settings in two equal-width columns */
+  display: grid;
+  grid-template-columns: repeat(2, minmax(0, 1fr));
+  gap: 0 12px; /* no row gap, 12px between columns */
+}
+
+.number-input { /* stretch each el-input-number to the full width of its grid cell */
+  width: 100%;
+}
+
 .config-actions {
   display: flex;
   justify-content: flex-end;
diff --git a/web_ui/src/composables/useDocumentProcessor.ts b/web_ui/src/composables/useDocumentProcessor.ts
index 8268674..b07459e 100644
--- a/web_ui/src/composables/useDocumentProcessor.ts
+++ b/web_ui/src/composables/useDocumentProcessor.ts
@@ -15,6 +15,14 @@ export interface DocumentConfig {
   serverUrl: string
   exportResolution: number
   mindmapPrompt: string
+  mindmapLlmBaseUrl: string
+  mindmapLlmApiKey: string
+  mindmapLlmModel: string
+  mindmapLlmTimeout: number
+  mindmapLlmTemperature: number
+  mindmapLlmMaxContextTokens: number
+  mindmapLlmMaxOutputTokens: number
+  mindmapLlmSafetyTokens: number
   tableEnable: boolean
   formulaEnable: boolean
   language: string
@@ -49,6 +57,14 @@ export const DEFAULT_DOCUMENT_CONFIG: DocumentConfig = {
   serverUrl: 'http://localhost:30000',
   exportResolution: 7680,
   mindmapPrompt: DEFAULT_MINDMAP_PROMPT,
+  mindmapLlmBaseUrl: '',
+  mindmapLlmApiKey: '',
+  mindmapLlmModel: '', // blank so the backend's MINDMAP_LLM_MODEL (default gemma-4-26B) applies until the user sets a model
+  mindmapLlmTimeout: 180,
+  mindmapLlmTemperature: 0.2,
+  mindmapLlmMaxContextTokens: 32768,
+  mindmapLlmMaxOutputTokens: 4096,
+  mindmapLlmSafetyTokens: 1024,
   tableEnable: true,
   formulaEnable: true,
   language: 'ch',
diff --git a/web_ui/src/views/DocumentProcessor.vue b/web_ui/src/views/DocumentProcessor.vue
index d9a824d..2c2e1e0 100644
--- a/web_ui/src/views/DocumentProcessor.vue
+++ b/web_ui/src/views/DocumentProcessor.vue
@@ -456,7 +456,16 @@ const startSmartOrganize = async () => {
   smartOrganizeError.value = ''
 
   try {
-    await documentApi.createMindmapTask(taskId, markdown, 'hybrid', config.mindmapPrompt)
+    await documentApi.createMindmapTask(taskId, markdown, 'hybrid', config.mindmapPrompt, {
+      base_url: config.mindmapLlmBaseUrl || undefined,
+      api_key: config.mindmapLlmApiKey || undefined,
+      model: config.mindmapLlmModel || undefined,
+      timeout: config.mindmapLlmTimeout,
+      temperature: config.mindmapLlmTemperature,
+      max_context_tokens: config.mindmapLlmMaxContextTokens,
+      max_output_tokens: config.mindmapLlmMaxOutputTokens,
+      safety_tokens: config.mindmapLlmSafetyTokens
+    })
     startSmartOrganizePolling(taskId)
   } catch (err) {
     isSmartOrganizing.value = false