From 4b50b6bf4e79b09db7d040e326d43f8e2ffec998 Mon Sep 17 00:00:00 2001 From: panyy Date: Fri, 3 Jul 2026 13:56:00 +0800 Subject: [PATCH] =?UTF-8?q?feat(=E6=80=9D=E7=BB=B4=E5=AF=BC=E5=9B=BE?= =?UTF-8?q?=E5=8A=A9=E6=89=8B)=EF=BC=9A=E4=BC=98=E5=8C=96=E6=89=93?= =?UTF-8?q?=E5=8C=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker/china/Dockerfile | 3 +- docker/china/corex.Dockerfile | 4 +- mineru/cli/fast_api.py | 105 +++++++++++++----- web_ui/src/api/document.ts | 22 +++- web_ui/src/components/ConfigPanel.vue | 103 +++++++++++++++++ .../src/composables/useDocumentProcessor.ts | 16 +++ web_ui/src/views/DocumentProcessor.vue | 11 +- 7 files changed, 232 insertions(+), 32 deletions(-) diff --git a/docker/china/Dockerfile b/docker/china/Dockerfile index 09aa62a..bfb7006 100644 --- a/docker/china/Dockerfile +++ b/docker/china/Dockerfile @@ -38,10 +38,11 @@ RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \ python3 -m pip install \ "numpy==1.26.4" \ "opencv-python==4.11.0.86" \ - "pydantic>=2.12,<3" \ "modelscope" \ "magic-pdf" \ -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip install --force-reinstall --no-cache-dir "pydantic>=2.12,<3" -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -c "import pydantic; print('pydantic', pydantic.__version__); from pydantic import TypeAdapter; print('pydantic TypeAdapter ok')" && \ python3 -m pip cache purge WORKDIR /app diff --git a/docker/china/corex.Dockerfile b/docker/china/corex.Dockerfile index a883bc9..c59ae05 100644 --- a/docker/china/corex.Dockerfile +++ b/docker/china/corex.Dockerfile @@ -18,10 +18,12 @@ RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \ numpy==1.26.4 \ opencv-python==4.11.0.86 \ -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip install --force-reinstall --no-cache-dir "pydantic>=2.12,<3" -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -c "import pydantic; print('pydantic', pydantic.__version__); from pydantic import TypeAdapter; print('pydantic TypeAdapter ok')" && \ python3 -m pip cache purge # Download models and update the configuration file RUN /bin/bash -c "mineru-models-download -s modelscope -m all" # Set the entry point to activate the virtual environment and run the command line tool -ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"] \ No newline at end of file +ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"] diff --git a/mineru/cli/fast_api.py b/mineru/cli/fast_api.py index 58a32b7..4c333a8 100644 --- a/mineru/cli/fast_api.py +++ b/mineru/cli/fast_api.py @@ -361,10 +361,22 @@ def get_infer_result(file_suffix_identifier: str, pdf_name: str, parse_dir: str) api_router = APIRouter(prefix="/api") +class MindmapLlmConfig(BaseModel): + base_url: Optional[str] = None + api_key: Optional[str] = None + model: Optional[str] = None + timeout: Optional[int] = None + temperature: Optional[float] = None + max_context_tokens: Optional[int] = None + max_output_tokens: Optional[int] = None + safety_tokens: Optional[int] = None + + class MindmapOrganizeRequest(BaseModel): markdown: str mode: str = "smart" prompt: Optional[str] = None + llm_config: Optional[MindmapLlmConfig] = None def _extract_json_object(text: Optional[str]) -> str: @@ -436,16 +448,34 @@ def _estimate_tokens(text: str) -> int: return max(1, int(ascii_chars / 4) + int(non_ascii_chars * 1.5)) -def _get_mindmap_max_output_tokens() -> int: - return int(os.getenv("MINDMAP_LLM_MAX_OUTPUT_TOKENS", "4096")) +def _optional_str(value: Optional[str], env_name: str, default: str = "") -> str: + configured = (value or "").strip() + return configured or os.getenv(env_name, default) -def _get_mindmap_context_budget(prompt: str, reserve_output_tokens: Optional[int] = None) -> tuple[int, int]: +def _optional_int(value: Optional[int], env_name: str, default: int) -> int: + return int(value if value is not None else os.getenv(env_name, str(default))) + + +def _optional_float(value: Optional[float], env_name: str, default: float) -> float: + return float(value if value is not None else os.getenv(env_name, str(default))) + + +def _get_mindmap_max_output_tokens(llm_config: Optional[MindmapLlmConfig] = None) -> int: + value = llm_config.max_output_tokens if llm_config else None + return _optional_int(value, "MINDMAP_LLM_MAX_OUTPUT_TOKENS", 4096) + + +def _get_mindmap_context_budget( + prompt: str, + reserve_output_tokens: Optional[int] = None, + llm_config: Optional[MindmapLlmConfig] = None +) -> tuple[int, int]: if reserve_output_tokens is None: - reserve_output_tokens = _get_mindmap_max_output_tokens() - max_context_tokens = int(os.getenv("MINDMAP_LLM_MAX_CONTEXT_TOKENS", "32768")) + reserve_output_tokens = _get_mindmap_max_output_tokens(llm_config) + max_context_tokens = _optional_int(llm_config.max_context_tokens if llm_config else None, "MINDMAP_LLM_MAX_CONTEXT_TOKENS", 32768) prompt_tokens = _estimate_tokens(prompt) - safety_tokens = int(os.getenv("MINDMAP_LLM_SAFETY_TOKENS", "1024")) + safety_tokens = _optional_int(llm_config.safety_tokens if llm_config else None, "MINDMAP_LLM_SAFETY_TOKENS", 1024) input_budget_tokens = max(2048, max_context_tokens - prompt_tokens - reserve_output_tokens - safety_tokens) logger.info( "Mindmap context budget max_context_tokens={} prompt_tokens={} reserve_output_tokens={} safety_tokens={} input_budget_tokens={}", @@ -527,12 +557,20 @@ def _chunk_markdown_by_headings(markdown: str, max_tokens: int) -> list[str]: return chunks or [markdown] -def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optional[str] = None, task_id: Optional[str] = None, request_role: str = "organize") -> str: - base_url = os.getenv("MINDMAP_LLM_BASE_URL", "").rstrip("/") - model = os.getenv("MINDMAP_LLM_MODEL", "gemma-4-26B") - api_key = os.getenv("MINDMAP_LLM_API_KEY", "") - timeout = int(os.getenv("MINDMAP_LLM_TIMEOUT", "180")) - max_output_tokens = _get_mindmap_max_output_tokens() +def _call_mindmap_llm( + markdown: str, + mode: str = "smart", + custom_prompt: Optional[str] = None, + task_id: Optional[str] = None, + request_role: str = "organize", + llm_config: Optional[MindmapLlmConfig] = None +) -> str: + base_url = _optional_str(llm_config.base_url if llm_config else None, "MINDMAP_LLM_BASE_URL").rstrip("/") + model = _optional_str(llm_config.model if llm_config else None, "MINDMAP_LLM_MODEL", "gemma-4-26B") + api_key = _optional_str(llm_config.api_key if llm_config else None, "MINDMAP_LLM_API_KEY") + timeout = _optional_int(llm_config.timeout if llm_config else None, "MINDMAP_LLM_TIMEOUT", 180) + max_output_tokens = _get_mindmap_max_output_tokens(llm_config) + temperature = _optional_float(llm_config.temperature if llm_config else None, "MINDMAP_LLM_TEMPERATURE", 0.2) if not base_url: raise RuntimeError("未配置智能整理模型服务,请设置 MINDMAP_LLM_BASE_URL") @@ -545,8 +583,9 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona {compact_markdown} """ logger.info( - "Mindmap LLM request start task_id={} role={} model={} base_url={} mode={} input_chars={} input_tokens_est={} prompt_chars={} max_tokens={}", - task_id or "-", request_role, model, base_url, mode, len(compact_markdown), _estimate_tokens(compact_markdown), len(prompt_template), max_output_tokens + "Mindmap LLM request start task_id={} role={} model={} base_url={} mode={} input_chars={} input_tokens_est={} prompt_chars={} max_tokens={} timeout={} temperature={} api_key_configured={}", + task_id or "-", request_role, model, base_url, mode, len(compact_markdown), _estimate_tokens(compact_markdown), + len(prompt_template), max_output_tokens, timeout, temperature, bool(api_key) ) payload = { @@ -555,7 +594,7 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona {"role": "system", "content": "你擅长把长文档整理成结构清晰、层次合理的思维导图 Markdown,并严格保留原文语言。"}, {"role": "user", "content": prompt}, ], - "temperature": float(os.getenv("MINDMAP_LLM_TEMPERATURE", "0.2")), + "temperature": temperature, "max_tokens": max_output_tokens, } data = json.dumps(payload, ensure_ascii=False).encode("utf-8") @@ -600,9 +639,15 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona return organized -def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional[str], task_id: str) -> str: +def _organize_mindmap_markdown( + markdown: str, + mode: str, + custom_prompt: Optional[str], + task_id: str, + llm_config: Optional[MindmapLlmConfig] +) -> str: prompt_template = (custom_prompt or "").strip() or DEFAULT_MINDMAP_ORGANIZE_PROMPT - _, input_budget_tokens = _get_mindmap_context_budget(prompt_template) + _, input_budget_tokens = _get_mindmap_context_budget(prompt_template, llm_config=llm_config) source_tokens = _estimate_tokens(markdown) logger.info( "Mindmap organize strategy task_id={} source_chars={} source_tokens_est={} input_budget_tokens={}", @@ -611,7 +656,7 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional if source_tokens <= input_budget_tokens: _update_task_progress(task_id, 35, "调用智能整理模型") - return _call_mindmap_llm(markdown, mode, prompt_template, task_id, "single") + return _call_mindmap_llm(markdown, mode, prompt_template, task_id, "single", llm_config) chunks = _chunk_markdown_by_headings(markdown, input_budget_tokens) logger.info("Mindmap large input split task_id={} chunks={}", task_id, len(chunks)) @@ -623,14 +668,14 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional "Mindmap chunk organize task_id={} chunk={}/{} chars={} tokens_est={}", task_id, index, len(chunks), len(chunk), _estimate_tokens(chunk) ) - partial = _call_mindmap_llm(chunk, mode, prompt_template, task_id, f"chunk-{index}") + partial = _call_mindmap_llm(chunk, mode, prompt_template, task_id, f"chunk-{index}", llm_config) partial_results.append(partial) merged_input = "\n\n".join( f"\n{partial}" for index, partial in enumerate(partial_results, start=1) ) - _, merge_budget_tokens = _get_mindmap_context_budget(MINDMAP_MERGE_PROMPT) + _, merge_budget_tokens = _get_mindmap_context_budget(MINDMAP_MERGE_PROMPT, llm_config=llm_config) merge_tokens = _estimate_tokens(merged_input) if merge_tokens > merge_budget_tokens: logger.warning( @@ -641,14 +686,20 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional merged_round: list[str] = [] for index, chunk in enumerate(merge_chunks, start=1): _update_task_progress(task_id, 78 + int(index / max(len(merge_chunks), 1) * 10), f"合并局部大纲 {index}/{len(merge_chunks)}") - merged_round.append(_call_mindmap_llm(chunk, mode, MINDMAP_MERGE_PROMPT, task_id, f"merge-round-{index}")) + merged_round.append(_call_mindmap_llm(chunk, mode, MINDMAP_MERGE_PROMPT, task_id, f"merge-round-{index}", llm_config)) merged_input = "\n\n".join(merged_round) _update_task_progress(task_id, 90, "全局整理标题结构") - return _call_mindmap_llm(merged_input, mode, MINDMAP_MERGE_PROMPT, task_id, "merge") + return _call_mindmap_llm(merged_input, mode, MINDMAP_MERGE_PROMPT, task_id, "merge", llm_config) -async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str, prompt: Optional[str]): +async def _run_mindmap_organize_task( + task_id: str, + markdown: str, + mode: str, + prompt: Optional[str], + llm_config: Optional[MindmapLlmConfig] +): try: _store_task_progress(task_id, { "progress": 10, @@ -658,7 +709,7 @@ async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str, pro "file_names": "", "result_md": None, }) - organized = await asyncio.to_thread(_organize_mindmap_markdown, markdown, mode, prompt, task_id) + organized = await asyncio.to_thread(_organize_mindmap_markdown, markdown, mode, prompt, task_id, llm_config) state = _get_task_progress(task_id) or {} state.update({ "progress": 100, @@ -722,10 +773,10 @@ async def create_mindmap_task(task_id: str, request: MindmapOrganizeRequest): "result_md": None, } _store_task_progress(task_id, state) - asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode, request.prompt)) + asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode, request.prompt, request.llm_config)) logger.info( - "Registered mindmap organize task pid={} task_id={} mode={} input_chars={} custom_prompt={}", - os.getpid(), task_id, request.mode, len(markdown), bool((request.prompt or "").strip()) + "Registered mindmap organize task pid={} task_id={} mode={} input_chars={} custom_prompt={} request_llm_config={}", + os.getpid(), task_id, request.mode, len(markdown), bool((request.prompt or "").strip()), bool(request.llm_config) ) return state diff --git a/web_ui/src/api/document.ts b/web_ui/src/api/document.ts index 688626c..bfbc17c 100644 --- a/web_ui/src/api/document.ts +++ b/web_ui/src/api/document.ts @@ -41,6 +41,17 @@ export interface MindmapOrganizeProgress extends ParseProgress { result_md?: string | null } +export interface MindmapLlmConfig { + base_url?: string + api_key?: string + model?: string + timeout?: number + temperature?: number + max_context_tokens?: number + max_output_tokens?: number + safety_tokens?: number +} + export const documentApi = { /** * 解析文档 @@ -102,11 +113,18 @@ export const documentApi = { }) }, - createMindmapTask(taskId: string, markdown: string, mode = 'smart', prompt?: string): Promise { + createMindmapTask( + taskId: string, + markdown: string, + mode = 'smart', + prompt?: string, + llmConfig?: MindmapLlmConfig + ): Promise { return request.post(`/api/mindmap_tasks/${encodeURIComponent(taskId)}`, { markdown, mode, - prompt + prompt, + llm_config: llmConfig }).then(result => { return result as unknown as MindmapOrganizeProgress }) diff --git a/web_ui/src/components/ConfigPanel.vue b/web_ui/src/components/ConfigPanel.vue index f67b3ba..57bb8c1 100644 --- a/web_ui/src/components/ConfigPanel.vue +++ b/web_ui/src/components/ConfigPanel.vue @@ -130,6 +130,99 @@ + + +
+ OpenAI 兼容接口地址,后端会请求该地址下的 /chat/completions;留空时使用服务端 MINDMAP_LLM_BASE_URL。 +
+
+ + + +
+ 仅随智能整理请求发送给后端,后端日志不会输出明文 Key。 +
+
+ + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ { line-height: 1.4; } +.llm-grid { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 0 12px; +} + +.number-input { + width: 100%; +} + .config-actions { display: flex; justify-content: flex-end; diff --git a/web_ui/src/composables/useDocumentProcessor.ts b/web_ui/src/composables/useDocumentProcessor.ts index 8268674..b07459e 100644 --- a/web_ui/src/composables/useDocumentProcessor.ts +++ b/web_ui/src/composables/useDocumentProcessor.ts @@ -15,6 +15,14 @@ export interface DocumentConfig { serverUrl: string exportResolution: number mindmapPrompt: string + mindmapLlmBaseUrl: string + mindmapLlmApiKey: string + mindmapLlmModel: string + mindmapLlmTimeout: number + mindmapLlmTemperature: number + mindmapLlmMaxContextTokens: number + mindmapLlmMaxOutputTokens: number + mindmapLlmSafetyTokens: number tableEnable: boolean formulaEnable: boolean language: string @@ -49,6 +57,14 @@ export const DEFAULT_DOCUMENT_CONFIG: DocumentConfig = { serverUrl: 'http://localhost:30000', exportResolution: 7680, mindmapPrompt: DEFAULT_MINDMAP_PROMPT, + mindmapLlmBaseUrl: '', + mindmapLlmApiKey: '', + mindmapLlmModel: 'gemma-4-26B', + mindmapLlmTimeout: 180, + mindmapLlmTemperature: 0.2, + mindmapLlmMaxContextTokens: 32768, + mindmapLlmMaxOutputTokens: 4096, + mindmapLlmSafetyTokens: 1024, tableEnable: true, formulaEnable: true, language: 'ch', diff --git a/web_ui/src/views/DocumentProcessor.vue b/web_ui/src/views/DocumentProcessor.vue index d9a824d..2c2e1e0 100644 --- a/web_ui/src/views/DocumentProcessor.vue +++ b/web_ui/src/views/DocumentProcessor.vue @@ -456,7 +456,16 @@ const startSmartOrganize = async () => { smartOrganizeError.value = '' try { - await documentApi.createMindmapTask(taskId, markdown, 'hybrid', config.mindmapPrompt) + await documentApi.createMindmapTask(taskId, markdown, 'hybrid', config.mindmapPrompt, { + base_url: config.mindmapLlmBaseUrl || undefined, + api_key: config.mindmapLlmApiKey || undefined, + model: config.mindmapLlmModel || undefined, + timeout: config.mindmapLlmTimeout, + temperature: config.mindmapLlmTemperature, + max_context_tokens: config.mindmapLlmMaxContextTokens, + max_output_tokens: config.mindmapLlmMaxOutputTokens, + safety_tokens: config.mindmapLlmSafetyTokens + }) startSmartOrganizePolling(taskId) } catch (err) { isSmartOrganizing.value = false